MySQL 表分区和事件调度器

MySQL 分区方案实践

在实际项目中,要用到分区的,一般都是数据量较大,且数据量会不断增加的,所以分区数据范围不会太大。

比如按时间分区,一般都是按月分区,按年的就没有必要。

之前在项目中,按月分区时,本能就想到按下面这样精确到月分创建分区表:

1
2
3
PARTITION BY RANGE (YEAR(created_at)*100+MONTH(created_at)) (
    PARTITION p202505 VALUES LESS THAN (202506),
    ...

但是项目运行一段时间后,数据量会不断增加,就会发现 monthly 分区就无法满足需求了。

这个时候就发现这样分区就有点傻了,其他就算按月分,也完全应该精确到天粒度,

这样后续想缩小分区天数,修改起来的方便的多,也不需要考虑数据迁移什么的。

下面还是以按月分区举例,但是精确到天粒度(每月1号开始),

按每个月1号开始进行分区,可以方便后续调整分区粒度。以下是几种实现方案:

方案1:使用 RANGE COLUMNS + DATE 类型

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
CREATE TABLE orders (
    id BIGINT AUTO_INCREMENT,
    order_no VARCHAR(32) NOT NULL,
    created_at DATE NOT NULL,  -- 使用DATE类型,只存储日期
    customer_id BIGINT NOT NULL,
    amount DECIMAL(12,2) NOT NULL,
    -- 其他字段...
    PRIMARY KEY (id, created_at),
    UNIQUE KEY uk_order_no (order_no, created_at),
    KEY idx_customer_created (customer_id, created_at),
    KEY idx_created (created_at)
)
PARTITION BY RANGE COLUMNS(created_at) (
    PARTITION p202301 VALUES LESS THAN ('2023-02-01'),
    PARTITION p202302 VALUES LESS THAN ('2023-03-01'),
    PARTITION p202303 VALUES LESS THAN ('2023-04-01'),
    PARTITION p202304 VALUES LESS THAN ('2023-05-01'),
    PARTITION p202305 VALUES LESS THAN ('2023-06-01'),
    PARTITION p202306 VALUES LESS THAN ('2023-07-01'),
    PARTITION p202307 VALUES LESS THAN ('2023-08-01'),
    PARTITION p202308 VALUES LESS THAN ('2023-09-01'),
    PARTITION p202309 VALUES LESS THAN ('2023-10-01'),
    PARTITION p202310 VALUES LESS THAN ('2023-11-01'),
    PARTITION p202311 VALUES LESS THAN ('2023-12-01'),
    PARTITION p202312 VALUES LESS THAN ('2024-01-01'),
    PARTITION pmax VALUES LESS THAN MAXVALUE
);

方案2:使用 RANGE + TO_DAYS() 函数

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
CREATE TABLE orders (
    id BIGINT AUTO_INCREMENT,
    order_no VARCHAR(32) NOT NULL,
    created_at DATETIME NOT NULL,  -- 使用DATETIME类型
    customer_id BIGINT NOT NULL,
    amount DECIMAL(12,2) NOT NULL,
    -- 其他字段...
    PRIMARY KEY (id, created_at),
    UNIQUE KEY uk_order_no (order_no, created_at),
    KEY idx_customer_created (customer_id, created_at),
    KEY idx_created (created_at)
)
PARTITION BY RANGE (TO_DAYS(created_at)) (
    PARTITION p202301 VALUES LESS THAN (TO_DAYS('2023-02-01')),
    PARTITION p202302 VALUES LESS THAN (TO_DAYS('2023-03-01')),
    PARTITION p202303 VALUES LESS THAN (TO_DAYS('2023-04-01')),
    PARTITION p202304 VALUES LESS THAN (TO_DAYS('2023-05-01')),
    PARTITION p202305 VALUES LESS THAN (TO_DAYS('2023-06-01')),
    PARTITION p202306 VALUES LESS THAN (TO_DAYS('2023-07-01')),
    PARTITION p202307 VALUES LESS THAN (TO_DAYS('2023-08-01')),
    PARTITION p202308 VALUES LESS THAN (TO_DAYS('2023-09-01')),
    PARTITION p202309 VALUES LESS THAN (TO_DAYS('2023-10-01')),
    PARTITION p202310 VALUES LESS THAN (TO_DAYS('2023-11-01')),
    PARTITION p202311 VALUES LESS THAN (TO_DAYS('2023-12-01')),
    PARTITION p202312 VALUES LESS THAN (TO_DAYS('2024-01-01')),
    PARTITION pmax VALUES LESS THAN MAXVALUE
);

方案3:动态生成分区语句(推荐用于生产环境)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
-- 生成分区语句的函数或脚本
SET @start_date = '2023-01-01';
SET @end_date = '2024-01-01';
SET @sql = 'CREATE TABLE orders (
    id BIGINT AUTO_INCREMENT,
    order_no VARCHAR(32) NOT NULL,
    created_at DATE NOT NULL,
    customer_id BIGINT NOT NULL,
    amount DECIMAL(12,2) NOT NULL,
    PRIMARY KEY (id, created_at),
    UNIQUE KEY uk_order_no (order_no, created_at),
    KEY idx_created (created_at)
)
PARTITION BY RANGE COLUMNS(created_at) (';

WHILE @start_date < @end_date DO
    SET @next_month = DATE_ADD(@start_date, INTERVAL 1 MONTH);
    SET @partition_name = CONCAT('p', DATE_FORMAT(@start_date, '%Y%m'));
    SET @sql = CONCAT(@sql, 'PARTITION ', @partition_name, 
                     ' VALUES LESS THAN (''', @next_month, '''),');
    SET @start_date = @next_month;
END WHILE;

SET @sql = CONCAT(@sql, 'PARTITION pmax VALUES LESS THAN MAXVALUE)');
PREPARE stmt FROM @sql;
EXECUTE stmt;

分区维护管理

1. 添加新分区

1
2
3
4
5
-- 每月初执行,添加下个月的分区
ALTER TABLE orders REORGANIZE PARTITION pmax INTO (
    PARTITION p202401 VALUES LESS THAN ('2024-02-01'),
    PARTITION pmax VALUES LESS THAN MAXVALUE
);

2. 删除旧分区(数据归档)

1
2
-- 删除一年前的数据分区
ALTER TABLE orders DROP PARTITION p202301;

3. 查询分区信息

1
2
3
4
5
6
7
8
-- 查看分区信息
SELECT PARTITION_NAME, TABLE_ROWS, PARTITION_EXPRESSION
FROM INFORMATION_SCHEMA.PARTITIONS 
WHERE TABLE_NAME = 'orders';

-- 查看分区使用情况
EXPLAIN PARTITIONS SELECT * FROM orders 
WHERE created_at BETWEEN '2023-05-01' AND '2023-05-31';

查询优化示例

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
-- 利用分区裁剪的查询
SELECT * FROM orders 
WHERE created_at >= '2023-05-01' 
  AND created_at < '2023-06-01'
  AND customer_id = 12345;

-- 跨分区查询(仍能利用分区)
SELECT COUNT(*) FROM orders 
WHERE created_at BETWEEN '2023-05-01' AND '2023-07-31';

-- 按月份聚合统计
SELECT 
    DATE_FORMAT(created_at, '%Y-%m') as month,
    COUNT(*) as order_count,
    SUM(amount) as total_amount
FROM orders 
WHERE created_at >= '2023-01-01'
GROUP BY DATE_FORMAT(created_at, '%Y-%m');

自动分区维护脚本

  1. 自动生成按月分区(从1号开始)脚本示例
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
-- 每月自动添加分区的存储过程
DELIMITER //
CREATE PROCEDURE auto_add_order_partition()
BEGIN
    DECLARE next_month DATE;
    DECLARE partition_name VARCHAR(20);
    
    SET next_month = DATE_FORMAT(DATE_ADD(NOW(), INTERVAL 1 MONTH), '%Y-%m-01');
    SET partition_name = CONCAT('p', DATE_FORMAT(next_month, '%Y%m'));
    
    SET @sql = CONCAT(
        'ALTER TABLE orders REORGANIZE PARTITION pmax INTO (',
        'PARTITION ', partition_name, ' VALUES LESS THAN (''', 
        DATE_FORMAT(DATE_ADD(next_month, INTERVAL 1 MONTH), '%Y-%m-01'), '''),',
        'PARTITION pmax VALUES LESS THAN MAXVALUE)'
    );
    
    PREPARE stmt FROM @sql;
    EXECUTE stmt;
    DEALLOCATE PREPARE stmt;
END//
DELIMITER ;

-- 创建事件每月执行
CREATE EVENT auto_add_partition_event
ON SCHEDULE EVERY 1 MONTH
STARTS '2023-12-01 00:00:00'
DO
CALL auto_add_order_partition();
  1. 自动生成10天分区脚本示例
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
DELIMITER //
CREATE PROCEDURE create_10day_partitions()
BEGIN
    DECLARE start_date DATE DEFAULT '2024-05-01';
    DECLARE end_date DATE DEFAULT '2024-12-31';
    DECLARE current_date DATE;
    DECLARE partition_sql LONGTEXT DEFAULT '';
    
    SET current_date = start_date;
    
    WHILE current_date <= end_date DO
        SET @partition_name = CONCAT('p', DATE_FORMAT(current_date, '%Y%m%d'));
        SET @next_date = DATE_ADD(current_date, INTERVAL 10 DAY);
        SET @less_than_value = TO_DAYS(@next_date);
        
        SET partition_sql = CONCAT(partition_sql, 
            'PARTITION ', @partition_name, 
            ' VALUES LESS THAN (', @less_than_value, '),');
        
        SET current_date = @next_date;
    END WHILE;
    
    SET partition_sql = CONCAT(partition_sql, 'PARTITION pmax VALUES LESS THAN MAXVALUE)');
    
    -- 输出或执行SQL
    SELECT partition_sql;
END//
DELIMITER ;

CALL create_10day_partitions();

优势总结

  1. 灵活性:按月分区便于后续调整为按周或按季度分区
  2. 管理方便:每月维护一次分区即可
  3. 性能优化:查询时可以精确裁剪到具体月份
  4. 数据生命周期:便于按月份进行数据归档和清理

这种按月分区方案在数据量管理和查询性能之间取得了很好的平衡。