MySQL 表分区和事件调度器
MySQL 分区方案实践
在实际项目中,要用到分区的,一般都是数据量较大,且数据量会不断增加的,所以分区数据范围不会太大。
比如按时间分区,一般都是按月分区,按年的就没有必要。
之前在项目中,按月分区时,本能就想到按下面这样精确到月分创建分区表:
1
2
3
|
PARTITION BY RANGE (YEAR(created_at)*100+MONTH(created_at)) (
PARTITION p202505 VALUES LESS THAN (202506),
...
|
但是项目运行一段时间后,数据量会不断增加,就会发现 monthly 分区就无法满足需求了。
这个时候就发现这样分区就有点傻了,其他就算按月分,也完全应该精确到天粒度,
这样后续想缩小分区天数,修改起来的方便的多,也不需要考虑数据迁移什么的。
下面还是以按月分区举例,但是精确到天粒度(每月1号开始),
按每个月1号开始进行分区,可以方便后续调整分区粒度。以下是几种实现方案:
方案1:使用 RANGE COLUMNS + DATE 类型
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
CREATE TABLE orders (
id BIGINT AUTO_INCREMENT,
order_no VARCHAR(32) NOT NULL,
created_at DATE NOT NULL, -- 使用DATE类型,只存储日期
customer_id BIGINT NOT NULL,
amount DECIMAL(12,2) NOT NULL,
-- 其他字段...
PRIMARY KEY (id, created_at),
UNIQUE KEY uk_order_no (order_no, created_at),
KEY idx_customer_created (customer_id, created_at),
KEY idx_created (created_at)
)
PARTITION BY RANGE COLUMNS(created_at) (
PARTITION p202301 VALUES LESS THAN ('2023-02-01'),
PARTITION p202302 VALUES LESS THAN ('2023-03-01'),
PARTITION p202303 VALUES LESS THAN ('2023-04-01'),
PARTITION p202304 VALUES LESS THAN ('2023-05-01'),
PARTITION p202305 VALUES LESS THAN ('2023-06-01'),
PARTITION p202306 VALUES LESS THAN ('2023-07-01'),
PARTITION p202307 VALUES LESS THAN ('2023-08-01'),
PARTITION p202308 VALUES LESS THAN ('2023-09-01'),
PARTITION p202309 VALUES LESS THAN ('2023-10-01'),
PARTITION p202310 VALUES LESS THAN ('2023-11-01'),
PARTITION p202311 VALUES LESS THAN ('2023-12-01'),
PARTITION p202312 VALUES LESS THAN ('2024-01-01'),
PARTITION pmax VALUES LESS THAN MAXVALUE
);
|
方案2:使用 RANGE + TO_DAYS() 函数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
CREATE TABLE orders (
id BIGINT AUTO_INCREMENT,
order_no VARCHAR(32) NOT NULL,
created_at DATETIME NOT NULL, -- 使用DATETIME类型
customer_id BIGINT NOT NULL,
amount DECIMAL(12,2) NOT NULL,
-- 其他字段...
PRIMARY KEY (id, created_at),
UNIQUE KEY uk_order_no (order_no, created_at),
KEY idx_customer_created (customer_id, created_at),
KEY idx_created (created_at)
)
PARTITION BY RANGE (TO_DAYS(created_at)) (
PARTITION p202301 VALUES LESS THAN (TO_DAYS('2023-02-01')),
PARTITION p202302 VALUES LESS THAN (TO_DAYS('2023-03-01')),
PARTITION p202303 VALUES LESS THAN (TO_DAYS('2023-04-01')),
PARTITION p202304 VALUES LESS THAN (TO_DAYS('2023-05-01')),
PARTITION p202305 VALUES LESS THAN (TO_DAYS('2023-06-01')),
PARTITION p202306 VALUES LESS THAN (TO_DAYS('2023-07-01')),
PARTITION p202307 VALUES LESS THAN (TO_DAYS('2023-08-01')),
PARTITION p202308 VALUES LESS THAN (TO_DAYS('2023-09-01')),
PARTITION p202309 VALUES LESS THAN (TO_DAYS('2023-10-01')),
PARTITION p202310 VALUES LESS THAN (TO_DAYS('2023-11-01')),
PARTITION p202311 VALUES LESS THAN (TO_DAYS('2023-12-01')),
PARTITION p202312 VALUES LESS THAN (TO_DAYS('2024-01-01')),
PARTITION pmax VALUES LESS THAN MAXVALUE
);
|
方案3:动态生成分区语句(推荐用于生产环境)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
-- 生成分区语句的函数或脚本
SET @start_date = '2023-01-01';
SET @end_date = '2024-01-01';
SET @sql = 'CREATE TABLE orders (
id BIGINT AUTO_INCREMENT,
order_no VARCHAR(32) NOT NULL,
created_at DATE NOT NULL,
customer_id BIGINT NOT NULL,
amount DECIMAL(12,2) NOT NULL,
PRIMARY KEY (id, created_at),
UNIQUE KEY uk_order_no (order_no, created_at),
KEY idx_created (created_at)
)
PARTITION BY RANGE COLUMNS(created_at) (';
WHILE @start_date < @end_date DO
SET @next_month = DATE_ADD(@start_date, INTERVAL 1 MONTH);
SET @partition_name = CONCAT('p', DATE_FORMAT(@start_date, '%Y%m'));
SET @sql = CONCAT(@sql, 'PARTITION ', @partition_name,
' VALUES LESS THAN (''', @next_month, '''),');
SET @start_date = @next_month;
END WHILE;
SET @sql = CONCAT(@sql, 'PARTITION pmax VALUES LESS THAN MAXVALUE)');
PREPARE stmt FROM @sql;
EXECUTE stmt;
|
分区维护管理
1. 添加新分区
1
2
3
4
5
|
-- 每月初执行,添加下个月的分区
ALTER TABLE orders REORGANIZE PARTITION pmax INTO (
PARTITION p202401 VALUES LESS THAN ('2024-02-01'),
PARTITION pmax VALUES LESS THAN MAXVALUE
);
|
2. 删除旧分区(数据归档)
1
2
|
-- 删除一年前的数据分区
ALTER TABLE orders DROP PARTITION p202301;
|
3. 查询分区信息
1
2
3
4
5
6
7
8
|
-- 查看分区信息
SELECT PARTITION_NAME, TABLE_ROWS, PARTITION_EXPRESSION
FROM INFORMATION_SCHEMA.PARTITIONS
WHERE TABLE_NAME = 'orders';
-- 查看分区使用情况
EXPLAIN PARTITIONS SELECT * FROM orders
WHERE created_at BETWEEN '2023-05-01' AND '2023-05-31';
|
查询优化示例
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
-- 利用分区裁剪的查询
SELECT * FROM orders
WHERE created_at >= '2023-05-01'
AND created_at < '2023-06-01'
AND customer_id = 12345;
-- 跨分区查询(仍能利用分区)
SELECT COUNT(*) FROM orders
WHERE created_at BETWEEN '2023-05-01' AND '2023-07-31';
-- 按月份聚合统计
SELECT
DATE_FORMAT(created_at, '%Y-%m') as month,
COUNT(*) as order_count,
SUM(amount) as total_amount
FROM orders
WHERE created_at >= '2023-01-01'
GROUP BY DATE_FORMAT(created_at, '%Y-%m');
|
自动分区维护脚本
- 自动生成按月分区(从1号开始)脚本示例
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
-- 每月自动添加分区的存储过程
DELIMITER //
CREATE PROCEDURE auto_add_order_partition()
BEGIN
DECLARE next_month DATE;
DECLARE partition_name VARCHAR(20);
SET next_month = DATE_FORMAT(DATE_ADD(NOW(), INTERVAL 1 MONTH), '%Y-%m-01');
SET partition_name = CONCAT('p', DATE_FORMAT(next_month, '%Y%m'));
SET @sql = CONCAT(
'ALTER TABLE orders REORGANIZE PARTITION pmax INTO (',
'PARTITION ', partition_name, ' VALUES LESS THAN (''',
DATE_FORMAT(DATE_ADD(next_month, INTERVAL 1 MONTH), '%Y-%m-01'), '''),',
'PARTITION pmax VALUES LESS THAN MAXVALUE)'
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
END//
DELIMITER ;
-- 创建事件每月执行
CREATE EVENT auto_add_partition_event
ON SCHEDULE EVERY 1 MONTH
STARTS '2023-12-01 00:00:00'
DO
CALL auto_add_order_partition();
|
- 自动生成10天分区脚本示例
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
DELIMITER //
CREATE PROCEDURE create_10day_partitions()
BEGIN
DECLARE start_date DATE DEFAULT '2024-05-01';
DECLARE end_date DATE DEFAULT '2024-12-31';
DECLARE current_date DATE;
DECLARE partition_sql LONGTEXT DEFAULT '';
SET current_date = start_date;
WHILE current_date <= end_date DO
SET @partition_name = CONCAT('p', DATE_FORMAT(current_date, '%Y%m%d'));
SET @next_date = DATE_ADD(current_date, INTERVAL 10 DAY);
SET @less_than_value = TO_DAYS(@next_date);
SET partition_sql = CONCAT(partition_sql,
'PARTITION ', @partition_name,
' VALUES LESS THAN (', @less_than_value, '),');
SET current_date = @next_date;
END WHILE;
SET partition_sql = CONCAT(partition_sql, 'PARTITION pmax VALUES LESS THAN MAXVALUE)');
-- 输出或执行SQL
SELECT partition_sql;
END//
DELIMITER ;
CALL create_10day_partitions();
|
优势总结
- 灵活性:按月分区便于后续调整为按周或按季度分区
- 管理方便:每月维护一次分区即可
- 性能优化:查询时可以精确裁剪到具体月份
- 数据生命周期:便于按月份进行数据归档和清理
这种按月分区方案在数据量管理和查询性能之间取得了很好的平衡。