业务方的SQL: UPDATE agent_commission_statistics set settled =1 where `date` ='2024-08'
这个SQL要改429699条数据,直接执行会影响业务稳定性,并且造成主从复制延迟。
第一种思路:
需要通过脚本,循环分批次批量更改数据N行记录,比如2000行
实验开始
1.创建表结构
-- Experiment table: ~500k rows; both the COUNT checks and the batched UPDATE
-- filter on (date, settled), so give them a secondary index instead of
-- forcing full scans / PK-range scans.
CREATE TABLE agent_commission_statistics (
  `id` bigint NOT NULL AUTO_INCREMENT,
  `name` varchar(255) DEFAULT NULL,
  `date` char(7) DEFAULT NULL,
  `agent_id` bigint NOT NULL,
  `settled` int NOT NULL DEFAULT '0',
  PRIMARY KEY (`id`),
  KEY `idx_date_settled` (`date`, `settled`)
) ENGINE=InnoDB AUTO_INCREMENT=500001 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

2.创建python脚本,构造数据,批量插入50万行数据
cat insert_piliang.py
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import pymysql
import random
import time
# Database connection settings.
# NOTE(review): credentials are committed in plain text — consider moving
# them to environment variables or a secrets store before publishing.
db_config = {
'host': '127.0.0.1',
'user': 'dba_tools',
'password': 'hoSk3778ClguTfCK',
'database': 'agentdata',
'port': 3366,
'charset': 'utf8'
}
# Target table for the bulk-insert experiment.
table_name = 'agent_commission_statistics'
# Build synthetic rows; `settled` is always the int 0 so the later
# batch-update experiment has a deterministic "unsettled" population.
def generate_random_data(batch_size=5000, dates=None):
    """Generate `batch_size` random (name, date, agent_id, settled) tuples.

    Args:
        batch_size: number of rows to generate.
        dates: candidate values for the `date` column; defaults to the three
            months used in the experiment (generalized from a hard-coded list).

    Returns:
        list[tuple[str, str, int, int]]: rows ready for cursor.executemany();
        `settled` is always the int 0.
    """
    if dates is None:
        dates = ['2024-08', '2024-07', '2024-06']
    rows = []
    for _ in range(batch_size):
        rows.append((
            f"agent_{random.randint(1000, 9999)}",
            random.choice(dates),
            random.randint(100000, 999999),
            0,  # explicitly the int 0
        ))
    return rows
# Insert one batch of rows and commit it as a single transaction.
def batch_insert_data(conn, data):
    """Bulk-insert `data` (sequence of 4-tuples) into `table_name` and commit."""
    sql = f"INSERT INTO {table_name} (name, date, agent_id, settled) VALUES (%s, %s, %s, %s)"
    with conn.cursor() as cur:
        cur.executemany(sql, data)
        conn.commit()
def main():
    """Insert 500k synthetic rows in batches of 5000 and report timing."""
    total_records = 500000  # total rows to insert
    batch_size = 5000       # rows per executemany() batch
    inserted_records = 0    # running count of inserted rows
    conn = None  # BUGFIX: ensure `conn` exists even if connect() raises,
                 # otherwise the finally block dies with NameError
    try:
        conn = pymysql.connect(**db_config)
        start_time = time.time()  # wall-clock start
        while inserted_records < total_records:
            data = generate_random_data(batch_size)
            batch_insert_data(conn, data)
            inserted_records += len(data)
            print(f"已插入 {inserted_records} 条数据")
        end_time = time.time()  # wall-clock end
        print(f"插入完成,耗时 {end_time - start_time:.2f} 秒,共插入 {inserted_records} 条数据")
    except pymysql.MySQLError as e:
        print(f"数据库操作失败:{e}")
    finally:
        if conn:
            conn.close()
            print("数据库连接已关闭")
if __name__ == "__main__":
main()3.执行插入脚本
python3 insert_piliang.py

4.查询数据
mysql> select count(*) from agent_commission_statistics where date='2024-08' and settled=0;
+----------+
| count(*) |
+----------+
| 166013 |
+----------+
1 row in set (0.17 sec)
mysql> select count(*) as count,date from agent_commission_statistics group by date order by count desc;
+--------+---------+
| count | date |
+--------+---------+
| 167218 | 2024-07 |
| 166769 | 2024-06 |
| 166013 | 2024-08 |
+--------+---------+
3 rows in set (0.16 sec)

5.使用批量更新脚本更新,这里batch_size=2000,也就是每次按id步进2000批量更新,可以根据实际情况修改
cat update.py
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import pymysql
import time
# Database connection settings.
# NOTE(review): credentials are committed in plain text — consider moving
# them to environment variables or a secrets store before publishing.
db_config = {
'host': '127.0.0.1',
'user': 'dba_tools',
'password': 'hoSk3778ClguTfCK',
'database': 'agentdata',
'port': 3366,
'charset': 'utf8'
}
# Target table for the batched update.
table_name = 'agent_commission_statistics'
# WHERE fragment selecting the rows to update (edit here as needed).
where_condition = "date = '2024-08' AND settled = 0"
# SET fragment applied to the matched rows (edit here as needed).
set_condition = "settled = 1"
# id-range step / maximum rows touched per batch.
batch_size = 2000
def get_min_max_id(cursor, table_name, where_condition):
    """Return (min_id, max_id) of rows matching `where_condition`,
    or (None, None) when no row matches."""
    cursor.execute(f"SELECT MIN(id), MAX(id) FROM {table_name} WHERE {where_condition}")
    row = cursor.fetchone()
    # MIN()/MAX() come back as NULLs when nothing matches; normalize that
    # (and a missing row) to (None, None) so callers can bail out safely.
    if row and row[0] is not None and row[1] is not None:
        return row[0], row[1]
    return None, None
def count_settled_records(cursor, table_name, where_condition):
    """Return the number of rows in `table_name` matching `where_condition`."""
    sql = f"SELECT COUNT(*) FROM {table_name} WHERE {where_condition}"
    cursor.execute(sql)
    return cursor.fetchone()[0]
# NOTE(review): table/condition fragments are interpolated into the SQL via
# f-strings. They come from module-level config, not user input — never point
# them at untrusted data (SQL injection risk).
def update_table_data(conn, table_name, where_condition, set_condition, batch_size):
    """Update matching rows in id-range batches of `batch_size`.

    Walks [min_id, max_id] in steps of `batch_size`, committing after each
    batch and sleeping 1s between batches so a huge UPDATE does not stall
    the server or blow up replication lag.

    Args:
        conn: open pymysql connection (committed per batch).
        table_name: table to update.
        where_condition: WHERE fragment selecting candidate rows.
        set_condition: SET fragment applied to them.
        batch_size: id-range step / LIMIT per batch.
    """
    with conn.cursor() as cursor:
        min_id, max_id = get_min_max_id(cursor, table_name, where_condition)
        if min_id is None or max_id is None:
            print(f"未找到符合条件的记录,表: {table_name}")
            return
        # FIX: set the session isolation level once, not on every iteration
        # (it was re-executed inside the loop for each batch).
        cursor.execute("SET SESSION transaction_isolation = 'REPEATABLE-READ'")
        begin_id = min_id
        total_updated_rows = 0  # running total of changed rows
        while begin_id <= max_id:
            # Batch by PK range; LIMIT is a belt-and-braces cap.
            update_sql = f"""
            UPDATE {table_name}
            SET {set_condition}
            WHERE {where_condition}
            AND id >= {begin_id}
            AND id < {begin_id + batch_size}
            LIMIT {batch_size}
            """
            print(update_sql)
            cursor.execute(update_sql)
            conn.commit()  # commit per batch to keep transactions small
            affected_rows = cursor.rowcount
            total_updated_rows += affected_rows
            if affected_rows > 0:
                print(f"{table_name} 表本次更改的行数是: {affected_rows}")
            else:
                print(f"{table_name} 表当前范围内没有数据需要更新")
            begin_id += batch_size
            time.sleep(1)  # throttle between batches (replication-lag relief)
    print(f"{table_name} 表数据更改成功,总的更改行数是: {total_updated_rows}")
def main():
    """Count matching rows, run the batched update, then report the delta."""
    conn = None  # BUGFIX: ensure `conn` exists even if connect() raises,
                 # otherwise the finally block dies with NameError
    try:
        conn = pymysql.connect(**db_config)
        with conn.cursor() as cursor:
            # Baseline: rows still matching the WHERE before we start.
            total_before = count_settled_records(cursor, table_name, where_condition)
        print(f"开始时符合条件的记录数为: {total_before}")
        update_table_data(conn, table_name, where_condition, set_condition, batch_size)
        with conn.cursor() as cursor:
            # Whatever no longer matches was updated.
            total_after = count_settled_records(cursor, table_name, where_condition)
        total_updated = total_before - total_after
        print(f"{table_name} 表最终更改的总行数是: {total_updated}")
    except pymysql.MySQLError as e:
        print(f"数据库操作失败: {e}")
    finally:
        if conn:
            conn.close()
        print(f"操作完成,对应的表名是: {table_name}")
if __name__ == "__main__":
main()6.执行更新脚本
python3 update.py

7.观测一下执行的速度,数据的变化
mysql> select count(*) from agent_commission_statistics where date='2024-08' and settled=0;
+----------+
| count(*) |
+----------+
| 146108 |
+----------+
1 row in set (0.13 sec)
mysql> select count(*) from agent_commission_statistics where date='2024-08' and settled=0;
+----------+
| count(*) |
+----------+
| 146108 |
+----------+
1 row in set (0.14 sec)
mysql> select count(*) from agent_commission_statistics where date='2024-08' and settled=0;
+----------+
| count(*) |
+----------+
| 145430 |
+----------+
1 row in set (0.13 sec)
mysql> select count(*) from agent_commission_statistics where date='2024-08' and settled=0;
+----------+
| count(*) |
+----------+
| 144098 |
+----------+
1 row in set (0.11 sec)
mysql>

脚本结束之后会展示总更新的行数

总结:
- get_min_max_id函数优化:避免在没有符合条件的id时返回None导致后续逻辑出错,改为返回(None, None),并在后续逻辑中进行检查。
- 灵活性保持:where_condition和set_condition仍然是可配置的,确保代码的灵活性和易维护性。
















