-- Create the demo database and table, load six rows (three distinct days, two of which repeat), then display them.
mysql> create database demo;
mysql> use demo;
mysql> create table mytable (id int not null primary key, day date not null);
-- Single-quoted literals are standard SQL and keep working when sql_mode includes ANSI_QUOTES,
-- where double quotes denote identifiers instead of strings.
mysql> insert into mytable(id, day) values
    -> (1, '2018-04-14'),
    -> (2, '2018-04-15'),
    -> (3, '2018-04-16'),
    -> (4, '2018-04-16'),
    -> (5, '2018-04-15'),
    -> (6, '2018-04-16');
-- Explicit ORDER BY: without it the row order of the result is not guaranteed.
mysql> select id, day from mytable order by id;
+----+------------+
| id | day        |
+----+------------+
|  1 | 2018-04-14 |
|  2 | 2018-04-15 |
|  3 | 2018-04-16 |
|  4 | 2018-04-16 |
|  5 | 2018-04-15 |
|  6 | 2018-04-16 |
+----+------------+
分组查询
-- Group the rows by day and count how many rows each day has.
-- ORDER BY is explicit because MySQL 8.0 no longer sorts GROUP BY results implicitly.
mysql> select day, count(*) from mytable group by day order by day;
+------------+----------+
| day        | count(*) |
+------------+----------+
| 2018-04-14 |        1 |
| 2018-04-15 |        2 |
| 2018-04-16 |        3 |
+------------+----------+
-- Show only the days that occur more than once; HAVING filters the groups after aggregation.
-- ORDER BY keeps the displayed order deterministic.
mysql> select day, count(*) from mytable group by day having count(*)>1 order by day;
+------------+----------+
| day        | count(*) |
+------------+----------+
| 2018-04-15 |        2 |
| 2018-04-16 |        3 |
+------------+----------+
WHERE 子句过滤的是分组之前的行;
HAVING 子句过滤的是分组之后的行。
-- Create a temporary table and fill it with, for every duplicated day, the smallest id of that day
-- (the row that will be kept).
mysql> create temporary table temptable(day date not null, min_id int not null);
mysql> insert into temptable(day, min_id) select day, min(id) from mytable group by day having count(*)>1;
-- List the rows to keep, with explicit columns and a stable order.
mysql> select day, min_id from temptable order by day;
+------------+--------+
| day        | min_id |
+------------+--------+
| 2018-04-15 |      2 |
| 2018-04-16 |      3 |
+------------+--------+
删除重复项
-- Delete every row whose day is listed in temptable but whose id differs from that day's min_id,
-- i.e. for each duplicated day keep only the row with the smallest id.
-- Days that are not in temptable (no duplicates) match nothing in the subquery and are left untouched.
mysql> delete from mytable where exists(
    -> select * from temptable where temptable.day = mytable.day and temptable.min_id <> mytable.id);
-- Confirm that exactly one row per day remains.
mysql> select id, day from mytable order by id;
+----+------------+
| id | day        |
+----+------------+
|  1 | 2018-04-14 |
|  2 | 2018-04-15 |
|  3 | 2018-04-16 |
+----+------------+
参考
- 【技术栈之Mysql】SQL查找删除重复行
- How to find duplicate rows with SQL