SQL示例

一、前言

  • 安装MySQL
  • 启动MySQL
MySQL is configured to only allow connections from localhost by default

To connect run:
    mysql -u root

To start mysql now and restart at login:
 brew services start mysql
  • 下载客户端工具Sequel Ace并登录

二、示例SQL

  • where过滤数据

使用where过滤数据

SELECT
    *
FROM
    actor
WHERE
    last_name = 'ALLEN' OR last_name = 'DAVIS'
ORDER BY 
    `actor_id`;

where可以和and或者or联合使用,来圈定条件;常见的比较符:

=、<>、!=、>、<、>=、<=、IN、NOT IN、BETWEEN、LIKE、IS NULL、IS NOT NULL
  • case when示例

统计address表格中,postal_code在区间的分布

select 

sum(
case when `postal_code` < 1000 then 1 else 0 
end
) as c1,

sum(
case 
	when (`postal_code` < 5000 and `postal_code`>=1000) then 1 else 0 
end
) as c2,

sum(
case 
	when (`postal_code` < 10000 and `postal_code`>=5000) then 1 else 0 
end
) as c3,

sum(
case when (`postal_code`>=10000) then 1 else 0 
end
) as c4

from address;
  • 派生表的子查询

通过派生表temp1,统计符合条件的数量

select 
count(*) as cnt 
from(
	SELECT *
	FROM actor
	WHERE last_name = 'ALLEN' OR last_name = 'DAVIS'
	ORDER BY `actor_id`
    ) temp1;
  • 常见通用格式(统计数字)
SELECT  t1.a1    -- 所有要展示的字段,都写在这里,可能不止来源一个表
        ,t1.a2
        ,COUNT(DISTINCT t1.a1) AS cnt
        ,t2.a3
        ,t2.a4
FROM    t1    -- 来源于第一个表
LEFT JOIN (
              SELECT  a3
                      ,a4
              FROM    xxx
              WHERE   ds = 'xxx'
          ) t2    -- 第二个表
ON      t1.id = t2.id    -- JOIN的关联条件
WHERE   t1.a1 = 'xx'    -- 过滤条件
AND     t1.ds = '20230711'
AND     t1.a1 IS NOT NULL
GROUP BY t1.a1    -- 分组:注意,所有select中出现的字段(除聚合函数外),都需要出现在GROUP BY中
         ,t1.a2
         ,t2.a3
         ,t2.a4
HAVING  cnt > 0    -- 对分组统计之后的最终数据做一个过滤,这里可以直接使用聚合函数as之后的别名
ORDER BY cnt DESC    -- 排序
LIMIT 30
;

1.执行顺序:->from (去加载t1 和 t2这2个表 ) -> join -> on -> where -> group by->select 后面的聚合函数count,sum -> having -> distinct -> order by -> limit

2.FROM 子句执行顺序为从后往前、从右到左

  • 查询最近几天某城市的xx
SELECT  ds
        ,SUM(CASE WHEN cnt >= 1 THEN 1 ELSE 0 END) AS fake_cnt_android
        ,COUNT(*) AS all_cnt_android
FROM    (
            SELECT  user_id AS id
                    ,SUM(CASE WHEN (location_type = '21' OR location_type = '25') THEN 1 ELSE 0 END) AS cnt
                    ,ds
            FROM    xxxxxxxx
            WHERE   ds IN ('20230623','20230624','20230625','20230626','20230627')
            AND     city_name = 'xx市'
            AND     location_type IS NOT NULL -- 过滤掉无效数据
            AND     location_type != -1 -- 过滤掉xxx
            GROUP BY ds
                     ,id
        ) id_cnt
GROUP BY ds
;
  • 一个数据开发的ODPS SQL示例
CREATE TABLE IF NOT EXISTS temp_xxx
(
    city       STRING COMMENT '城市'
    ,line_name STRING COMMENT 'xxx'
    ,fake_cnt  BIGINT COMMENT 'xx数量'
    ,all_cnt   BIGINT COMMENT '总数量'
    ,ratio     STRING COMMENT '百分比'
)
COMMENT 'xxx情况表'
PARTITIONED BY 
(
    ds         STRING COMMENT 'day 分区字段'
)
LIFECYCLE 30
;


INSERT OVERWRITE TABLE temp_xxx PARTITION (ds='${bizdate}')
SELECT  city
        ,line_name
        ,fake_cnt
        ,all_cnt
        ,CONCAT(CAST(ROUND(100 * fake_cnt / all_cnt,2) AS STRING),'%') AS ratio
FROM    (
            SELECT  CASE    mode
                            WHEN 'xx' THEN 'xxxxx'
                            WHEN 'xx' THEN 'xxxxx'
                            ELSE 'xxxxxx'
                    END AS line_name
                    ,city
                    ,SUM(CASE WHEN cnt >= 1 THEN 1 ELSE 0 END) AS fake_cnt
                    ,COUNT(*) AS all_cnt
            FROM    (
                        SELECT  rider_._id
                                ,SUM(CASE WHEN location_type = '21' THEN 1 ELSE 0 END) AS cnt
                                ,ds
                                ,rider_.city_name AS city
                                ,id_mode.tms_line_name AS mode
                        FROM    xxxxxxxxxx AS rider_
                        LEFT JOIN   (
                                        SELECT  _id AS id
                                                ,tms_line_name
                                        FROM    xxxxxxxx.xxxdi
                                        WHERE   ds = '${bizdate}'
                                        AND     tms_line_name IS NOT NULL
                                    ) id_mode
                        ON      id_mode.id = rider_._id
                        WHERE   ds = '${bizdate}'
                        AND     _id IS NOT NULL 
                        AND     city_id IN ('91','66')
                        AND     location_type IS NOT NULL -- 过滤掉无效数据
                        GROUP BY ds
                                 ,rider_._id
                                 ,mode
                                 ,rider_.city_name
                    ) id_cnt
            GROUP BY city
                     ,line_name --GROUP BY ROLLUP(city,mode)
        ) mod_cnt
;

其中bizdate可以在调度配置中设置

参考资料:https://help.aliyun.com/zh/maxcompute/