背景:目前公司的报表计算需要20分钟。耗时这么久的原因是:一次计算操作要执行10万次以上的update;查看代码发现,有两个地方是在循环里逐条执行update,每个循环都达到了4.6万次。
一,开始着手优化,第一个地方计算的是平均值,之前是根据report_date,trade_code这两个字段分组循环计算每一个平均值,现在我改成了一条SQL来执行;速度显著提高。
-- Rebuilds the per-industry averages: one row per (trade_code, report_date),
-- produced by a single set-based INSERT ... SELECT instead of one statement
-- per group.
--
-- NOTE(review): the INSERT has no column list, so the SELECT order below must
-- match the column order of zy_ticket_industry_history_0914 exactly; confirm
-- against the table DDL before reordering anything.
-- NOTE(review): @rownum is incremented inside a grouped, ordered query. MySQL
-- does not guarantee the evaluation order of user-variable assignments, so
-- the generated ids need not follow the ORDER BY.
-- NOTE(review): roe_avg is (current assets - inventory) / current liabilities,
-- which looks like a liquidity ratio rather than a return on equity; confirm
-- the intended formula.
SET @rownum := 0;
INSERT INTO zy_ticket_industry_history_0914
SELECT
    @rownum := @rownum + 1 AS id,
    zb.report_date,
    zi.trade_code,
    -- Number of zy_ticket_info rows that share this industry code.
    (SELECT COUNT(zin.trade_code)
       FROM zy_ticket_info zin
      WHERE zin.trade_code = zi.trade_code
      GROUP BY zin.trade_code) AS company_count,
    CAST(MAX(zp.total_operating_income) AS DECIMAL(20,2)) AS business_income_top,
    -- zq_jc of the enabled company with the highest operating income in this
    -- industry FOR THIS REPORT DATE. Without the report_date filter the name
    -- is picked across all periods and can disagree with business_income_top.
    (SELECT zzi.zq_jc
       FROM zy_ticket_profit zzp
      INNER JOIN zy_ticket_info zzi
              ON zzp.zq_code = zzi.zq_code
      WHERE zzi.trade_code = zi.trade_code
        AND zzi.enable = 'Y'
        AND zzp.report_date = zb.report_date
      ORDER BY zzp.total_operating_income DESC
      LIMIT 1) AS business_income_top_name,
    CAST(AVG(zb.total_assets) AS DECIMAL(20,2)) AS total_assets_avg,
    CAST(AVG(zb.total_current_assets) AS DECIMAL(20,2)) AS current_assets_avg,
    -- Every denominator is wrapped in NULLIF(..., 0): a zero average yields
    -- NULL instead of aborting the INSERT when strict mode and
    -- ERROR_FOR_DIVISION_BY_ZERO are enabled.
    CAST(AVG(zb.total_liabilit) / NULLIF(AVG(zb.total_assets), 0) AS DECIMAL(20,2)) AS leverage_ratio_avg,
    CAST(AVG(zb.total_current_assets) / NULLIF(AVG(zb.total_current_liabilit), 0) AS DECIMAL(20,2)) AS current_ratio_avg,
    CAST((AVG(zb.total_current_assets) - AVG(zb.net_inventory)) / NULLIF(AVG(zb.total_current_assets), 0) AS DECIMAL(20,2)) AS quick_ratio_avg,
    CAST((AVG(zc.cash_increase) + AVG(zb.account_receivable)) / NULLIF(AVG(zb.total_current_liabilit), 0) AS DECIMAL(20,2)) AS conserv_quick_ratio_avg,
    CAST(AVG(zc.cash_increase) AS DECIMAL(20,2)) AS cash_avg,
    CAST(AVG(zb.total_current_liabilit) AS DECIMAL(20,2)) AS current_liabilities_avg,
    CAST(AVG(zp.total_operating_income) - AVG(zp.operating_cost) - AVG(zp.income_tax_expense) AS DECIMAL(20,2)) AS operating_cash_avg,
    CAST(AVG(zp.net_profit) - AVG(zp.no_operating_income) AS DECIMAL(20,2)) AS kf_net_profit_avg,
    CAST((AVG(zb.total_current_assets) - AVG(zb.net_inventory)) / NULLIF(AVG(zb.total_current_liabilit), 0) AS DECIMAL(20,2)) AS roe_avg,
    CAST(AVG(zp.total_profit) AS DECIMAL(20,2)) AS sale_gross_profit_avg,
    CAST(AVG(zp.net_profit) AS DECIMAL(20,2)) AS sale_net_profit_avg,
    -- The two turnover ratios add the current balance (zb) to the balance of
    -- the same company one year earlier (zb2).
    CAST(AVG(zp.operating_cost) / NULLIF(AVG(zb.net_inventory) + AVG(zb2.net_inventory), 0) AS DECIMAL(20,2)) AS inventory_turnover_avg,
    CAST(AVG(zp.total_operating_income) / NULLIF(AVG(zb.long_receivables) + AVG(zb2.long_receivables), 0) AS DECIMAL(20,2)) AS receive_turnover_avg,
    'Y' AS `enable`,
    NULL AS create_by,
    NOW() AS create_time,
    NULL AS update_by,
    NOW() AS update_time,
    NULL AS remark
FROM zy_ticket_balance zb
-- The original LEFT JOIN was already turned into an inner join by the
-- zi.enable filter in WHERE; INNER JOIN states that directly.
INNER JOIN zy_ticket_info zi
        ON zb.zq_code = zi.zq_code
LEFT JOIN zy_ticket_cash zc
       ON zb.zq_code = zc.zq_code
      AND zb.report_date = zc.report_date
LEFT JOIN zy_ticket_profit zp
       ON zb.zq_code = zp.zq_code
      AND zb.report_date = zp.report_date
-- Same company, report dated exactly one year before zb.
LEFT JOIN zy_ticket_balance zb2
       ON zb.zq_code = zb2.zq_code
      AND zb.report_date = DATE_ADD(zb2.report_date, INTERVAL 1 YEAR)
WHERE zi.enable = 'Y'
GROUP BY zi.trade_code, zb.report_date
ORDER BY zb.report_date ASC, zi.trade_code ASC;
第二个地方计算的是金额、排名、百分比,之前是根据zq_code,report_date,trade_code这三个字段分组循环计算的;基于前面那个的经验,这个我也想通过一条SQL直接计算出所有数据然后Insert,在SQL层面去优化;
而事实证明,想过很多种办法,都不行,SQL写出来,查询效率也很低,速度提高不明显,没有实际意义。最终的解决方案是动态校验zq_code,report_date,trade_code,只需执行16次update;
当然也可以更少,将操作表一致的进行合并,我为了SQL语句更清晰点,就分开写了。
(中间想通过SQL解决问题的时候,一到测试环境因为内存溢出,方案报废,而运营因为线上资源有限,不允许修改配置,然后才想到这个最终方案;其实应该一开始就这样做的,被第一步严重影响了思路::>_<::)
SQL:
-- Computes total_assets, its rank and the "transcend" percentage for every
-- enabled company in one pass, then writes the values to zy_ticket_report by
-- matching on (zq_code, trade_code, report_date).
--
-- Ranking uses MySQL user variables: rows arrive ordered by report date,
-- industry and total_assets (descending), and the counter restarts whenever
-- the industry OR the report date changes. Tracking only the industry code
-- lets the counter run on into the next report date when the same industry
-- code is adjacent across the date boundary.
-- NOTE(review): this relies on the ordered derived table being read in order
-- and on left-to-right evaluation of the select list; neither is guaranteed
-- by MySQL. On MySQL 8.0+ ROW_NUMBER() OVER (PARTITION BY ...) is the
-- supported replacement.
UPDATE zy_ticket_report t1
INNER JOIN (
    SELECT
        tt.*,
        -- Share of companies in the industry ranked below this one, in percent.
        ROUND((companyCount - totalAssetsRank) / companyCount * 100, 2) AS totalAssetsTranscend
    FROM (
        SELECT
            t.*,
            IF(@pa = t.tradeCode AND @pd = CAST(t.reportDate AS CHAR),
               @curRank := @curRank + 1,
               @curRank := 1) AS totalAssetsRank,
            @pa := t.tradeCode AS blTradeCode,
            @pd := CAST(t.reportDate AS CHAR) AS blReportDate
        FROM (
            SELECT
                zb.report_date AS reportDate,
                zi.zq_code AS zqCode,
                zi.trade_code AS tradeCode,
                zi.zq_jc AS zqJc,
                com.companyCount,
                zb.total_assets AS totalAssets
            FROM zy_ticket_info zi
            INNER JOIN zy_ticket_balance zb
                    ON zb.zq_code = zi.zq_code
            INNER JOIN (
                SELECT trade_code, COUNT(trade_code) AS companyCount
                FROM zy_ticket_info zin
                GROUP BY zin.trade_code
            ) AS com
                    ON com.trade_code = zi.trade_code
            WHERE zi.enable = 'Y'
            -- Grouping by zb.id keeps one output row per balance row.
            GROUP BY zb.report_date, zi.trade_code, zb.id
            ORDER BY zb.report_date, zi.trade_code, zb.total_assets DESC
        ) AS t,
        (SELECT @curRank := 0, @pa := '', @pd := '') b
    ) AS tt
) AS t2
        ON t1.`zq_code` = t2.zqCode
       AND t1.`trade_code` = t2.tradeCode
       AND t1.`report_date` = t2.reportDate
SET t1.total_assets = t2.totalAssets,
    t1.total_assets_rank = t2.totalAssetsRank,
    t1.total_assets_transcend = t2.totalAssetsTranscend,
    t1.update_time = NOW();
JAVA:
优化后,数据计算只需60秒左右。o(* ̄▽ ̄*)ブ
记录下这次优化,学到的:
1,查询Map结构的对象集合,根据key获取整个对象。
XML:
<!--
  getZys: returns one row per profit record of an enabled company, with its
  operating income, the company count of its industry and its rank.
  mapKey is "zqCode-tradeCode-reportDate" so the mapper can expose the rows as
  a Map keyed by that string.

  The rank counter restarts whenever the industry OR the report date changes.
  Tracking only tradeCode lets the counter run on into the next report date
  when the same industry code is adjacent across the date boundary.
  blReportDate is a helper column, like blTradeCode.

  NOTE(review): the ranking relies on the ordered derived table being read in
  order and on left-to-right evaluation of the select list; MySQL does not
  guarantee either.
  NOTE(review): this statement produces totalOperatingIncome and
  totalOperatingIncomeRank, which retMap does not list explicitly. Confirm
  they reach the VO (for example through auto-mapping).
-->
<select id="getZys" resultMap="retMap">
SELECT
    CONCAT_WS('-', t.zqCode, t.tradeCode, t.reportDate) AS mapKey,
    t.*,
    IF(@pa = t.tradeCode AND @pd = CAST(t.reportDate AS CHAR),
       @curRank := @curRank + 1,
       @curRank := 1) AS totalOperatingIncomeRank,
    @pa := t.tradeCode AS blTradeCode,
    @pd := CAST(t.reportDate AS CHAR) AS blReportDate
FROM
    (SELECT
         zp.report_date AS reportDate,
         zi.zq_code AS zqCode,
         zi.trade_code AS tradeCode,
         zi.zq_jc AS zqJc,
         com.companyCount,
         zp.total_operating_income AS totalOperatingIncome
     FROM zy_ticket_info zi
     INNER JOIN zy_ticket_profit zp
             ON zp.zq_code = zi.zq_code
     INNER JOIN (SELECT trade_code, COUNT(trade_code) AS companyCount
                 FROM zy_ticket_info zin
                 GROUP BY zin.trade_code) AS com
             ON com.trade_code = zi.trade_code
     WHERE zi.enable = 'Y'
     GROUP BY zp.report_date, zi.trade_code, zp.id
     ORDER BY zp.report_date, zi.trade_code, zp.total_operating_income DESC) AS t,
    (SELECT @curRank := 0, @pa := '', @pd := '') b
</select>
<!--
  retMap: maps camelCase column aliases one-to-one onto the same-named
  properties of com.zy.ticket.model.vo.TicketReportVO, with an explicit
  javaType for each.
  mapKey is the composite string that the getZys mapper method uses as the
  Map key.
  NOTE(review): the getZys statement shown in this file returns
  totalOperatingIncome / totalOperatingIncomeRank and no tradeName or
  totalAssets* columns. Confirm whether those are filled by auto-mapping or
  whether this map was written for a different statement.
-->
<resultMap id="retMap" type="com.zy.ticket.model.vo.TicketReportVO">
<result column="mapKey" property="mapKey" javaType="String"/>
<result column="zqCode" property="zqCode" javaType="String"/>
<result column="zqJc" property="zqJc" javaType="String"/>
<result column="tradeName" property="tradeName" javaType="String"/>
<result column="tradeCode" property="tradeCode" javaType="String"/>
<result column="reportDate" property="reportDate" javaType="String"/>
<result column="companyCount" property="companyCount" javaType="Integer"/>
<result column="totalAssets" property="totalAssets" javaType="BigDecimal"/>
<result column="totalAssetsRank" property="totalAssetsRank" javaType="Integer"/>
<result column="totalAssetsTranscend" property="totalAssetsTranscend" javaType="BigDecimal"/>
</resultMap>
Mapper:
/**
 * Runs the {@code getZys} mapped statement and returns its rows as a map.
 * {@code @MapKey("mapKey")} makes MyBatis key every row by its
 * {@code mapKey} property, which the statement builds as
 * {@code zqCode-tradeCode-reportDate}.
 *
 * @return the result rows indexed by their composite {@code mapKey}
 */
@MapKey("mapKey")
Map<String,TicketReportVO> getZys();
Service:
// Load all rows once, keyed by "zqCode-tradeCode-reportDate".
Map<String,TicketReportVO> zysMap = zyTicketReportMapper.getZys();
// Look a row up by rebuilding the same composite key on the Java side.
// NOTE(review): Map.get returns null for an unknown key, so the chained getter
// would throw a NullPointerException; confirm every vo has a matching row.
// NOTE(review): the key only matches if vo.getReportDate() is formatted exactly
// like the reportDate text produced by the SQL CONCAT_WS; verify.
zysMap.get(vo.getZqCode()+"-"+vo.getTradeCode()+"-"+vo.getReportDate()).getLeverageRatio()
2,insert语句插入的字段越少,速度越快。以下4.6万条数据,只需3秒。
<!--
  baseInsert: inserts every element of the given list into zy_ticket_report
  with ONE multi-row INSERT (the foreach emits one value tuple per item,
  separated by commas). Only six columns are written.

  The statement text ends with ";COMMIT;", so a COMMIT is sent to the database
  together with the INSERT.
  NOTE(review): sending two statements in one call normally requires
  multi-statement support on the JDBC connection (allowMultiQueries=true for
  MySQL Connector/J). Confirm the datasource URL.
  NOTE(review): the explicit COMMIT ends the surrounding transaction behind the
  framework's back, so rows inserted here cannot be rolled back by the caller.

  The selectKey runs SELECT LAST_INSERT_ID() after the insert and stores it
  under keyProperty "id".
  NOTE(review): after a multi-row INSERT, LAST_INSERT_ID() reports only the
  first generated id, and the parameter here is a List, not a single entity.
  Confirm this selectKey is actually needed.
-->
<insert id="baseInsert" parameterType="java.util.List">
<selectKey resultType="java.lang.Long" keyProperty="id" order="AFTER">
SELECT
LAST_INSERT_ID()
</selectKey>
insert into zy_ticket_report
(zq_code, zq_jc,trade_name, trade_code, report_date,company_count)
values
<foreach collection="list" item="item" separator=",">
(#{item.zqCode}, #{item.zqJc},#{item.tradeName},
#{item.tradeCode}, #{item.reportDate},#{item.companyCount})
</foreach>
;COMMIT;
</insert>
3,SQL分组查询数据时显示所有数据,只需要在Group by最后加上主键ID。
-- Lists every balance row of an enabled company together with the company
-- count of its industry. The primary key (id) is the last GROUP BY column, so
-- the grouping keeps every individual balance row.
SELECT
    bal.report_date       AS reportDate,
    info.zq_code          AS zqCode,
    info.trade_code       AS tradeCode,
    info.zq_jc            AS zqJc,
    industry.companyCount,
    bal.total_assets      AS totalAssets
FROM zy_ticket_balance AS bal
INNER JOIN zy_ticket_info AS info
        ON info.zq_code = bal.zq_code
INNER JOIN (
    -- Companies per industry code.
    SELECT
        members.trade_code,
        COUNT(members.trade_code) AS companyCount
    FROM zy_ticket_info AS members
    GROUP BY members.trade_code
) AS industry
        ON industry.trade_code = info.trade_code
WHERE info.enable = 'Y'
GROUP BY
    bal.report_date,
    info.trade_code,
    bal.id
ORDER BY
    bal.report_date,
    info.trade_code,
    bal.total_assets DESC
4,SQL分组查询数据显示所有数据,并且根据分组设置序号。
-- Lists every balance row of an enabled company and numbers the rows inside
-- each (report date, industry) group by total_assets, largest first.
--
-- The counter restarts whenever the industry OR the report date changes.
-- Tracking only tradeCode lets the counter run on into the next report date
-- when the same industry code is adjacent across the date boundary.
-- blReportDate is a helper column, like blTradeCode.
-- NOTE(review): this relies on the ordered derived table being read in order
-- and on left-to-right evaluation of the select list; neither is guaranteed
-- by MySQL. On MySQL 8.0+ ROW_NUMBER() OVER (PARTITION BY ...) is the
-- supported replacement.
SELECT
    t.*,
    IF(@pa = t.tradeCode AND @pd = CAST(t.reportDate AS CHAR),
       @curRank := @curRank + 1,
       @curRank := 1) AS totalAssetsRank,
    @pa := t.tradeCode AS blTradeCode,
    @pd := CAST(t.reportDate AS CHAR) AS blReportDate
FROM
    (SELECT
         zb.report_date AS reportDate,
         zi.zq_code AS zqCode,
         zi.trade_code AS tradeCode,
         zi.zq_jc AS zqJc,
         com.companyCount,
         zb.total_assets AS totalAssets
     FROM zy_ticket_info zi
     INNER JOIN zy_ticket_balance zb
             ON zb.zq_code = zi.zq_code
     INNER JOIN (SELECT trade_code, COUNT(trade_code) AS companyCount
                 FROM zy_ticket_info zin
                 GROUP BY zin.trade_code) AS com
             ON com.trade_code = zi.trade_code
     WHERE zi.enable = 'Y'
     GROUP BY zb.report_date, zi.trade_code, zb.id
     ORDER BY zb.report_date, zi.trade_code, zb.total_assets DESC
    ) AS t,
    (SELECT @curRank := 0, @pa := '', @pd := '') b
5,如果同一个表的insert和update在一个事务里操作,在insert语句后写commit,手动进行数据库提交,速度会提高很多(注意:手动提交之后,已提交的insert无法再随事务回滚)。
6,只有insert操作的时候,不需要去添加事务处理。