IDL 8.7开始增加了机器学习模块,可以利用IDL简单快捷的利用机器学习实现分类和回归。在此利用SVM对PM2.5进行回归预测。由于CSDN中关于IDL的代码较少,将自己边学边用的东西放上来交流,力求代码详尽小白能看懂
1. 数据加载
数据采用的是如下图所示,使用后面六列产量来回归求解PM2.5,由于AOD具有异常值(-9999),需要利用循环排除掉该部分数据。
file = read_csv('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\data.csv')
result = where(file.field05 ne -9999, count);返回了不是-9999的索引,一共count个
;将六个变量读进来,专置成一个6列的数组
x = Transpose([[file.field05], [file.field06], [file.field07],[file.field08],[file.field09],[file.field10]])
y = file.field04;读取训练集中的回归值
;新建数组,将A0D ne -9999的有效数据行存储
data=Fltarr(6,count)
scores=Fltarr(count)
FOR i=0,count-1,1 DO BEGIN
num=result[i]
data[*,i]=x[*,num]
scores[i]=y[num]
ENDFOR
2. 数据归一化
使用SVM要对数据进行归一化,数据参数间量纲差异大的时候会严重运行效率,并且要保留归一化模型以用作测试集和后期预测。
;归一化
Normalizer1 = IDLmlVarianceNormalizer(data)
Normalizer1.Normalize,data
3. 数据分割
打乱数据,并随机选取70%数据为train集,30%数据为test集。
;选取70作为train集,30作为test集
IDLmlShuffle,data,scores;打乱数据
part=IDLmlPartition({train:70,test:30},data,scores)
4. 模型训练
利用train集构建模型。
nAttributes=6
Model = IDLmlSupportVectorMachineRegression(nAttributes,KERNEL=IDLmlSVMRadialKernel)
loss = Model.Train(part.train.data, SCORES=part.train.scores)
;===================================================训练集结果验证=======================================
train_results = Model.Evaluate(part.train.data,SCORES=part.train.scores,LOSS=loss)
train_results=reform(train_results)
train_scores=part.train.scores
5. 测试模型精度
MAE = mean(abs(train_scores - train_results))
RMSE = sqrt(mean((train_scores-train_results)^2))
BIAS=MEAN(train_results-train_scores)
R_fenzi=(TOTAL( (train_results-mean(train_results)) * (train_scores-mean(train_scores)) ))^2
R_fenmu=(TOTAL((train_results-mean(train_results))^2))*(total((train_scores-mean(train_scores))^2))
R=sqrt(R_fenzi/R_fenmu)
PRINT,'RMSE='+STRTRIM(STRING(RMSE),2)
PRINT,'MAE='+STRTRIM(STRING(MAE),2)
PRINT,'BIAS='+STRTRIM(STRING(BIAS),2)
PRINT,'R='+STRTRIM(STRING(R),2)
PRINT,'LOSS='+STRTRIM(STRING(loss),2)
6. 用test集验证模型
与train的相似,但是将上述用train训练的模型应用在test上
test_results = Model.Evaluate(part.test.data,SCORES=part.test.scores,LOSS=loss)
test_results=reform(test_results)
test_scores=part.test.scores
MAE = mean(abs(test_scores - test_results))
RMSE = sqrt(mean((test_scores-test_results)^2))
BIAS=MEAN(test_results-test_scores)
R_fenzi=(TOTAL( (test_results-mean(test_results)) * (test_scores-mean(test_scores)) ))^2
R_fenmu=(TOTAL((test_results-mean(test_results))^2))*(total((test_scores-mean(test_scores))^2))
R=sqrt(R_fenzi/R_fenmu)
PRINT,'RMSE='+STRTRIM(STRING(RMSE),2)
PRINT,'MAE='+STRTRIM(STRING(MAE),2)
PRINT,'BIAS='+STRTRIM(STRING(BIAS),2)
PRINT,'R='+STRTRIM(STRING(R),2)
PRINT,'LOSS='+STRTRIM(STRING(loss),2)
7. 预测
到这里模型的构建就完成了,开始利用上述构建的模型进行预测。
;导入要预测的tif数据
AOD=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\AOD.tif')
RH=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\RH.tif')
WS=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\WS.tif')
TMP=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\TMP.tif')
PBL=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\PBL.tif')
PS=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\PS.tif',geotiff=geotiff)
sizes=(AOD.dim)[0]*(AOD.dim)[1]
day_188_data=Transpose([[Reform(AOD,sizes)],[Reform(RH,sizes)],[Reform(WS,sizes)],[Reform(TMP,sizes)],[Reform(PBL,sizes)],[Reform(PS,sizes)]]);
Normalizer1.Normalize,day_188_data
day_188_data_results = Model.Evaluate(day_188_data)
MASK=(AOD EQ -9999)*0+(AOD NE -9999)*1
SVM_result=reform(day_188_data_results,(AOD.dim)[0],(AOD.dim)[1])
SVM_result=SVM_result*MASK/MASK
outfile='C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\FFNN_PM2.5_regression.tif'
WRITE_TIFF,outfile,SVM_result,/FLOAT,/SIGNED,GEOTIFF=GEOTIFF,COMPRESSION=1
8. 完整代码
PRO MachineLearning_regression_course
COMPILE_OPT IDL2
ENVI,/restore_base_save_files
ENVI_BATCH_INIT,NO_STATUS_WINDOW=1
StartTime=SYSTIME(1)
;====================训练数据集==============================
file = read_csv('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\data.csv')
result = where(file.field05 ne -9999, count);返回了不是-9999的索引,一共count个
;将六个变量读进来,转置成一个6列的数组
x = Transpose([[file.field05], [file.field06], [file.field07],[file.field08],[file.field09],[file.field10]])
y = file.field04;读取训练集中的回归值
;新建数组,将A0D ne -9999的有效数据行存储
data=Fltarr(6,count)
scores=Fltarr(count)
FOR i=0,count-1,1 DO BEGIN
num=result[i]
data[*,i]=x[*,num]
scores[i]=y[num]
ENDFOR
;归一化
Normalizer1 = IDLmlVarianceNormalizer(data)
Normalizer1.Normalize,data
;选取70作为训练集,30作为验证集
IDLmlShuffle,data,scores;打乱数据
part=IDLmlPartition({train:70,test:30},data,scores)
nAttributes=6
Model = IDLmlSupportVectorMachineRegression(nAttributes,KERNEL=IDLmlSVMRadialKernel)
loss = Model.Train(part.train.data, SCORES=part.train.scores)
;==========================训练集结果验证============
train_results = Model.Evaluate(part.train.data,SCORES=part.train.scores)
train_scores=part.train.scores
MAE = mean(abs(train_scores - train_results))
RMSE = sqrt(mean((train_scores-train_results)^2))
BIAS=MEAN(train_results-train_scores)
R_fenzi=(TOTAL( (train_results-mean(train_results)) * (train_scores-mean(train_scores)) ))^2
R_fenmu=(TOTAL((train_results-mean(train_results))^2))*(total((train_scores-mean(train_scores))^2))
R=sqrt(R_fenzi/R_fenmu)
PRINT,'RMSE='+STRTRIM(STRING(RMSE),2)
PRINT,'MAE='+STRTRIM(STRING(MAE),2)
PRINT,'BIAS='+STRTRIM(STRING(BIAS),2)
PRINT,'R='+STRTRIM(STRING(R),2)
file_svm_link = 'C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\train_results.txt'
OPENW, lun, file_svm_link,/get_lun
PRINTF, lun, train_results
FREE_LUN, lun
file_svm_link = 'C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\train_scores.txt'
OPENW, lun, file_svm_link,/get_lun
PRINTF, lun, train_scores
FREE_LUN, lun
;=======================================================================
;===========================测试集结果验证=================================
test_results = Model.Evaluate(part.test.data,SCORES=part.test.scores)
test_scores=part.test.scores
MAE = mean(abs(test_scores - test_results))
RMSE = sqrt(mean((test_scores-test_results)^2))
BIAS=MEAN(test_results-test_scores)
R_fenzi=(TOTAL( (test_results-mean(test_results)) * (test_scores-mean(test_scores)) ))^2
R_fenmu=(TOTAL((test_results-mean(test_results))^2))*(total((test_scores-mean(test_scores))^2))
R=sqrt(R_fenzi/R_fenmu)
PRINT,'RMSE='+STRTRIM(STRING(RMSE),2)
PRINT,'MAE='+STRTRIM(STRING(MAE),2)
PRINT,'BIAS='+STRTRIM(STRING(BIAS),2)
PRINT,'R='+STRTRIM(STRING(R),2)
file_svm_link = 'C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\test_results.txt'
OPENW, lun, file_svm_link,/get_lun
PRINTF, lun, test_results
FREE_LUN, lun
file_svm_link = 'C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\test_scores.txt'
OPENW, lun, file_svm_link,/get_lun
PRINTF, lun, test_scores
FREE_LUN, lun
;
;==========================导入要预测的tif数据=============================
AOD=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\AOD.tif')
RH=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\RH.tif')
WS=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\WS.tif')
TMP=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\TMP.tif')
PBL=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\PBL.tif')
PS=read_tiff('C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\PS.tif',geotiff=geotiff)
sizes=(AOD.dim)[0]*(AOD.dim)[1]
day_188_data=Transpose([[Reform(AOD,sizes)],[Reform(RH,sizes)],[Reform(WS,sizes)],[Reform(TMP,sizes)],[Reform(PBL,sizes)],[Reform(PS,sizes)]]);
Normalizer1.Normalize,day_188_data
day_188_data_results = Model.Evaluate(day_188_data)
MASK=(AOD EQ -9999)*0+(AOD NE -9999)*1
SVM_result=reform(day_188_data_results,(AOD.dim)[0],(AOD.dim)[1])
SVM_result=SVM_result*MASK/MASK
outfile='C:\Users\user\Desktop\机器学习\ex03\ex03\reg_data\188day\PM2.5_regression.tif'
WRITE_TIFF,outfile,SVM_result,/FLOAT,/SIGNED,GEOTIFF=GEOTIFF,COMPRESSION=1
EndTime=SYSTIME(/seconds)
PRINT, '统计时间:', (EndTime-StartTime),'s'
print,'finish'
END