转换NOAA天气数据文件“.dly”为Pandas DataFrame

 获取数据 ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily

In [1]:

import ftplib
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

% matplotlib notebook

In [2]:

# download data from FTP

def download_file_from_ftp(FTP_SERVER, FTP_PATH, FILENAME):
    """Fetch one file from an FTP server into the current working directory.

    Logs in with the ftplib default (anonymous) credentials, changes to
    FTP_PATH on the server and retrieves FILENAME in binary mode. The local
    copy is written under the same name, FILENAME.

    Parameters:
        FTP_SERVER: host name of the FTP server.
        FTP_PATH: remote directory that contains the file.
        FILENAME: name of the remote file; also used as the local file name.
    """
    with ftplib.FTP(FTP_SERVER) as connection:
        connection.login()
        connection.cwd(FTP_PATH)
        # The local file is only created once the remote directory is reached.
        with open(FILENAME, 'wb') as local_file:
            retrieve_command = 'RETR ' + FILENAME
            connection.retrbinary(retrieve_command, local_file.write)

查询站ID

In [3]:

def get_station_ID(station_to_find, filename):
    """Return the ID of the first station whose line contains a given text.

    The file is scanned line by line; the first line that contains
    ``station_to_find`` as a substring (anywhere in the line) is selected and
    the text before its first space is returned as the station ID.

    Parameters:
        station_to_find: text to search for in each line (case-sensitive).
        filename: path of the station list file to scan.

    Returns:
        The station ID string, or None when no line matches.
    """
    # Use a context manager so the file handle is closed on every exit path,
    # including the early return below.
    with open(filename) as stations_file:
        for line in stations_file:
            if station_to_find in line:
                return line.split(" ", 1)[0]
    return None
# The station list is slow to download, so fetch it only when it is not
# already on disk (delete the local file to force a fresh download).
stations_filename = "ghcnd-stations.txt"
if not os.path.exists(stations_filename):
    download_file_from_ftp("ftp.ncdc.noaa.gov", "/pub/data/ghcn/daily", stations_filename)

station_to_find = "GUANGZHOU"  # USE CAPS
station_ID = get_station_ID(station_to_find, stations_filename)

# Fail here with a clear message instead of a TypeError later, when the ID is
# concatenated with the '.dly' extension.
if station_ID is None:
    raise ValueError(
        "Station {!r} not found in {}".format(station_to_find, stations_filename)
    )

下载天气数据

In [4]:

weather_data_filename = station_ID + '.dly'

# The download is slow, so fetch the station's data file only when it is not
# already on disk (delete the local file to force a fresh download).
if not os.path.exists(weather_data_filename):
    download_file_from_ftp("ftp.ncdc.noaa.gov", "/pub/data/ghcn/daily/all", weather_data_filename)

将.dly转换为pandas DataFrame

In [7]:

 

# NOTE(review): no definition of convert_dly_to_dataframe is visible in this
# notebook, so a fresh kernel would raise NameError here. The definition below
# is reconstructed from the GHCN-Daily readme and the displayed output
# (YEAR, MONTH, ELEMENT, VALUE1..VALUE31) -- confirm against the original cell.
def convert_dly_to_dataframe(filename):
    """Read a GHCN-Daily ``.dly`` file into a DataFrame, one row per record.

    Each record is a fixed-width line: station ID (11 chars), YEAR (4),
    MONTH (2), ELEMENT (4), followed by 31 daily groups of VALUE (5 chars)
    plus three 1-char flags. Only YEAR, MONTH, ELEMENT and the 31 daily
    values are kept; the station ID and the flags are dropped.

    Parameters:
        filename: path of the ``.dly`` file to read.

    Returns:
        A pandas DataFrame with columns YEAR, MONTH, ELEMENT and
        VALUE1..VALUE31. The missing-value marker -9999 is read as NaN.
    """
    colspecs = [(11, 15), (15, 17), (17, 21)]
    names = ["YEAR", "MONTH", "ELEMENT"]
    for day in range(1, 32):
        # Each day occupies 8 characters; the first 5 hold the value.
        start = 21 + (day - 1) * 8
        colspecs.append((start, start + 5))
        names.append("VALUE{}".format(day))
    return pd.read_fwf(
        filename,
        colspecs=colspecs,
        names=names,
        header=None,
        na_values=[-9999],
    )


df = convert_dly_to_dataframe(weather_data_filename)
df.head()

Out[7]:

  YEAR MONTH ELEMENT VALUE1 VALUE2 VALUE3 VALUE4 VALUE5 VALUE6 VALUE7 ... VALUE22 VALUE23 VALUE24 VALUE25 VALUE26 VALUE27 VALUE28 VALUE29 VALUE30 VALUE31
0 1945 11 TAVG NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN 107.0 NaN
1 1945 12 TAVG 123.0 136.0 152.0 144.0 146.0 189.0 219.0 ... 179.0 146.0 128.0 107.0 104.0 112.0 122.0 127.0 129.0 156.0
2 1946 1 TAVG 150.0 150.0 123.0 117.0 112.0 121.0 125.0 ... 146.0 153.0 173.0 196.0 211.0 212.0 218.0 201.0 156.0 131.0
3 1946 2 TAVG 114.0 112.0 147.0 181.0 195.0 192.0 149.0 ... 201.0 196.0 231.0 226.0 221.0 229.0 240.0 NaN NaN NaN
4 1946 3 TAVG 237.0 162.0 142.0 133.0 183.0 187.0 160.0 ... 183.0 192.0 205.0 216.0 223.0 238.0 207.0 195.0 233.0 228.0