代码记录:Change VIC format file to standard file. 语言:Python IDE:PyCharm
"""""
==============================================================
Get annual time series from VIC-format files.
Note: the input format should be "year month day prcp" or a single prcp column.
2018.5.29 hjh ! Sep should be checked: Unix or PC? !
==============================================================
"""
import pandas as pd
import os
import time
# ---------------------------------------------------------------------------
# User settings
# ---------------------------------------------------------------------------
path = 'E:/Grid_pre_row/chirps/day/'  # directory holding the daily input files
out_file = 'F:/Syr/grid_pre_annual/chirps_result'  # intermediate text table
out_file2 = 'G:/P/chirps/chirps.csv'  # final CSV output
start_year = 1981  # first year covered by single-column input files
end_year = 2016  # last year (inclusive) covered by single-column input files

# Header of the result table: location, year, then the twelve monthly sums
# (January .. December).  The names are written verbatim into the CSV header.
column_names = ['lat', 'lon', 'year', 'one', 'two', 'three', 'four', 'five',
                'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'december']


def attach_dates(daily, first_year, last_year):
    """Return a year/month/day/prep frame for a single-column daily series.

    The series is assumed to hold one value per calendar day from
    ``first_year``-01-01 through ``last_year``-12-31.

    Raises:
        ValueError: if the number of rows does not match that date range.
    """
    days = pd.date_range('{}-01-01'.format(first_year),
                         '{}-12-31'.format(last_year))
    if len(days) != len(daily):
        raise ValueError(
            'expected {} daily values for {}-{}, got {}'.format(
                len(days), first_year, last_year, len(daily)))
    return pd.DataFrame({
        'year': days.year.values,
        'month': days.month.values,
        'day': days.day.values,
        'prep': daily['prep'].values,
    })


def load_daily(file_path, is_standard):
    """Read one whitespace-separated input file into a year/month/day/prep frame.

    ``is_standard`` selects the four-column layout (year month day prep);
    otherwise the file is read as a single value column and the dates are
    generated from ``start_year`` / ``end_year``.
    """
    daily = pd.read_table(file_path, header=None, sep=r'\s+')
    if is_standard:
        daily.columns = ['year', 'month', 'day', 'prep']
        return daily
    daily.columns = ['prep']
    return attach_dates(daily, start_year, end_year)


def parse_lat_lon(file_name):
    """Return (lat, lon) taken from the 2nd and 3rd '_'-separated name parts."""
    parts = file_name.split('_')
    return float(parts[1]), float(parts[2])


def monthly_rows(daily, lat, lon):
    """Return one ``[lat, lon, year, m1, ..., m12]`` row per year in ``daily``.

    Months are aligned by their month number rather than by slicing twelve
    rows at a time, so a month that is absent from the data yields NaN in its
    own slot instead of shifting the following months.
    """
    monthly = daily.groupby(['year', 'month'])['prep'].sum()
    by_year = monthly.unstack('month').reindex(columns=range(1, 13))
    return [[lat, lon, int(year)] + values
            for year, values in zip(by_year.index, by_year.values.tolist())]


def add_seasonal_totals(table):
    """Add the seasonal columns A-D and the ``annual`` column to ``table``.

    A B C D represent spring, summer, autumn and winter.  Winter (D) is the
    sum of January, February and December of the same calendar year.
    The frame is modified in place and also returned.
    """
    table['A'] = table['three'] + table['four'] + table['five']
    table['B'] = table['six'] + table['seven'] + table['eight']
    table['C'] = table['nine'] + table['ten'] + table['eleven']
    table['D'] = table['one'] + table['two'] + table['december']
    table['annual'] = table['A'] + table['B'] + table['C'] + table['D']
    return table


def main():
    """Convert every 'data*' file in ``path`` and write the summary CSV."""
    start = time.time()
    files = [f for f in os.listdir(path) if f.startswith('data')]
    if not files:
        raise FileNotFoundError(
            "no input files starting with 'data' in " + path)
    que = input('your input file format is standard? Y or N: ')
    is_standard = que == 'Y'

    # NOTE: the intermediate file is opened in append mode, as before, so rows
    # from an earlier run stay in it; delete it first for a clean result.
    # It is opened once here instead of once per written row.
    with open(out_file, 'at') as f:
        for file in files:
            daily = load_daily(os.path.join(path, file), is_standard)
            lat, lon = parse_lat_lon(file)
            for row in monthly_rows(daily, lat, lon):
                print(*row, sep=' ', file=f)
            print('reading' + ' ' + str(file))

    final_file = pd.read_table(out_file, header=None, sep=r'\s+')
    # Assign the flat list of names: wrapping it in another list would build a
    # MultiIndex and break plain column arithmetic.
    final_file.columns = column_names
    final_file = add_seasonal_totals(final_file)
    final_file.to_csv(out_file2, index=False)

    a = time.time() - start
    print('programme run ' + str(a) + ' s')


if __name__ == '__main__':
    main()
把 ASCII 格式的文本转成一个 CSV 文件,便于画图和分析数据等。