# Data extraction and processing for meteorological files, covering .nc (NetCDF) files and .csv files.

import netCDF4 as nc
import sys
import glob
import csv

#读取t文件夹下的nc文件的北京温度值  t 气温

def t_getcsv(path):
    """Dump the 't' variable of every NetCDF file under data/t2015 to a CSV.

    Files matching ``path + 'data/t2015/*.nc'`` are processed in the numeric
    order of filename characters ``[-14:-3]``.  For each file and each of the
    levels 700, 850 and 925 one row is produced::

        [time label, level, 100 grid values]

    The grid values are ``t[level][n][m]`` for ``m`` in 151..160 (outer loop)
    and ``n`` in 63..72 (inner loop); presumably ``m`` indexes longitude and
    ``n`` latitude -- TODO confirm against the dataset.

    The time label is filename characters ``[-14:-5]`` followed by a
    two-digit hour, where the hour is the filename's hour digits
    (characters ``[-5:-3]``) plus 7.  When that exceeds 24 the label uses the
    date part of the *next* file in sorted order and the hour minus 24.

    All rows are written to ``t100_getdata.csv`` in the current working
    directory below the header ``time, t10x10`` (the header names only two
    columns although each data row has 102).

    Args:
        path: Directory prefix, including the trailing separator, that
            contains the ``data/`` folder.

    Raises:
        ValueError: If a filename does not contain the expected digits or a
            file does not provide all of the levels 700, 850 and 925.
    """
    nc_files = glob.glob(path + 'data/t2015/*.nc')
    nc_files.sort(key=lambda x: float(x[-14:-3]))
    print('t', nc_files, len(nc_files))

    rows = []
    last_idx = len(nc_files) - 1
    for idx, nc_path in enumerate(nc_files):
        # The label depends only on the file, so build it once per file.
        hour = int(nc_path[-5:-3]) + 7
        if hour > 24:
            # NOTE(review): for the very last file there is no following file
            # to borrow the date from, so its own date is reused -- the label
            # then names the wrong day.
            date_source = nc_files[idx + 1] if idx < last_idx else nc_path
            time_label = date_source[-14:-5] + str(hour - 24).zfill(2)
        else:
            time_label = nc_path[-14:-5] + str(hour).zfill(2)

        # The context manager closes the dataset even if a read fails.
        with nc.Dataset(nc_path) as nc_obj:
            lev = list(nc_obj.variables['level'][:])
            print(lev)
            variable = nc_obj.variables['t']
            for c in (lev.index(700), lev.index(850), lev.index(925)):
                # Read the level once instead of once per grid point.
                level_slab = variable[c]
                row = [time_label, lev[c]]
                for m in range(151, 161):
                    for n in range(63, 73):
                        row.append(level_slab[n][m])
                print('t', '\n', row)
                rows.append(row)

    with open('t100_getdata.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['time', 't10x10'])
        writer.writerows(rows)

    print('写入完成', '\n')



#读取v文件夹下的nc文件的北京  v 南北风 北京155  67
def v_getcsv(path):
    """Dump the 'v' variable of every NetCDF file under data/v to a CSV.

    Files matching ``path + 'data/v/v.ano.*.nc'`` are processed in the numeric
    order of filename characters ``[-14:-3]``.  For each file and each of the
    levels 700, 850 and 925 one row is produced::

        [time label, level, 100 grid values]

    The grid values are ``v[level][n][m]`` for ``m`` in 151..160 (outer loop)
    and ``n`` in 63..72 (inner loop); presumably ``m`` indexes longitude and
    ``n`` latitude -- TODO confirm against the dataset.

    The time label is filename characters ``[-14:-5]`` followed by a
    two-digit hour, where the hour is the filename's hour digits
    (characters ``[-5:-3]``) plus 7.  When that exceeds 24 the label uses the
    date part of the *next* file in sorted order and the hour minus 24.

    All rows are written to ``v100_getdata.csv`` in the current working
    directory below the header ``time, v10x10`` (the header names only two
    columns although each data row has 102).

    Args:
        path: Directory prefix, including the trailing separator, that
            contains the ``data/`` folder.

    Raises:
        ValueError: If a filename does not contain the expected digits or a
            file does not provide all of the levels 700, 850 and 925.
    """
    nc_files = glob.glob(path + 'data/v/v.ano.*.nc')
    nc_files.sort(key=lambda x: float(x[-14:-3]))

    rows = []
    last_idx = len(nc_files) - 1
    for idx, nc_path in enumerate(nc_files):
        # The label depends only on the file, so build it once per file.
        hour = int(nc_path[-5:-3]) + 7
        if hour > 24:
            # NOTE(review): for the very last file there is no following file
            # to borrow the date from, so its own date is reused -- the label
            # then names the wrong day.
            date_source = nc_files[idx + 1] if idx < last_idx else nc_path
            time_label = date_source[-14:-5] + str(hour - 24).zfill(2)
        else:
            time_label = nc_path[-14:-5] + str(hour).zfill(2)

        # The context manager closes the dataset even if a read fails.
        with nc.Dataset(nc_path) as nc_obj:
            lev = list(nc_obj.variables['level'][:])
            variable = nc_obj.variables['v']
            # Three rows per file: one per pressure level, 100 values each.
            for c in (lev.index(700), lev.index(850), lev.index(925)):
                # Read the level once instead of once per grid point.
                level_slab = variable[c]
                row = [time_label, lev[c]]
                for m in range(151, 161):
                    for n in range(63, 73):
                        row.append(level_slab[n][m])
                rows.append(row)

    with open('v100_getdata.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['time', 'v10x10'])
        writer.writerows(rows)


#读取z文件夹下的nc文件的北京  z  气压
def z_getcsv(path):
    """Dump the 'z' variable of every NetCDF file under data/z to a CSV.

    Files matching ``path + 'data/z/z.ano.*.nc'`` are processed in the numeric
    order of filename characters ``[-14:-3]``.  For each file and each of the
    levels 700, 850 and 925 one row is produced::

        [time label, level, 100 grid values]

    The grid values are ``z[level][n][m]`` for ``m`` in 151..160 (outer loop)
    and ``n`` in 63..72 (inner loop); presumably ``m`` indexes longitude and
    ``n`` latitude -- TODO confirm against the dataset.

    The time label is filename characters ``[-14:-5]`` followed by a
    two-digit hour, where the hour is the filename's hour digits
    (characters ``[-5:-3]``) plus 7.  When that exceeds 24 the label uses the
    date part of the *next* file in sorted order and the hour minus 24.

    All rows are written to ``z100_getdata.csv`` in the current working
    directory below the header ``time, z10x10`` (the header names only two
    columns although each data row has 102).

    Args:
        path: Directory prefix, including the trailing separator, that
            contains the ``data/`` folder.

    Raises:
        ValueError: If a filename does not contain the expected digits or a
            file does not provide all of the levels 700, 850 and 925.
    """
    nc_files = glob.glob(path + 'data/z/z.ano.*.nc')
    nc_files.sort(key=lambda x: float(x[-14:-3]))

    rows = []
    last_idx = len(nc_files) - 1
    for idx, nc_path in enumerate(nc_files):
        # The label depends only on the file, so build it once per file.
        hour = int(nc_path[-5:-3]) + 7
        if hour > 24:
            # NOTE(review): for the very last file there is no following file
            # to borrow the date from, so its own date is reused -- the label
            # then names the wrong day.
            date_source = nc_files[idx + 1] if idx < last_idx else nc_path
            time_label = date_source[-14:-5] + str(hour - 24).zfill(2)
        else:
            time_label = nc_path[-14:-5] + str(hour).zfill(2)

        # The context manager closes the dataset even if a read fails.
        with nc.Dataset(nc_path) as nc_obj:
            lev = list(nc_obj.variables['level'][:])
            variable = nc_obj.variables['z']
            for c in (lev.index(700), lev.index(850), lev.index(925)):
                # Read the level once instead of once per grid point.
                level_slab = variable[c]
                row = [time_label, lev[c]]
                for m in range(151, 161):
                    for n in range(63, 73):
                        row.append(level_slab[n][m])
                rows.append(row)

    with open('z100_getdata.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['time', 'z10x10'])
        writer.writerows(rows)


#读取站点1,1001A,万寿西宫,北京,116.366,39.8673,的空气质量数据sites1_data
def sites_getcsv(path):
    """Collect per-day PM2.5 readings from the air-quality CSVs into one CSV.

    Files matching ``path + 'data/china_sites_2016/china_sites_*.csv'`` are
    processed in the numeric order of filename characters ``[-12:-4]``.  In
    each file the first row is skipped; of the remaining rows, those whose
    third column equals ``'PM2.5'`` and whose fourth column is non-empty
    contribute an ``(hour, value)`` pair taken from the second and fourth
    columns (the fourth column is presumably the station of interest --
    verify against the data layout).

    If a file yields fewer than 24 pairs, every hour 0..23 that has no pair
    is filled with the rounded mean of the available values (``round`` rounds
    halves to the nearest even integer) and the pairs are sorted by hour.
    A file with no usable pair at all gets 24 empty cells instead of raising
    ``ZeroDivisionError``.

    One row per file, ``[filename[-12:-4], value, value, ...]``, is written to
    ``sites_getdata.csv`` in the current working directory below the header
    ``time, PM2.5``.

    Args:
        path: Directory prefix, including the trailing separator, that
            contains the ``data/`` folder.

    Raises:
        ValueError: If a filename lacks the expected digits or an hour/value
            cell is not an integer literal.
        IndexError: If a data row has fewer than four columns.
    """
    site_files = glob.glob(path + 'data/china_sites_2016/china_sites_*.csv')
    site_files.sort(key=lambda x: float(x[-12:-4]))

    pm_day = []
    for site_file in site_files:
        # Close each input file as soon as it has been read.
        with open(site_file, encoding='utf-8') as csvfile:
            records = list(csv.reader(csvfile))

        readings = []
        for record in records[1:]:
            if record[2] == 'PM2.5' and record[3]:
                readings.append([int(record[1]), int(record[3])])

        # Fill missing hours so that a short day still covers hours 0..23.
        if len(readings) < 24:
            seen_hours = {hour for hour, _ in readings}
            if readings:
                total = sum(value for _, value in readings)
                fill = round(total / len(readings))
            else:
                # No reading at all: there is no mean to fill with.
                fill = ''
            readings.extend(
                [hour, fill] for hour in range(24) if hour not in seen_hours
            )
            readings.sort(key=lambda pair: pair[0])

        pm_day.append([site_file[-12:-4]] + [value for _, value in readings])

    with open('sites_getdata.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['time', 'PM2.5'])
        writer.writerows(pm_day)
#def get_lstm_csv():


def main():
    """Shrink previously extracted 'v' grids and export the 850-level values.

    Reads ``data/v100_2015f_halfyear.csv`` and ``data/v100_2015l_halfyear.csv``
    from the directory in ``sys.path[0]``:

    * From the second file, every row whose second column is ``'700'``
      contributes columns 55-57, 65-67 and 75-77 (a 3x3 sub-grid of the
      extracted grid); the collected values are printed.
    * From the first file, every row whose second column is ``'850'``
      contributes columns 55-56; these values are written, numbered from 1,
      to ``v_data850.csv`` in the current working directory.

    Raises:
        IndexError: If an input row has fewer than two columns.
    """
    path = sys.path[0] + '/'
    # The extraction steps are run on demand; enable the ones that are needed.
    # t_getcsv(path)
    # v_getcsv(path)
    # z_getcsv(path)
    # sites_getcsv(path)

    file = path + 'data/v100_2015f_halfyear.csv'
    file2 = path + 'data/v100_2015l_halfyear.csv'

    data850 = []
    data2700 = []
    # Both inputs are closed when the block exits, even if a row is malformed.
    with open(file, encoding='utf-8') as csvfile, \
            open(file2, encoding='utf-8') as csvfile2:
        for row in csv.reader(csvfile2):
            if row[1] == '700':
                # Narrow the extracted grid down to 3x3.
                data2700.extend(row[55:58])
                data2700.extend(row[65:68])
                data2700.extend(row[75:78])
        for row in csv.reader(csvfile):
            if row[1] == '850':
                data850.extend(row[55:57])

    with open('v_data850.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        for number, value in enumerate(data850, 1):
            writer.writerow([number, value])

    print(data2700)


# Run the entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

# Reposted from blog.csdn.net/u011537121/article/details/81543434