netCDF文件内容提取

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/bnxf00000/article/details/54944797

      很久之前写的一个小程序,当初要提取大量netCDF文件中的部分数据。所以写了一个小程序来进行自动化处理。

      ncBrowse可以用来查看netCDF文件中的字段和信息。

      我写的程序用到了netCDF4这个包,具体源码如下:

 
 
#!/usr/bin/env python
#-*- coding:utf-8 -*-
# __author__ = 'ShadonSniper'
from netCDF4 import Dataset
import  os
import argparse
from threading import *
import time
 
 
"""
TEMP_FLAG = False
TEMP_ADJUSTED_FLAG = False
PSAL_FLAG = False
PSAL_ADJUSTED_FLAG = False
PRES_FLAG = False
PRES_ADJUSTED_FLAG=False
CYCLE_NUMBER_FLAG = False
PROJECT_NAME_FLAG = False
DATA_MODE_FLAG = False
LATITUDE_FLAG = False
LONGITUDE_FLAG = False
PI_NAME_FLAG = False
POSITIONING_SYSTEM_FLAG = False
"""
def nc2txt(nc_file): #nc_file is your nc file path
    """Dump selected variables of one netCDF file into a sibling ``.txt`` file.

    The report is written next to ``nc_file`` using the same base name with a
    ``.txt`` extension.  The file is opened in append mode, so processing the
    same input twice appends a second copy of the report.

    ``PLATFORM_NUMBER`` and ``DATE_CREATION`` are looked up unconditionally;
    every other variable is written only when the dataset contains it.

    :param nc_file: path of the netCDF file to read.
    """
    header_banner = '**********************************************************************************HEADER**********************************************************************************\n'
    location_banner = '**********************************************************************************LOCATION*********************************************************************************\n'
    data_banner = '************************************************************************************DATA***********************************************************************************\n'
    per_line = 10  # values written per output line
    # Optional variables whose values are read up front, before the report
    # file is opened.
    optional_names = ('CYCLE_NUMBER', 'PROJECT_NAME', 'PI_NAME', 'DATA_MODE',
                      'POSITIONING_SYSTEM', 'LATITUDE', 'LONGITUDE')
    # (variable name, label line) in output order.  Only the TEMP label has a
    # trailing space after '='; that quirk is part of the existing format.
    data_sections = (
        ('TEMP', 'TEMP= '),
        ('TEMP_ADJUSTED', 'TEMP_ADJUSTED='),
        ('PSAL', 'PSAL='),
        ('PSAL_ADJUSTED', 'PSAL_ADJUSTED='),
        ('PRES', 'PRES='),
        ('PRES_ADJUSTED', 'PRES_ADJUSTED='),
    )

    print('[+] File: %s Processing!'%(nc_file))
    nc_fid = Dataset(nc_file, 'r')
    try:
        variables = nc_fid.variables

        platform_number = variables['PLATFORM_NUMBER'][:]
        # Row count = total element count / element count of the first row.
        attribute_sum = platform_number.size // platform_number[0].size
        date_creation = (''.join(variables['DATE_CREATION'][:]) +
                         '(' + variables['DATE_CREATION'].conventions + ')')

        # Header/location values are fetched before the text file is opened so
        # that a failing read here does not leave a partial report behind.
        optional = {}
        for name in optional_names:
            if name in variables:
                optional[name] = variables[name][:]
        if 'DATA_MODE' in optional:
            optional['DATA_MODE'] = ''.join(optional['DATA_MODE'])

        nc_name = os.path.splitext(os.path.basename(nc_file))[0]
        # os.path.join keeps a bare file name relative; plain concatenation
        # with '/' would have produced '/<name>.txt' in the filesystem root.
        txt_name = os.path.join(os.path.dirname(nc_file), nc_name + '.txt')

        with open(txt_name, 'a') as txt_fp:

            def write_listing(label, writer, values):
                # One labelled listing: "<label>: ", the values, a newline.
                txt_fp.write(label + ': ')
                txt_fp.write('\n')
                writer(txt_fp, values, per_line, attribute_sum)
                txt_fp.write('\n')

            txt_fp.write(header_banner)
            write_listing('PLATFORM_NUMBER', FormatOutput_Str, platform_number)
            if 'CYCLE_NUMBER' in optional:
                write_listing('CYCLE_NUMBER', FormatOutput_Num,
                              optional['CYCLE_NUMBER'])
            txt_fp.write('DATE_CREATION: ')
            txt_fp.write(date_creation)
            txt_fp.write('\n')
            for name in ('PROJECT_NAME', 'PI_NAME'):
                if name in optional:
                    write_listing(name, FormatOutput_Str, optional[name])
            if 'DATA_MODE' in optional:
                txt_fp.write('DATA_MODE: ')
                txt_fp.write('\n')
                txt_fp.write(optional['DATA_MODE'])
                txt_fp.write('\n')
            if 'POSITIONING_SYSTEM' in optional:
                write_listing('POSITIONING_SYSTEM', FormatOutput_Str,
                              optional['POSITIONING_SYSTEM'])

            txt_fp.write(location_banner)
            for name in ('LATITUDE', 'LONGITUDE'):
                if name in optional:
                    write_listing(name, FormatOutput_Num, optional[name])

            txt_fp.write(data_banner)
            for name, label in data_sections:
                if name not in variables:
                    continue
                txt_fp.write(label + '\n')
                FormatOutput(nc_fid, txt_fp, name, per_line, attribute_sum)
                # The final section has never been followed by a newline;
                # keep the report byte-compatible.
                if name != 'PRES_ADJUSTED':
                    txt_fp.write('\n')
    finally:
        # Release the dataset handle even when reading or writing failed.
        nc_fid.close()
    print('[+] File: %s Process Completely!'%(nc_file))
 
 
 
 
#Traversal nc file
def GetFileFromThisRootDir(dir):
    """Recursively collect the paths of all ``.nc`` files under ``dir``.

    :param dir: root directory to walk.
    :return: list of file paths (each joined with the directory it was found
        in) whose extension is exactly ``nc``; the comparison is
        case-sensitive.
    """
    ext = 'nc'
    allfiles = []
    for root, _dirs, files in os.walk(dir):
        for filename in files:
            # Compare for equality: a substring test against 'nc' would also
            # accept files with no extension at all, as well as '.n' and '.c'.
            if os.path.splitext(filename)[1][1:] == ext:
                allfiles.append(os.path.join(root, filename))
    return allfiles
 
 
def FormatOutput(nc,fp,attribute,interval,sum):
    """Write the first ``sum`` rows of a dataset variable as tab-separated text.

    Every value is followed by two tab characters, a newline is inserted after
    every ``interval`` values within a row, and consecutive rows are separated
    by a blank line.

    :param nc: object exposing a ``variables`` mapping (the open dataset).
    :param fp: writable text file object.
    :param attribute: name of the variable to print.
    :param interval: number of values written per output line.
    :param sum: number of rows to print.
    """
    variable = nc.variables[attribute]
    for i in range(0, sum):
        if i != 0:
            fp.write('\n\n')
        # Fetch the row once; indexing the variable again for every single
        # element would repeat the row read for each value written.
        row = variable[i]
        for j in range(0, row.size):
            if j % interval == 0 and j != 0:
                fp.write('\n')
            fp.write(str(row[j]))
            fp.write('\t\t')
 
 
 
 
def FormatOutput_Num(fp,attribute_list,interval,sum):
    """Write the first ``sum`` items of ``attribute_list`` as text.

    Each item is converted with ``str`` and followed by two tab characters;
    a newline is emitted before every ``interval``-th item except the first.

    :param fp: writable text file object.
    :param attribute_list: indexable sequence of values.
    :param interval: number of values written per output line.
    :param sum: number of leading items to write.
    """
    position = 0
    while position < sum:
        starts_new_line = position % interval == 0 and position > 0
        if starts_new_line:
            fp.write('\n')
        fp.write(str(attribute_list[position]))
        fp.write('\t\t')
        position += 1
 
 
 
 
def FormatOutput_Str(fp,attribute_list,interval,sum):
    """Write the first ``sum`` rows of a character array as joined strings.

    The ``.data`` of each row is concatenated into one string and written
    followed by two tab characters; a newline is emitted before every
    ``interval``-th row except the first.

    :param fp: writable text file object.
    :param attribute_list: indexable collection whose rows expose ``.data``
        (presumably a masked character array -- confirm against the caller).
    :param interval: number of strings written per output line.
    :param sum: number of leading rows to write.
    """
    row_index = 0
    while row_index < sum:
        characters = attribute_list[row_index][:].data
        text = ''.join(characters)
        if row_index % interval == 0 and row_index > 0:
            fp.write('\n')
        fp.write(text)
        fp.write('\t\t')
        row_index += 1
 
 
 
 
if __name__ =='__main__':
    # Command-line entry point: convert every .nc file found under --nc_file
    # to a text report, optionally using several worker threads.
    start = time.time()
    parser = argparse.ArgumentParser(description='Process nc file')
    parser.add_argument('--nc_file',type=str,dest='nc_file',required=True,help='nc file path')
    parser.add_argument('--threads',type=int,dest='threads',default=1,help='set thread number,default is 1')
    args = parser.parse_args()
    nc_path = args.nc_file
    threads = args.threads
    if threads < 1:
        # A zero thread count used to end in ZeroDivisionError; reject it
        # (and negative values) with a normal usage error instead.
        parser.error('--threads must be a positive integer')
    nc_list = GetFileFromThisRootDir(nc_path)

    if threads == 1:
        # Single-threaded mode: convert the files one by one in this thread.
        for nc_file in nc_list:
            nc2txt(nc_file)
    else:
        # Run the files in batches of at most `threads` workers and wait for
        # each batch before starting the next one.  Starting every worker
        # up front would run one thread (and one open dataset) per file, no
        # matter what --threads was set to.
        for offset in range(0, len(nc_list), threads):
            batch = [Thread(target=nc2txt, args=(nc_file,))
                     for nc_file in nc_list[offset:offset + threads]]
            for worker in batch:
                worker.start()
            for worker in batch:
                worker.join()
    print("Elapsed Time: %s"%(time.time()-start))
    print('All Process Completely!')
 
 

      需要的朋友可参考修改。

猜你喜欢

转载自blog.csdn.net/bnxf00000/article/details/54944797