extract things from text files

#coding=utf-8
#!/usr/bin/python
import os
from glob import glob
import re

# Make sure the results file exists before anything tries to read it:
# traverseDirByOSWalk opens "foo.csv" in "r" mode. The handle itself is never
# used afterwards (every function opens its own), so close it right away
# instead of leaking it for the lifetime of the process.
fo = open("foo.csv", "a+")
fo.close()

def isDigit(my_str):
    """Return True when *my_str* can be converted with float(), else False.

    Anything float() accepts counts as numeric, including signs, exponents
    and surrounding whitespace. Values float() cannot handle at all (for
    example None) are reported as non-numeric instead of raising.
    """
    try:
        float(my_str)
    except (TypeError, ValueError):
        # ValueError: unparsable text; TypeError: not a string/number at all.
        return False
    return True

def china(file_name, onlyname):
    """Summarise one tab-separated quote file and append a row to foo.csv.

    Every line of *file_name* is split on tabs. Lines with more than 14
    fields contribute their numeric values from columns 3, 5 and 7 (column 7
    is the ratio) and, when the line is long enough to have it, column 16
    (the price). Each accepted value is echoed to stdout.

    The function then counts:
      * nd3 - values from column 3 that are >= 13
      * nd6 - values from column 5 that are >= 16
      * 16 ratio counters: for each threshold t in 9.9, 8, 7, 6, 5, 4, 3, 2
        the number of ratios >= t followed by the number of ratios <= -t

    The row appended to foo.csv is: onlyname, nd3, nd6, the first four ratio
    counters, four empty cells, the remaining twelve counters and the sum of
    all prices.

    Args:
        file_name: path of the file to read.
        onlyname: label written into the first CSV column.
    """
    print(file_name)
    ratio = []
    price = []
    d3 = []
    d6 = []
    with open(file_name) as source:
        for line in source:
            fields = line.split("\t")
            if len(fields) <= 14:
                continue
            if isDigit(fields[3]):
                print(fields[3])
                d3.append(float(fields[3]))
            if isDigit(fields[5]):
                print(fields[5])
                d6.append(float(fields[5]))
            if isDigit(fields[7]):
                print(fields[7])
                ratio.append(float(fields[7]))
            # Column 16 only exists on lines with at least 17 fields; reading
            # it unconditionally raised IndexError on 15/16-field lines.
            if len(fields) > 16 and isDigit(fields[16]):
                print(fields[16])
                price.append(float(fields[16]))

    nd3 = sum(1 for value in d3 if value >= 13)
    print("nd3:" + str(nd3))
    # Count over d6 (column 5); iterating d3 here was a copy-paste slip that
    # left d6 collected but never used.
    nd6 = sum(1 for value in d6 if value >= 16)
    print("nd6:" + str(nd6))

    # r[2*k] counts ratios >= thresholds[k], r[2*k + 1] counts ratios
    # <= -thresholds[k]. The buckets are cumulative, not exclusive.
    thresholds = (9.9, 8, 7, 6, 5, 4, 3, 2)
    r = [0] * (2 * len(thresholds))
    for value in ratio:
        for pos, limit in enumerate(thresholds):
            if value >= limit:
                r[2 * pos] += 1
            if value <= -limit:
                r[2 * pos + 1] += 1
    print(r)

    total_price = sum(price)
    print("price all:" + str(total_price))

    # Four empty cells separate the first four counters from the rest,
    # matching the existing layout of foo.csv.
    cells = (
        [onlyname, str(nd3), str(nd6)]
        + [str(count) for count in r[:4]]
        + [""] * 4
        + [str(count) for count in r[4:]]
        + [str(total_price)]
    )
    with open("foo.csv", "a") as fo:
        fo.write(",".join(cells) + "\n")

def hk(file_name, onlyname):
    """Summarise one tab-separated quote file and append a row to foo.csv.

    Used by the directory walker for the files it labels "hongkong" and
    "usa". Every line with more than 5 tab-separated fields is inspected:

      * Normally the price is column 3 and the ratio is column 4.
      * Sometimes the name column (2) has the numbers glued onto it, e.g.
        "2916 BRK_A <name>-A213330.00-0.13" (name, price 213330.00 and ratio
        -0.13 in one cell). Decimal numbers found in the name column are
        then used instead: the first one is the price; the ratio is the
        first negative decimal if there is one, otherwise the second
        decimal. If only one decimal is glued on, the ratio is read from
        column 3.

    Known limitation: names that themselves contain quotes or hyphens (such
    as YUGANG INT'L or VALE COMMON-DRS) can still be mis-parsed.

    For each threshold t in 9.9, 8, 7, 6, 5, 4, 3, 2 the function counts the
    ratios >= t and the ratios <= -t (16 cumulative counters). The row
    appended to foo.csv is: onlyname, the first four counters, four empty
    cells, the remaining twelve counters and the sum of all prices.

    Args:
        file_name: path of the file to read.
        onlyname: label written into the first CSV column.
    """
    print(file_name)
    # Raw strings so that "\d" reaches the regex engine unchanged.
    decimal = re.compile(r'\d+\.\d+')
    negative_decimal = re.compile(r'-\d+\.\d+')

    ratio = []
    price = []
    with open(file_name) as source:
        for line in source:
            fields = line.split("\t")
            if len(fields) <= 5:
                continue
            name_cell = fields[2]
            glued = decimal.findall(name_cell)

            if not glued:
                # Well-formed row: ratio and price live in their own columns.
                if isDigit(fields[4]):
                    print(fields[4])
                    ratio.append(float(fields[4]))
                if isDigit(fields[3]):
                    print(fields[3])
                    price.append(float(fields[3]))
                continue

            if len(glued) > 1:
                # Price and ratio are both glued to the name; prefer an
                # explicitly negative number as the ratio.
                negatives = negative_decimal.findall(name_cell)
                if negatives:
                    ratio.append(float(negatives[0]))
                else:
                    ratio.append(float(glued[1]))
            else:
                # Only the price is glued on; the ratio moved to column 3.
                print(name_cell)
                if isDigit(fields[3]):
                    print(fields[3])
                    ratio.append(float(fields[3]))

            # A regex match of digits.digits always converts with float().
            print(glued[0])
            price.append(float(glued[0]))

    thresholds = (9.9, 8, 7, 6, 5, 4, 3, 2)
    r = [0] * (2 * len(thresholds))
    # NOTE(review): the 40 leading zeros are kept from the original debug
    # output; they look like leftover padding - confirm before removing.
    r8 = [0] * 40
    for value in ratio:
        for pos, limit in enumerate(thresholds):
            if value >= limit:
                r[2 * pos] += 1
            if value <= -limit:
                r[2 * pos + 1] += 1
        if value <= -8:
            r8.append(value)
    print(r)

    total_price = sum(price)
    print("price all:" + str(total_price))
    r8.sort()
    print(r8)

    # Four empty cells separate the first four counters from the rest,
    # matching the existing layout of foo.csv.
    cells = (
        [onlyname]
        + [str(count) for count in r[:4]]
        + [""] * 4
        + [str(count) for count in r[4:]]
        + [str(total_price)]
    )
    with open("foo.csv", "a") as fo:
        fo.write(",".join(cells) + "\n")

def printSeparator(func):
    """Decorator that frames the output of a one-argument function.

    The wrapper prints a header naming *func*, a line of 40 dashes, calls
    ``func(path)``, prints a line of 40 equals signs and returns whatever
    *func* returned. If *func* raises, the closing line is not printed and
    the exception propagates.

    Args:
        func: callable taking a single ``path`` argument.

    Returns:
        The wrapping function, carrying *func*'s name and docstring.
    """
    # Imported here because the file's import block does not provide it.
    import functools

    @functools.wraps(func)
    def deco(path):
        print("call method %s, result is:" % func.__name__)
        print("-" * 40)
        result = func(path)
        print("=" * 40)
        return result

    return deco
@printSeparator
def traverseDirByShell(path):
    """Print the output of ``ls <path>``, one stripped line per entry.

    Args:
        path: directory to list. NOTE: it is concatenated into a shell
            command unquoted, so only pass trusted paths without spaces or
            shell metacharacters.
    """
    listing = os.popen('ls ' + path)
    try:
        for f in listing:
            print(f.strip())
    finally:
        # Close the pipe so the child process is reaped.
        listing.close()
@printSeparator
def traverseDirByGlob(path):
    """Print every path matched by ``<path>/*`` (a leading ``~`` is expanded).

    Args:
        path: directory whose direct entries are listed.
    """
    path = os.path.expanduser(path)
    for f in glob(path + '/*'):
        print(f.strip())
@printSeparator
def traverseDirByListdir(path):
    """Print the entries of *path* and of each of its direct sub-directories.

    A leading ``~`` in *path* is expanded. Every entry name of *path* is
    printed; for entries that are directories, the names they contain are
    printed as well (one level deep only).

    Args:
        path: directory to list.
    """
    path = os.path.expanduser(path)
    for f in os.listdir(path):
        print(f.strip())
        # os.listdir returns bare names, so they must be joined with *path*;
        # testing the bare name only worked when *path* was the cwd.
        child = os.path.join(path, f)
        if os.path.isdir(child):
            for f1 in os.listdir(child):
                print(f1.strip())
@printSeparator
def traverseDirByOSWalk(path):
    """Walk *path* and summarise every ``.txt`` file not yet in foo.csv.

    The current content of foo.csv is read once up front. For every file
    found by os.walk, the directory and its last path component are printed.
    A file is processed only when its name ends in ``.txt`` (case
    insensitive) and the name does not occur anywhere in foo.csv. The 7th
    character of the file name selects the parser:

      * ``.``        -> china()
      * ``h`` / ``H`` -> hk()  (reported as "hongkong file")
      * ``u`` / ``U`` -> hk()  (reported as "usa file")

    Files with any other 7th character, or with names shorter than seven
    characters, are reported as "not counted" and skipped.

    Args:
        path: root directory of the walk; a leading ``~`` is expanded.
    """
    with open("foo.csv", "r") as fo:
        all_the_text = fo.read()
    path = os.path.expanduser(path)

    for dirname, _subdirs, filenames in os.walk(path):
        print('[' + dirname + ']')
        for f in filenames:
            print(dirname)
            # Last path component, independent of the platform's separator.
            print(os.path.basename(dirname))

            if not f.lower().endswith('.txt'):
                continue
            if f in all_the_text:
                # Already summarised in a previous run.
                continue
            print(f + ' not counted')

            # Names shorter than 7 characters have no market marker; indexing
            # f[6] on them used to raise IndexError.
            if len(f) <= 6:
                continue
            marker = f[6].lower()
            full_path = os.path.join(dirname, f)
            if marker == '.':
                print(f + " is A file")
                china(full_path, f)
            elif marker == 'h':
                print(f + " is hongkong file")
                hk(full_path, f)
            elif marker == 'u':
                print(f + " is usa file")
                hk(full_path, f)
if __name__ == "__main__":
    # Script entry point: summarise everything below the current working
    # directory. The shell/glob/listdir traversals defined in this file are
    # alternative listings and are not run here.
    path = os.getcwd()
    traverseDirByOSWalk(path)

extract things from text files

猜你喜欢