Getting started with pandas: a first data-processing exercise

import urllib.request;
from pandas import DataFrame;
from pandas import Series;
from bs4 import BeautifulSoup;

# Location of the saved HTML table and of the generated text file.
HTML_URL = 'file:///F:/python/untitled1/core/do_data/2month.html'
OUTPUT_PATH = 'data.txt'

# Header text of the table columns this script reads.
COL_FULL_CODE = "股票全码"
# NOTE(review): the published listing of this script only showed
# machine-translated English for the four names below (" limit time ",
# " historical reasons limit ", " limit the reasons the election " and
# ' ticker '). The Chinese values are best-guess reconstructions -- confirm
# them against the <th> texts printed by main() and correct as needed.
COL_LIMIT_TIME = "涨停时间"
COL_HISTORY_REASON = "历史涨停原因"
COL_SELECTED_REASON = "涨停原因选"
COL_DEDUP_KEY = "股票代码"
# Name of the helper column that holds the formatted output line.  It is
# internal to this script (the listing showed it as " new new ").
COL_EXPORT = "new"


def parse_table(html):
    """Parse the HTML table rows into a DataFrame of cell texts.

    The <th> cells of the first <tr> become the column names; every <tr>
    that contains at least one <td> becomes one row of strings.

    Raises:
        ValueError: if the document has no <tr> at all, or a data row has a
            different number of cells than the header.
    """
    soup = BeautifulSoup(html, "html.parser")
    trs = soup.find_all('tr')
    if not trs:
        raise ValueError("no <tr> rows found in the HTML document")

    headers = [th.getText() for th in trs[0].find_all('th')]

    rows = []
    for tr in trs:
        cells = [td.getText() for td in tr.find_all('td')]
        if not cells:
            # Header row (only <th>) or an empty row: nothing to store.
            continue
        if len(cells) != len(headers):
            raise ValueError(
                "row has %d cells but the header has %d columns"
                % (len(cells), len(headers))
            )
        rows.append(cells)

    # Build the frame once; DataFrame.append was removed in pandas 2.0 and
    # appending row by row copied the whole frame on every iteration.
    return DataFrame(rows, columns=headers)


def build_export_lines(data):
    """Return one formatted text line per stock, de-duplicated.

    Each line is the full stock code with the exchange prefix rewritten
    ("SZ" -> "0|", "SH" -> "1|"), followed by " |", the limit time, two
    spaces, the historical reason, two spaces, and the selected reason.
    When several rows share the same COL_DEDUP_KEY value only the last one
    is kept.  The input frame is not modified.

    Raises:
        KeyError: if any required column is missing from ``data``.
    """
    required = [
        COL_FULL_CODE,
        COL_LIMIT_TIME,
        COL_HISTORY_REASON,
        COL_SELECTED_REASON,
        COL_DEDUP_KEY,
    ]
    missing = [name for name in required if name not in data.columns]
    if missing:
        raise KeyError(
            "missing column(s) %r; available columns: %r"
            % (missing, list(data.columns))
        )

    lines = []
    for code, limit_time, history, selected in zip(
        data[COL_FULL_CODE],
        data[COL_LIMIT_TIME],
        data[COL_HISTORY_REASON],
        data[COL_SELECTED_REASON],
    ):
        prefix = str(code).replace("SZ", "0|").replace("SH", "1|")
        # Separators (" |" and two spaces) are kept exactly as published.
        lines.append(f"{prefix} |{limit_time}  {history}  {selected}")

    deduped = data.assign(**{COL_EXPORT: lines}).drop_duplicates(
        subset=[COL_DEDUP_KEY], keep='last'
    )
    return deduped[COL_EXPORT].tolist()


def main():
    """Read the HTML table, report its size, and write the export file."""
    with urllib.request.urlopen(HTML_URL) as response:
        html = response.read()

    data = parse_table(html)
    print(list(data.columns))
    print(len(data))

    lines = build_export_lines(data)
    print(len(lines))

    # Encoding is left at the platform default, as in the original script.
    with open(OUTPUT_PATH, 'w') as out:
        out.write("\n".join(lines))


if __name__ == "__main__":
    main()

 

Guess you like

Origin www.cnblogs.com/rongye/p/12466584.html