2021-03-11 python 获取期刊名和ISSN

import requests
from bs4 import BeautifulSoup
import requests
import re
import json
import pandas as pd
import xlwt
from tkinter.filedialog import askopenfilename
# Endpoint and fixed query values used for every journal lookup.
SEARCH_URL = 'https://www.iikx.com/e/action/ListInfo.php'
# Comma-separated category ids sent as the ``classid`` query parameter
# (requests percent-encodes the commas as %2C, matching the original URL).
CLASS_IDS = '125,124,126,127,128,129,130,131,132,133,134,135,123,136'
# Raw string: '\j' is not a valid escape sequence in a normal string literal.
OUTPUT_PATH = r'D:\juurnal_and_ISSN.xls'
# Placeholder written when no ISSN could be extracted for a journal.
NOT_FOUND = 'non'
# Seconds to wait for the server before giving up on one lookup.
REQUEST_TIMEOUT = 30
# Matches the text of every attribute-less <td> cell in the result page.
TD_PATTERN = re.compile(r'<td>(.*?)</td>')


def extract_issn(html):
    """Return the text of the second ``<td>`` cell in *html*.

    The script treats that cell as the ISSN of the first search hit
    (assumption about the site's result layout -- verify if the page changes).
    Returns ``NOT_FOUND`` when the page has fewer than two matching cells,
    instead of raising ``IndexError``.
    """
    cells = TD_PATTERN.findall(html)
    return cells[1] if len(cells) > 1 else NOT_FOUND


def fetch_issn(journal):
    """Query the search page for *journal* and return the extracted ISSN.

    The title is passed through ``params`` so that requests percent-encodes
    it correctly ('&' becomes '%26'); the title itself is never modified.
    Returns ``NOT_FOUND`` if the request fails, so that one bad lookup does
    not abort the whole batch.
    """
    params = {
        'title': journal,
        'classid': CLASS_IDS,
        'jcr21': '',
        'orderby': '',
        'ph': '1',
        'jcr21Selected': '',
    }
    try:
        response = requests.get(SEARCH_URL, params=params,
                                timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print('request failed for %r: %s' % (journal, err))
        return NOT_FOUND
    print(response.status_code)
    response.encoding = 'utf-8'
    return extract_issn(response.text)


def main():
    """Read journal titles from a CSV and save a Journal/ISSN spreadsheet.

    Only the first column of the chosen CSV is used. One row per title is
    written to ``OUTPUT_PATH``; the collected ISSNs are also printed.
    """
    path_and_name = askopenfilename(title='Two column data : title and ISSN',
                                    filetypes=[('CSV', '*.csv')],
                                    initialdir='D:\\')
    if not path_and_name:
        # The dialog was cancelled; there is nothing to process.
        return

    titles = pd.read_csv(path_and_name).iloc[:, 0]

    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('My Worksheet', cell_overwrite_ok=True)
    worksheet.write(0, 0, 'Journal')
    worksheet.write(0, 1, 'ISSN')

    mark = []
    # Row 0 holds the header, so data rows start at 1.
    for row, journal in enumerate(titles, start=1):
        issn = fetch_issn(journal)
        mark.append(issn)
        # Write the original title, not a URL-escaped variant of it.
        worksheet.write(row, 0, journal)
        worksheet.write(row, 1, issn)

    print(mark)
    workbook.save(OUTPUT_PATH)


if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/you_us/article/details/114681521
今日推荐