Python:IEEE、OSA、Google 学术论文下载

用 PyQt5 做了一个简单的下载界面。

import sys
import time
from optparse import OptionParser
from urllib.parse import quote

import PIL.ImageOps
import requests
from bs4 import BeautifulSoup
from PIL import Image
from pytesseract import *
from PIL import Image, ImageEnhance
from PyQt5 import QtWidgets
from PyQt5.QtGui import QPixmap
from PyQt5.QtWidgets import QMessageBox
from selenium import webdriver

class GridLayout(QtWidgets.QWidget):
    """Small PyQt5 window for downloading papers from OSA and IEEE Xplore.

    The window has three pairs of rows (OSA, IEEE, Google Scholar). Each pair
    is a title input with a "开始" (start) button, followed by a captcha image
    label, a captcha input and a "下载" (download) button.

    Only three buttons are wired to handlers in this class: the OSA start
    button (``startosa``), the OSA download button (``downloadosa``) and the
    IEEE start button (``startieee``). The IEEE download button and both
    Google Scholar buttons are created but not connected to anything here.

    The OSA flow shares one PhantomJS session, stored in the module-level
    global ``browser1``, between ``startosa`` and ``downloadosa``.
    """

    # Location of the PhantomJS binary used for every headless session.
    PHANTOMJS_PATH = "D:/Program Files (x86)/phantomjs/bin/phantomjs.exe"

    # Full-page screenshot of the OSA captcha page and the cropped captcha.
    OSA_SCREENSHOT_PATH = r"E:\learn\pc_code\python\7.12\osa.png"
    OSA_CAPTCHA_PATH = r"E:\learn\pc_code\python\7.12\osa_cp.png"
    # Crop box (left, upper, right, lower) of the captcha inside the
    # screenshot taken by the Selenium-driven browser. The original author
    # noted a different box, (110, 90, 340, 150), for "Google" (presumably
    # Chrome -- unverified).
    OSA_CAPTCHA_BOX = (0, 120, 280, 200)

    # Directories (with trailing slash) that receive the downloaded PDFs.
    OSA_DOWNLOAD_DIR = "E:/learn/pc_code/python/7.13/"
    IEEE_DOWNLOAD_DIR = "E:/learn/pc_code/python/7.12/"

    # Seconds to wait for a page to render before reading it.
    OSA_PAGE_WAIT = 2
    IEEE_PAGE_WAIT = 4
    # Seconds before a stalled HTTP request made through ``requests`` fails.
    REQUEST_TIMEOUT = 30

    # Characters that cannot appear in a Windows file name, mapped to "_".
    _INVALID_FILENAME_CHARS = str.maketrans(dict.fromkeys('\\/:*?"<>|', "_"))

    def __init__(self):
        """Start the shared PhantomJS session and build the widget tree."""
        super().__init__()
        # The OSA handlers share this session through the module-level name.
        global browser1
        browser1 = self._new_browser()

        self.setWindowTitle("下载论文")

        # One vertical layout holding six horizontal rows.
        wlayout = QtWidgets.QVBoxLayout()

        # OSA: search row.
        self.button_osa = QtWidgets.QPushButton("开始")
        self.mylineedit_osa = QtWidgets.QLineEdit()
        self.button_osa.clicked.connect(self.startosa)
        wlayout.addWidget(self._make_row(
            QtWidgets.QLabel("OSA下载:"),
            self.mylineedit_osa,
            self.button_osa,
        ))

        # OSA: captcha row.
        self.veriimg1 = QtWidgets.QLabel()
        self.mylineedit_osacode = QtWidgets.QLineEdit()
        self.button_osa1 = QtWidgets.QPushButton("下载")
        self.button_osa1.clicked.connect(self.downloadosa)
        wlayout.addWidget(self._make_row(
            self.veriimg1,
            self.mylineedit_osacode,
            self.button_osa1,
        ))

        # IEEE: search row.
        self.button_ieee = QtWidgets.QPushButton("开始")
        self.mylineedit_ieee = QtWidgets.QLineEdit()
        self.button_ieee.clicked.connect(self.startieee)
        wlayout.addWidget(self._make_row(
            QtWidgets.QLabel("IEEE下载:"),
            self.mylineedit_ieee,
            self.button_ieee,
        ))

        # IEEE: captcha row (its download button has no handler here).
        self.veriimg2 = QtWidgets.QLabel()
        self.mylineedit_ieeecode = QtWidgets.QLineEdit()
        self.button_ieee2 = QtWidgets.QPushButton("下载")
        wlayout.addWidget(self._make_row(
            self.veriimg2,
            self.mylineedit_ieeecode,
            self.button_ieee2,
        ))

        # Google Scholar: search row (button has no handler here).
        self.button_google = QtWidgets.QPushButton("开始")
        self.mylineedit_google = QtWidgets.QLineEdit()
        wlayout.addWidget(self._make_row(
            QtWidgets.QLabel("GOOGLE学术下载:"),
            self.mylineedit_google,
            self.button_google,
        ))

        # Google Scholar: captcha row (button has no handler here).
        self.veriimg3 = QtWidgets.QLabel()
        self.mylineedit_googlecode = QtWidgets.QLineEdit()
        self.button_google2 = QtWidgets.QPushButton("下载")
        wlayout.addWidget(self._make_row(
            self.veriimg3,
            self.mylineedit_googlecode,
            self.button_google2,
        ))

        self.setLayout(wlayout)
        self.resize(350, 300)

    @staticmethod
    def _make_row(*widgets):
        """Return a container widget laying out ``widgets`` left to right."""
        row_layout = QtWidgets.QHBoxLayout()
        for widget in widgets:
            row_layout.addWidget(widget)
        container = QtWidgets.QWidget()
        container.setLayout(row_layout)
        return container

    @classmethod
    def _new_browser(cls):
        """Start and return a new PhantomJS WebDriver session."""
        return webdriver.PhantomJS(executable_path=cls.PHANTOMJS_PATH)

    def _ensure_browser(self):
        """Return the shared session, starting a new one if it was quit."""
        global browser1
        if browser1 is None:
            browser1 = self._new_browser()
        return browser1

    def _quit_browser(self):
        """Quit the shared session, if any, and mark it as gone."""
        global browser1
        if browser1 is None:
            return
        try:
            browser1.quit()
        except Exception as err:
            # Nothing useful can be done besides reporting it.
            print("关闭浏览器失败", err)
        finally:
            browser1 = None

    @classmethod
    def _safe_filename(cls, title):
        """Return ``title`` with characters invalid in Windows names replaced.

        Surrounding spaces and dots are stripped; an empty result falls back
        to ``"untitled"`` so the download never ends up as a bare ``.pdf``.
        """
        cleaned = title.translate(cls._INVALID_FILENAME_CHARS).strip(" .")
        return cleaned or "untitled"

    def _warn(self, text):
        """Show ``text`` in a warning dialog with a Cancel button."""
        QMessageBox.warning(self, "警告", text, QMessageBox.Cancel)

    def closeEvent(self, event):
        """Quit the shared browser session when the window is closed."""
        self._quit_browser()
        super().closeEvent(event)

    def startosa(self):
        """Search OSA for the entered title and display the captcha image.

        Opens the OSA search page for the text in ``mylineedit_osa``, clicks
        the link found by the result XPath, screenshots the newest window and
        shows the cropped captcha region in ``veriimg1``. Any failure is
        reported in a warning dialog instead of propagating out of the slot.
        """
        query = self.mylineedit_osa.text().strip()
        if not query:
            self._warn("请输入论文标题")
            return

        # Percent-encode the title so '&', '#', spaces etc. stay in ``q``.
        search_url = (
            "https://www.osapublishing.org/search.cfm?q="
            + quote(query, safe="")
            + "&meta=1&cj=1&cc=1"
        )

        try:
            browser = self._ensure_browser()
            browser.get(search_url)
            # Fixed wait for the result list to render; this blocks the
            # calling thread for the duration.
            time.sleep(self.OSA_PAGE_WAIT)
            browser.find_element(
                "xpath",
                r'//*[@id="results"]/li/div[1]/ul/li[2]/strong[2]/a',
            ).click()
        except Exception as err:
            print("无此文件", err)
            self._warn("无此文件")
            return

        try:
            # The click may open a new window; work on the newest handle.
            browser.switch_to.window(browser.window_handles[-1])
            if not browser.get_screenshot_as_file(self.OSA_SCREENSHOT_PATH):
                raise OSError("cannot write " + self.OSA_SCREENSHOT_PATH)
            # Crop and save while the source image file is still open.
            with Image.open(self.OSA_SCREENSHOT_PATH) as screenshot:
                screenshot.crop(self.OSA_CAPTCHA_BOX).save(
                    self.OSA_CAPTCHA_PATH
                )
        except Exception as err:
            print("验证码截图失败", err)
            self._warn("验证码截图失败")
            return

        self.veriimg1.setPixmap(QPixmap(self.OSA_CAPTCHA_PATH))

    def downloadosa(self):
        """Submit the typed captcha and save the OSA article as a PDF.

        Types the text of ``mylineedit_osacode`` into the ``CaptchaCode``
        field of the shared browser, submits the form, reads the resulting
        page URL, fetches it with ``requests``, takes the ``src`` of the
        second ``<frame>`` and writes that resource to ``OSA_DOWNLOAD_DIR``
        under the (sanitised) title from ``mylineedit_osa``. On success the
        shared browser is quit; ``startosa`` starts a fresh one on next use.
        """
        title = self.mylineedit_osa.text().strip()
        captcha_text = self.mylineedit_osacode.text()

        if browser1 is None:
            # The session was quit by a previous download; the captcha page
            # has to be opened again first.
            self._warn("请先点击开始获取验证码")
            return

        try:
            browser1.find_element("id", "CaptchaCode").send_keys(captcha_text)
            browser1.find_element(
                "xpath", r"/html/body/div/div/form/input[3]"
            ).click()

            # Work on the newest window, then read where the form landed,
            # e.g. https://www.osapublishing.org/view_article.cfm?gotourl=...
            browser1.switch_to.window(browser1.window_handles[-1])
            viewer_url = browser1.current_url

            viewer_page = requests.get(
                viewer_url, timeout=self.REQUEST_TIMEOUT
            )
            frames = BeautifulSoup(
                viewer_page.text, 'html.parser'
            ).find_all('frame')
            # The second frame's ``src`` is used as the PDF address.
            pdf_url = frames[1].get('src')
            pdf_response = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
            print(pdf_url)
            # Do not save an HTTP error page as if it were the PDF.
            pdf_response.raise_for_status()

            target = (
                self.OSA_DOWNLOAD_DIR + self._safe_filename(title) + '.pdf'
            )
            with open(target, 'wb') as pdf_file:
                pdf_file.write(pdf_response.content)
        except Exception as err:
            # Keep the browser alive so the user can retry.
            print("下载失败", err)
            self._warn("下载失败")
            return

        QMessageBox.information(self, "标题", "下载成功",
                                QMessageBox.Yes)
        self._quit_browser()

    def startieee(self):
        """Search IEEE Xplore for the entered title and save the first PDF.

        Loads the search page in a temporary PhantomJS session, takes the
        ``data-artnum`` of the first ``<a aria-label="PDF">`` link, requests
        the matching ``stamp.jsp`` page, follows the ``src`` of its first
        ``<iframe>`` and writes the response to ``IEEE_DOWNLOAD_DIR``. Any
        failure along the way is reported as "无此文件".
        """
        query = self.mylineedit_ieee.text().strip()
        if not query:
            self._warn("请输入论文标题")
            return

        search_url = (
            'https://ieeexplore.ieee.org/search/searchresult.jsp'
            '?newsearch=true&queryText=' + quote(query, safe="")
        )

        try:
            driver = self._new_browser()
            try:
                driver.get(search_url)
                # Fixed wait for the page to render its results; this
                # blocks the calling thread for the duration.
                time.sleep(self.IEEE_PAGE_WAIT)
                page_source = driver.page_source
            finally:
                # Always release the temporary browser, even with no hits.
                driver.quit()

            pdf_links = BeautifulSoup(page_source, 'html.parser').find_all(
                'a', attrs={'aria-label': 'PDF'}
            )
            article_id = pdf_links[0].get('data-artnum')
            stamp_url = (
                'https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber='
                + article_id
            )

            stamp_page = requests.get(stamp_url, timeout=self.REQUEST_TIMEOUT)
            iframes = BeautifulSoup(
                stamp_page.text, 'html.parser'
            ).find_all('iframe')
            pdf_url = iframes[0].get('src')
            pdf_response = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
            # Do not save an HTTP error page as if it were the PDF.
            pdf_response.raise_for_status()

            target = (
                self.IEEE_DOWNLOAD_DIR + self._safe_filename(query) + '.pdf'
            )
            with open(target, 'wb') as pdf_file:
                pdf_file.write(pdf_response.content)
        except Exception as err:
            print("无此文件", err)
            self._warn("无此文件")
            return

        QMessageBox.information(self, "标题", "下载成功",
                                QMessageBox.Yes)

# Launch the GUI only when this file is run as a script, so that importing
# the module does not open a window or call sys.exit().
if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    grid_layout = GridLayout()
    grid_layout.show()
    # Hand the Qt event loop's return code back to the shell.
    sys.exit(app.exec_())

猜你喜欢

转载自blog.csdn.net/Neekity/article/details/86359719