Modificación por lotes con Python de la visibilidad de los artículos de CSDN a "solo visible para seguidores"

Referencia principal de este artículo : https://blog.csdn.net/Oh_Python/article/details/126899483
Como el artículo de referencia es antiguo y CSDN ha rediseñado su página desde entonces, el código original ya no se podía ejecutar. Lo he revisado según la versión más reciente del sitio y ahora cumple su función.

Principio: los elementos de la página web se controlan mediante la biblioteca Selenium de Python, que simula los clics manuales.
Orden de ejecución: primero ejecute test1.py y después test2.py.
test1.py guarda la información de inicio de sesión (cookies); el inicio de sesión debe completarse manualmente en un máximo de 10 segundos. test2.py simula todo el proceso de operación manual.

test1.py

from selenium import webdriver
from time import sleep
import json
if __name__ == '__main__':
    # Open the CSDN login page, give the user a fixed window to log in by
    # hand, then dump the browser's cookies to ``csdn_cookies.txt`` as JSON.
    driver = webdriver.Chrome()
    try:
        driver.maximize_window()
        driver.get('https://passport.csdn.net/login?code=public')
        # The manual login has to be finished within these 10 seconds.
        sleep(10)

        # get_cookies() returns a list of cookie dicts; store it as JSON text.
        cookies = driver.get_cookies()
        # Explicit encoding so the file matches how it is read back later.
        with open('csdn_cookies.txt', 'w', encoding='utf8') as f:
            json.dump(cookies, f)
        print('cookies保存成功!')
    finally:
        # Always release the browser and driver process, even on failure.
        driver.quit()

test2.py

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json

class CSDN(object):
    """Batch-switch a CSDN account's "原创" articles to fans-only visibility.

    Everything is done by driving a Chrome window through Selenium:
    ``login`` restores a session from the saved cookie file, ``parse_page``
    collects the article links from the management list, and
    ``change_fans`` opens each link and re-publishes it with the fans-only
    option selected.

    The XPaths below are tied to the page layout the script was written
    against; if the site is redesigned they are what needs updating.
    """

    # JSON list of cookie dicts saved from a previously logged-in session.
    _COOKIE_FILE = 'csdn_cookies.txt'
    _MANAGE_URL = 'https://mp.csdn.net/mp_blog/manage/article?'
    # Upper bound, in seconds, for the explicit waits.
    _WAIT_SECONDS = 1000

    _MENU_XPATH = '//ul[@role="menu"]/li/a[text()="内容管理"]'
    _ARTICLE_LINK_XPATH = '//p[@class="article-list-item-txt"]/a'
    _ARTICLE_TYPE_XPATH = '//div[@class="article-list-type"]/span[1]'
    _NEXT_PAGE_XPATH = '//*[@id="view-containe"]/div/div/div[4]/div/button[2]'
    # NOTE(review): presumably the button that opens the publish settings of
    # the markdown editor -- absolute path, confirm against the live page.
    _EDITOR_BUTTON_XPATH = '/html/body/div[1]/div[1]/div[1]/div/div[3]/button'
    _FANS_ONLY_XPATH = '/html/body/div[1]/div[2]/div/div[1]/div[1]/div[7]/div/div[3]'
    _PUBLISH_XPATH = '/html/body/div[1]/div[2]/div/div[1]/div[2]/button[2]'

    # Label text that marks an article as the author's own work.
    _ORIGINAL_LABEL = '原创'
    _SCROLL_TO_BOTTOM_JS = 'document.documentElement.scrollTop=1000000'

    def __init__(self):
        # Instantiate the browser driver shared by every step.
        self.driver = webdriver.Chrome()

    @staticmethod
    def _session_cookie(cookie):
        """Build the cookie dict handed to ``add_cookie`` from a saved cookie.

        Only ``name`` and ``value`` are taken from *cookie*; every other
        field is a fixed value so the cookie applies to all of ``.csdn.net``.
        """
        return {
            'domain': '.csdn.net',
            'name': cookie.get('name'),
            'value': cookie.get('value'),
            "expires": '',
            'path': '/',
            'httpOnly': False,
            'HostOnly': False,
            'Secure': False
        }

    @classmethod
    def _original_urls(cls, articles):
        """Return the URLs of the ``(url, label)`` pairs labelled "原创"."""
        return [url for url, label in articles if label == cls._ORIGINAL_LABEL]

    def login(self):
        """Open the article-management page using the saved cookies.

        Loads the management URL, injects every cookie from the cookie
        file, refreshes so the cookies take effect, maximizes the window,
        then waits for the "内容管理" menu entry and clicks it.

        Raises whatever ``open``/``json.load`` raise for a missing or
        malformed cookie file, and the wait's timeout error if the menu
        entry never appears.
        """
        self.driver.get(self._MANAGE_URL)

        # Read back the cookies saved from the earlier login.
        with open(self._COOKIE_FILE, 'r', encoding='utf8') as f:
            saved_cookies = json.load(f)

        # Add the cookies to the browser.
        for cookie in saved_cookies:
            self.driver.add_cookie(self._session_cookie(cookie))

        # The cookies only take effect after the page is reloaded.
        self.driver.refresh()

        self.driver.maximize_window()

        # Wait for the content-management entry to load, then click the
        # element the wait returned instead of looking it up a second time.
        menu_entry = WebDriverWait(self.driver, self._WAIT_SECONDS).until(
            EC.presence_of_element_located((By.XPATH, self._MENU_XPATH))
        )
        menu_entry.click()

    def parse_page(self):
        """Collect the "原创" articles from the list and make them fans-only.

        Blocks on a console prompt so the user can first pick the desired
        column in the browser, then walks the paginated article list,
        gathers each article's link and type label, and passes the links
        labelled "原创" to ``change_fans``.
        """
        # The typed value is not inspected; the prompt only pauses the
        # script until the user has selected the column.
        input('输入1继续:')

        # Wait for the article links to load.
        WebDriverWait(self.driver, self._WAIT_SECONDS).until(
            EC.presence_of_element_located((By.XPATH, self._ARTICLE_LINK_XPATH))
        )
        time.sleep(2)

        articles = []  # (url, type label) pairs across all pages
        while True:
            try:
                links = self.driver.find_elements(By.XPATH, self._ARTICLE_LINK_XPATH)
                labels = self.driver.find_elements(By.XPATH, self._ARTICLE_TYPE_XPATH)
                # Pair link and label per page so a mismatch on one page
                # cannot shift the pairing of every later article.
                page_articles = [
                    (link.get_attribute('href'), label.text)
                    for link, label in zip(links, labels)
                ]
                articles.extend(page_articles)
                # Go to the next page; pagination ends when this step fails.
                self.driver.find_element(By.XPATH, self._NEXT_PAGE_XPATH).click()
                time.sleep(3)
            except Exception:
                # Any browser-side failure ends the walk. KeyboardInterrupt
                # is deliberately not caught so Ctrl-C still works.
                break

        # Update only the articles labelled as original work.
        self.change_fans(self._original_urls(articles))

    def _dismiss_alerts(self):
        """Accept every JavaScript alert currently blocking the page."""
        while True:
            try:
                self.driver.switch_to.alert.accept()
            except Exception:
                # Raised once no alert is open: nothing left to clear.
                return

    def _scroll_to_bottom(self):
        """Scroll the document to the bottom."""
        # The script has no return value, so there is nothing to check.
        self.driver.execute_script(self._SCROLL_TO_BOTTOM_JS)

    def change_fans(self, page_urls):
        """Open each article URL and re-publish it as fans-only.

        Prints the number of URLs and each URL as it is processed. An
        article whose editor page does not match the expected layout is
        reported and skipped rather than aborting the batch.

        :param page_urls: iterable of article URLs to update.
        """
        print(len(page_urls))
        for page_url in page_urls:
            self.driver.get(page_url)
            print(page_url)

            # Clear any blocking alert, then scroll the article to the bottom.
            self._dismiss_alerts()
            self._scroll_to_bottom()

            # These steps target the markdown editor layout. If the buttons
            # are redesigned later, only the XPath constants need changing.
            try:
                self.driver.find_element(By.XPATH, self._EDITOR_BUTTON_XPATH).click()
                self._scroll_to_bottom()
                time.sleep(2)

                # Select the fans-only option.
                self.driver.find_element(By.XPATH, self._FANS_ONLY_XPATH).click()

                # Publish the article.
                self.driver.find_element(By.XPATH, self._PUBLISH_XPATH).click()

                # A prompt may require a second click; best effort only.
                try:
                    self.driver.find_element(By.XPATH, self._PUBLISH_XPATH).click()
                except Exception:
                    pass
            except Exception as exc:
                # NOTE: articles using another editor layout (e.g. rich
                # text) are not handled and end up here.
                print('skipped (could not update):', page_url, type(exc).__name__)

            time.sleep(2)

    def run(self):
        """Log in, then collect and update the articles."""
        self.login()
        self.parse_page()

if __name__ == '__main__':
    # Launch the browser and run the whole workflow only when this file is
    # executed as a script, so importing the module has no side effects.
    csdn = CSDN()
    csdn.run()

Supongo que te gusta

Origin blog.csdn.net/gls_nuaa/article/details/132837914
Recomendado
Clasificación