Selenium in practice, case 1: automatically reviewing Jinjiang comments and forum posts

Libraries imported and used

#-*-coding:UTF-8-*-
from selenium import webdriver
import sys,os,re
import time

main: get the review entry links and set up a remembered login so the password does not have to be re-entered

Contains knowledge points:

1. Selenium offers two ways to log in without entering the password each time:

a. Reuse a browser user-data directory.
With the user-data directory configured, log in manually once after the first driver.get; on later runs the page opens already logged in and no password is needed.

Insert picture description here
b. Log in by adding cookies.
The cookies must first be obtained from a logged-in session. The screenshot shows how to view cookies in the browser, using Baidu's cookies as an example.
Baidu cookie view

2. Running Selenium in the background (headless mode):
opt.add_argument('headless')

Code:

if __name__ == '__main__':
    # NOTE: login(), forum() and discuss() must already be defined when this block
    # runs; in a single script, place this block after their definitions.

    # Phrases that mark a post as illegal; a post containing any of them is not approved.
    illegal_info = ('上床', '出门被车撞', '楼主死全家', '你有病吧', '火枪')

    # Location of the chromedriver binary (it has to be downloaded separately).
    # Raw string so the backslashes are never read as escape sequences.
    chromeDriver = r'C:\driver\chrome\chromedriver.exe'
    opt = webdriver.ChromeOptions()

    # Remembered login, option 1: reuse a Chrome user-data directory. Log in by hand
    # once after the first driver.get; later runs reuse the stored session.
    opt.add_argument(r'user-data-dir=C:\Users\username\AppData\Local\Google\Chrome\User Data')
    # opt.add_argument('headless')  # enable to run without a visible browser window
    driver = webdriver.Chrome(chromeDriver, options=opt)

    # Remembered login, option 2: add cookies captured from a logged-in session.
    # driver.add_cookie({'name': 'BAIDUID', 'value': '67F332038A56CC0A9B109'})
    # driver.add_cookie({'name': 'BIDUPSID', 'value': '67F332038A56CC0A9B109'})
    # driver.add_cookie({'name': 'H_PS_PSSID', 'value': '1423_3199_31428_31341_31270_31163_31472'})
    # driver.add_cookie({'name': 'BDUSS', 'value': 'hpRAAAAAAAAAAAAAAAAAAAAAAAJt4s16beLNefm'})

    home_url = 'http://www.jjwxc.net/'

    def _entry_href(position):
        """Return the href of the review-menu entry at `position`, or None if it is absent."""
        links = driver.find_elements_by_xpath(
            '//*[@id="t_user_nav"]/ul/li[6]/ul/li[%d]/a' % position)
        return links[0].get_attribute('href') if links else None

    try:
        driver.get(home_url)
        if driver.current_url != home_url:  # redirected elsewhere, so log in first
            login()

        wenzhang = _entry_href(1)  # article-review entry link (not used further here)
        luntan = _entry_href(3)    # forum-review entry link
        if luntan:
            forum(luntan)
        else:
            print('get url error')
        time.sleep(3)

        # Reload the home page: the entry links are generated dynamically.
        driver.get(home_url)
        pinglun = _entry_href(2)   # comment-review entry link
        if pinglun:
            discuss(pinglun)
    finally:
        # quit() ends the whole WebDriver session (browser and driver process),
        # even when a review step raised.
        driver.quit()

login(): Realize login operation

Code:

def login(name="111", passwd="111"):
    """Log in to jjwxc.net by filling in the site's login form.

    Uses the module-level ``driver``. The process exits with status 1 when the
    login link or one of the form controls cannot be found on the page.

    Args:
        name: Account name typed into the ``loginname`` field.
        passwd: Password typed into the ``loginpassword`` field.
    """
    url = "http://www.jjwxc.net/"
    driver.get(url)
    time.sleep(5)

    # The login form stays hidden until the login link is clicked.
    login_links = driver.find_elements_by_id('jj_login')
    if not login_links:
        print("获取登录按钮失败")
        sys.exit(1)
    print("获取登录按钮成功")
    login_links[0].click()

    # Give the form time to appear before looking up its controls.
    time.sleep(3)
    name_fields = driver.find_elements_by_name('loginname')
    password_fields = driver.find_elements_by_name('loginpassword')
    rule_boxes = driver.find_elements_by_xpath('//*[@id="login_registerRule"]')
    if not (name_fields and password_fields and rule_boxes):
        print("获取登录输入框失败")
        sys.exit(1)

    time.sleep(1)
    name_fields[0].send_keys(name)
    time.sleep(1)
    password_fields[0].send_keys(passwd)
    rule_boxes[0].click()  # tick the registration-rules checkbox
    time.sleep(2)

    submit_buttons = driver.find_elements_by_xpath('//*[@id="logininput"]/li[4]/input')
    if not submit_buttons:
        print("获取登录按钮失败")
        sys.exit(1)
    submit_buttons[0].click()

forum(luntan): Forum review operation

Knowledge points:

1. Repeat the batch step, checking each round whether the number of items fetched has dropped to 10 or fewer.
2. Handling a click that fails because the element is covered by another element:

try:
    time.sleep(0.5)
    delnode.click()
except Exception:
    # The native click failed (for example, the element is covered by another
    # one), so trigger the click through JavaScript instead.
    driver.execute_script("arguments[0].click();", delnode)

3. Use the re module to extract the text content and the ids:

# re.S (DOTALL) makes "." match newline characters as well. Per the original
# note it is required here: without it the first pattern returns nothing.
html = re.findall(r"<div style='font-size:20px' onclick=\"toggleselect\((.*?)\)\">(.*?)<br><center style=",page, re.S)
# Values of the "ids" checkboxes on the page.
idall = re.findall(r'<input name="ids" type="checkbox" id="logids" value="(.*?)">', page, re.S)

Code:

1. Outer function: forum(url) — batch-approve first, then review one by one once 10 or fewer items remain

def forum(url):
    """Run the forum review: batch approval first, then the rest one by one.

    Clicking a single "pass" button refreshes the page, so posts cannot be
    clicked through in a for loop; the bulk of the work therefore goes through
    the batch-approve button, and the remaining posts are handled one at a
    time, always taking the first post of the refreshed page.

    Args:
        url: Forum-review entry link to open with the module-level ``driver``.
    """
    time.sleep(1)
    driver.get(url)

    # Accept the agreement prompt if the page shows one.
    know_b = driver.find_elements_by_xpath('/html/body/div[5]/div/center/input')
    if know_b:
        know_b[0].click()
    time.sleep(1)

    num = forum_all(20)  # batch approval
    # forum_all returns the integer 10; comparing against the string '10'
    # would never match and the one-by-one pass would be skipped.
    if num == 10:
        forum_one(10)  # verify the remaining posts one by one

2. Inner function: forum_all(num), batch approval

def forum_all(num):
    """Batch-approve forum posts until 10 or fewer remain on the page.

    Each round reads the page source from the module-level ``driver``, ticks
    the checkbox of every post that contains none of the phrases in the
    module-level ``illegal_info``, and clicks the batch button. Batch
    processing stops when 10 or fewer posts are found, when no post could be
    ticked in a round (another round would change nothing), or when the batch
    button is missing.

    Args:
        num: Pass 10 to skip batch processing; any other value runs it.

    Returns:
        Always 10, telling the caller to continue one post at a time.
    """
    # The page has been seen with either quoting style on the post <div>.
    single_quoted = r"<div style='font-size:20px' onclick=\"toggleselect\((.*?)\)\">(.*?)</div>"
    double_quoted = r'<div style="font-size:20px" onclick="toggleselect\((.*?)\)">(.*?)</div>'

    while num != 10:
        page = driver.page_source
        # re.S lets "." match newlines as well.
        posts = re.findall(single_quoted, page, re.S)
        if not posts:
            print("获取消息为空或需要审核的消息是空的")
            posts = re.findall(double_quoted, page, re.S)
        # Leave the last 10 (or fewer) posts to the one-by-one review,
        # whichever pattern matched.
        if len(posts) <= 10:
            break

        ticked = 0
        for post_id, content in posts:
            print("查询元素可用id为:" + post_id + "     信息为:" + content)

            # Work out who wrote the text and which text has to be checked.
            replies = re.findall(r'<br>.*☆☆☆(.*?)\|.*?☆☆☆<br>', content)
            if replies:
                # The post carries replies: check the text after the last reply header.
                print("######校准的是回复的信息##########")
                user = replies[-1]
                info = re.split('<br>.*<br>', content)[-1]
                print("回复的用户是:" + user + "              回复的信息是:" + info)
            else:
                fields = content.strip().split(":", 1)
                user, info = fields[0], fields[-1]
                print("发表的用户是:" + user + "              发表的信息是:" + info)

            # The checkbox value is the first three toggleselect arguments joined by "-".
            parts = post_id.replace("'", "").split(',')
            if len(parts) < 3:
                print("获取checkbox失败")
                continue
            checkid = "-".join(parts[:3])
            checknodes = driver.find_elements_by_xpath("//input[@value='%s']" % checkid)
            if not checknodes:
                print("获取checkbox失败")
                continue
            print("获取checkbox成功")
            time.sleep(1)

            # Tick only posts without any illegal phrase.
            if any(illegal in info for illegal in illegal_info):
                print("有非法元素")
                continue
            checknodes[0].click()
            ticked += 1

        if not ticked:
            # Nothing was selected, so the batch button would not change the page
            # and the next round would see the same posts again.
            break

        batch_buttons = driver.find_elements_by_name('buttondel')
        if not batch_buttons:
            break
        print("批量通过")
        batch_buttons[0].click()
        time.sleep(1)

    return 10

3. Inner function: forum_one(num), one-by-one approval

def forum_one(num):
    """Review forum posts one at a time, always acting on the first post listed.

    Every round re-reads the page source from the module-level ``driver``
    (the page refreshes after each click), takes the first post, and clicks
    its "pass" button when the text contains none of the phrases in the
    module-level ``illegal_info``, otherwise its "reject" button.

    Args:
        num: Remaining-post counter. 0 ends the review at once; any other
            value is replaced by the number of posts found on the page.

    Returns:
        0 once the counter reaches zero.

    Raises:
        Whatever ``find_element_by_xpath`` raises when the pass or reject
        button of the first post is not on the page.
    """
    # selenium is already a dependency of this file; imported here so the
    # function is self-contained.
    from selenium.common.exceptions import WebDriverException

    # The page has been seen with either quoting style on the post <div>.
    single_quoted = r"<div style='font-size:20px' onclick=\"toggleselect\((.*?)\)\">(.*?)</div>"
    double_quoted = r'<div style="font-size:20px" onclick="toggleselect\((.*?)\)">(.*?)</div>'

    # A loop instead of self-recursion: a long backlog cannot hit the
    # interpreter's recursion limit.
    while True:
        print("一个一个的点")
        if num == 0:
            print("检查完成")
            return 0

        page = driver.page_source
        # re.S lets "." match newlines as well.
        posts = re.findall(single_quoted, page, re.S)
        if not posts:
            print("获取消息为空")
            posts = re.findall(double_quoted, page, re.S)
        num = len(posts)
        if num == 0:
            continue  # next round reports completion and returns

        post_id, content = posts[0]
        print("查询元素可用id为:" + post_id + "     信息为:" + content)

        # Work out who wrote the text and which text has to be checked.
        replies = re.findall(r'<br>.*☆☆☆(.*?)\|.*?☆☆☆<br>', content)
        if replies:
            # The post carries replies: check the text after the last reply header.
            print("######校准的是回复的信息##########")
            user = replies[-1]
            info = re.split('<br>.*<br>', content)[-1]
            print("回复的用户是:" + user + "              回复的信息是:" + info)
        else:
            fields = content.strip().split(":", 1)
            user, info = fields[0], fields[-1]
            print("发表的用户是:" + user + "              发表的信息是:" + info)

        # Build the ids of the <span> elements wrapping the pass / reject buttons.
        # NOTE(review): the "Recalculation" suffix looks like a translation
        # artifact of the real id suffix - confirm against the page markup.
        parts = post_id.replace("'", "").split(',')
        passid = parts[0] + "-" + parts[1] + "Recalculation"
        delid = parts[0] + "-" + parts[1] + "Recalculationdel"
        print(passid + "   " + delid)

        # find_element_by_xpath raises when nothing matches, so reaching the
        # print means the lookup succeeded.
        passnode = driver.find_element_by_xpath("//span[@id='%s']/input" % passid)
        time.sleep(1)
        print("获取通过成功")
        delnode = driver.find_element_by_xpath("//span[@id='%s']/input" % delid)
        time.sleep(1)
        print("获取不通过成功")

        # Pass the post unless it contains an illegal phrase.
        has_illegal = any(illegal in info for illegal in illegal_info)
        if has_illegal:
            print("有非法元素")
        target = delnode if has_illegal else passnode
        try:
            target.click()
        except WebDriverException:
            # The native click failed (for example, the button is covered by
            # another element): click it through JavaScript instead.
            driver.execute_script("arguments[0].click();", target)

        num -= 1
        time.sleep(0.5)

discuss(pinglun): Comment review operation

Code:

1. Outer function: discuss(url) — batch-approve first, then review one by one once 10 or fewer items remain

def discuss(url):
    """Run the comment review: batch approval first, then the rest one by one.

    Args:
        url: Comment-review entry link to open with the module-level ``driver``.
    """
    driver.get(url)

    # Accept the agreement prompt if the page shows one.
    know_b = driver.find_elements_by_xpath('/html/body/div[5]/div/center/input')
    if know_b:
        know_b[0].click()
    time.sleep(1)

    # Batch approval; it returns 10 when the rest should be handled one by one.
    num = discuss_all(20)
    if num == 10:
        discuss_one(10)

2. Inner function: discuss_all(num), batch approval

def discuss_all(num):
    """Batch-approve comments until 10 or fewer remain on the page.

    Each round reads the page source from the module-level ``driver``, finds
    the checkbox belonging to every comment, ticks it when the comment text
    contains none of the phrases in the module-level ``illegal_info``, and
    clicks the batch button. Batch processing stops when 10 or fewer comments
    are found, when no comment could be ticked in a round (another round would
    change nothing), or when the batch button is missing.

    Args:
        num: Pass 10 to skip batch processing; any other value runs it.

    Returns:
        Always 10, telling the caller to continue one comment at a time.
    """
    # selenium is already a dependency of this file; imported here so the
    # function is self-contained.
    from selenium.common.exceptions import WebDriverException

    # The page has been seen with either quoting style on the comment <div>.
    single_quoted = r"<div style='font-size:20px' onclick=\"toggleselect\((.*?)\)\">(.*?)<br><center style="
    double_quoted = r'<div style="font-size:20px" onclick="toggleselect\((.*?)\)">(.*?)<br><center style='
    checkbox_value = r'<input name="ids" type="checkbox" id="logids" value="(.*?)">'

    while num != 10:
        page = driver.page_source
        # re.S lets "." match newlines as well.
        comments = re.findall(single_quoted, page, re.S)
        idall = re.findall(checkbox_value, page, re.S)
        if not comments:
            print("获取消息为空或需要审核的消息是空的")
            comments = re.findall(double_quoted, page, re.S)
        # Leave the last 10 (or fewer) comments to the one-by-one review,
        # whichever pattern matched.
        if len(comments) <= 10:
            break

        ticked = 0
        for comment_id, content in comments:
            print("查询元素可用id为:" + comment_id + "     信息为:" + content)

            # Text after the first colon is the comment body. Split only once so
            # later colons do not cut the checked text short; with no colon at
            # all, check the whole content.
            _, colon, body = content.partition(':')
            info = body.strip() if colon else content.strip()

            # Find the checkbox value that belongs to this comment.
            joined_id = comment_id.replace(',', '_')
            first_part = comment_id.split(',')[0]
            checkid = ''
            for value in idall:
                if joined_id in value:
                    print('当前消息无回复')  # comment without replies
                    checkid = value
                    break
                if value.startswith(first_part):
                    print("当前消息有回复内容")  # comment with replies
                    checkid = value
                    break
            if not checkid:
                print('没有该元素')
                continue
            print('最终的checkboxid是 %s' % checkid)

            checknodes = driver.find_elements_by_xpath("//input[@value='%s']" % checkid)
            if not checknodes:
                print("获取checkbox失败")
                continue
            checknode = checknodes[0]
            print("获取checkbox成功")
            time.sleep(1)

            # Tick only comments without any illegal phrase.
            has_illegal = any(illegal in info for illegal in illegal_info)
            if has_illegal:
                print("有非法元素")
            print("开始点击")
            if has_illegal:
                print("有非法元素,不点击")
                continue
            try:
                time.sleep(1)
                checknode.click()
            except WebDriverException:
                # The native click failed (for example, the checkbox is covered
                # by another element): click it through JavaScript instead.
                driver.execute_script("arguments[0].click();", checknode)
            ticked += 1

        if not ticked:
            # Nothing was selected, so the batch button would not change the page
            # and the next round would see the same comments again.
            break

        batch_buttons = driver.find_elements_by_id('batchControll_button')
        if not batch_buttons:
            break
        print("批量通过")
        time.sleep(0.5)
        batch_buttons[0].click()
        time.sleep(5)

    return 10

3. Inner function: discuss_one(num), one by one approval

def forum_one(num):
    """Review posts one at a time, always acting on the first post listed.

    Every round re-reads the page source from the module-level ``driver``
    (the page refreshes after each click), takes the first post, and clicks
    its "pass" button when the text contains none of the phrases in the
    module-level ``illegal_info``, otherwise its "reject" button.

    Args:
        num: Remaining-post counter. 0 ends the review at once; any other
            value is replaced by the number of posts found on the page.

    Returns:
        0 once the counter reaches zero.

    Raises:
        Whatever ``find_element_by_xpath`` raises when the pass or reject
        button of the first post is not on the page.
    """
    # selenium is already a dependency of this file; imported here so the
    # function is self-contained.
    from selenium.common.exceptions import WebDriverException

    # The page has been seen with either quoting style on the post <div>.
    single_quoted = r"<div style='font-size:20px' onclick=\"toggleselect\((.*?)\)\">(.*?)</div>"
    double_quoted = r'<div style="font-size:20px" onclick="toggleselect\((.*?)\)">(.*?)</div>'

    # A loop instead of self-recursion: a long backlog cannot hit the
    # interpreter's recursion limit.
    while True:
        print("一个一个的点")
        if num == 0:
            print("检查完成")
            return 0

        page = driver.page_source
        # re.S lets "." match newlines as well.
        posts = re.findall(single_quoted, page, re.S)
        if not posts:
            print("获取消息为空")
            posts = re.findall(double_quoted, page, re.S)
        num = len(posts)
        if num == 0:
            continue  # next round reports completion and returns

        post_id, content = posts[0]
        print("查询元素可用id为:" + post_id + "     信息为:" + content)

        # Work out who wrote the text and which text has to be checked.
        replies = re.findall(r'<br>.*☆☆☆(.*?)\|.*?☆☆☆<br>', content)
        if replies:
            # The post carries replies: check the text after the last reply header.
            print("######校准的是回复的信息##########")
            user = replies[-1]
            info = re.split('<br>.*<br>', content)[-1]
            print("回复的用户是:" + user + "              回复的信息是:" + info)
        else:
            fields = content.strip().split(":", 1)
            user, info = fields[0], fields[-1]
            print("发表的用户是:" + user + "              发表的信息是:" + info)

        # Build the ids of the <span> elements wrapping the pass / reject buttons.
        # NOTE(review): the "Recalculation" suffix looks like a translation
        # artifact of the real id suffix - confirm against the page markup.
        parts = post_id.replace("'", "").split(',')
        passid = parts[0] + "-" + parts[1] + "Recalculation"
        delid = parts[0] + "-" + parts[1] + "Recalculationdel"
        print(passid + "   " + delid)

        # find_element_by_xpath raises when nothing matches, so reaching the
        # print means the lookup succeeded.
        passnode = driver.find_element_by_xpath("//span[@id='%s']/input" % passid)
        time.sleep(1)
        print("获取通过成功")
        delnode = driver.find_element_by_xpath("//span[@id='%s']/input" % delid)
        time.sleep(1)
        print("获取不通过成功")

        # Pass the post unless it contains an illegal phrase.
        has_illegal = any(illegal in info for illegal in illegal_info)
        if has_illegal:
            print("有非法元素")
        target = delnode if has_illegal else passnode
        try:
            target.click()
        except WebDriverException:
            # The native click failed (for example, the button is covered by
            # another element): click it through JavaScript instead.
            driver.execute_script("arguments[0].click();", target)

        num -= 1
        time.sleep(0.5)


# The section heading and discuss() refer to this routine as discuss_one, but the
# listing defines it as forum_one; expose it under both names so that call resolves.
# NOTE(review): the body uses the forum page's markup patterns - confirm that they
# also match the comment-review page.
discuss_one = forum_one

Guess you like

Origin blog.csdn.net/qq_46020608/article/details/113105234
Recommended