Selenium代刷问卷星问卷

selenium自动刷问卷星问卷

看到很多同学天天分享这类问卷调查的链接，刚好我在学Selenium，就想着做一个demo试试。
这是我们问卷调查的内容，大家也可以试着做一下：https://www.wjx.cn/m/27168497.aspx
使用测试GIF

选项选择以及判断页面是否需要填写验证码

选项选择我直接用的随机数生成结果。
一般来说，前几次提交不太频繁时，网站不会要求输入验证码；提交次数多了之后就不行了，必须进行验证码识别。在识别之前，先要判断页面是否出现了验证码。
前几次虽然在页面上看不到存放验证码的div，但它实际上是存在的。我把有验证码的页面和无验证码的页面都保存下来，比较两者的验证码div属性，发现style中display属性的值不同，据此就可以准确判断出页面是否需要填写验证码。

def autoSelect(index):
    """Fill in the questionnaire once with random answers and submit it.

    Loads the survey page in the module-level ``driver``, clicks random
    options for questions 1-12 (three options each for the multiple-choice
    questions 6-8, one option for every other question), recognises the
    captcha through ``captchaCode()`` when the page shows one, submits the
    form and prints whether the submission succeeded.

    :param index: Sequence number of this submission; only used in the
        printed status message.
    """
    driver.get('https://www.wjx.cn/m/27168497.aspx')

    # (question number, number of options offered, number of options to click)
    questions = (
        (1, 2, 1),
        (2, 3, 1),
        (3, 2, 1),
        (4, 2, 1),
        (5, 2, 1),
        (6, 5, 3),   # multiple choice
        (7, 7, 3),   # multiple choice
        (8, 6, 3),   # multiple choice
        (9, 5, 1),
        (10, 3, 1),
        (11, 5, 1),
        (12, 4, 1),
    )
    for question, option_count, picks in questions:
        # random.sample never repeats a value, so no option is clicked twice.
        for option in random.sample(range(1, option_count + 1), picks):
            xpath = '//*[@id="div%d"]/div[2]/div[%d]' % (question, option)
            driver.find_element(By.XPATH, xpath).click()

    # The captcha container is always in the DOM; only its inline ``display``
    # value tells whether the page actually asks for a captcha. Whitespace and
    # case are normalised so the check does not depend on exact formatting.
    captcha_cell = driver.find_element(By.CSS_SELECTOR, '#tdCode')
    inline_style = captcha_cell.get_attribute('style') or ''
    if 'display:block' in ''.join(inline_style.split()).lower():
        # Clicking the captcha input box makes the captcha image appear.
        captcha_input = driver.find_element(By.CSS_SELECTOR, '#yucinput')
        captcha_input.click()
        img = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#imgCode"))
        )
        driver.get_screenshot_as_file('a.png')

        # Crop the captcha out of the screenshot. The fixed offsets were found
        # by trial and error by the original author.
        # NOTE(review): they presumably depend on the browser window size -
        # confirm before running on a different setup.
        size = img.size
        box = (240, 810, 260 + size['width'], 825 + size['height'])
        with Image.open("a.png") as screenshot:
            screenshot.crop(box).save('b.png')
        captcha_input.send_keys(captchaCode())

    driver.find_element(By.CSS_SELECTOR, '#ctlNext').click()
    try:
        # The link inside #tbAward is treated as the "submission done" marker.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#tbAward > tbody > tr:nth-child(2) > td:nth-child(2) > a"))
        )
    except Exception:
        # Best effort: report the failure and let the caller continue, while
        # still letting KeyboardInterrupt / SystemExit propagate.
        print('————————第%d页失败！————————' % index)
    else:
        print('————————第%d页已完成————————' % index)
        time.sleep(0.5)

验证码识别

验证码识别采用的是第三方识别API，网上这类服务很多，有免费的也有收费的。我用的是易源的接口，第一次用有0.01元的优惠，正好先拿来练练手。

def captchaCode():
    """Recognise the captcha saved in ``b.png`` via the ShowAPI (易源) service.

    Reads ``b.png`` from the current directory, posts it base64-encoded as a
    PNG data URI to the recognition endpoint and returns the ``Result`` field
    of the JSON reply.

    :return: ``showapi_res_body.Result`` from the service response
        (presumably the recognised captcha text).
    :raises KeyError: if the JSON reply lacks the expected fields.
    """
    with open("b.png", "rb") as f:
        encoded = base64.b64encode(f.read()).decode()

    # NOTE(review): the credentials are hard-coded and sent over plain HTTP.
    # The original author notes this key has no quota left, so apply for your
    # own key and replace it here.
    formdata = {
        'showapi_appid': '73371',
        'showapi_sign': 'f297a686842a40fb96a8c31ef52a765a',
        'img_base64': 'data:image/png;base64,' + encoded,
        'typeId': '34',
    }
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would hang the whole run.
    response = requests.post(
        url='http://route.showapi.com/184-5',
        data=formdata,
        timeout=30,
    )

    # Parse the JSON text of the reply and pull out the recognition result.
    return json.loads(response.text)["showapi_res_body"]["Result"]

详细代码

爬虫练手，有啥子不足的地方还请多多指教。

import time
import json
import random
import base64
import requests
import urllib.request
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

'''
    问卷星一般都是一些简单的选择或者多选题，每一题随机生成一个或多个选项这个不难，而且问卷星的网站不会封ip，但是刷的多了会出现验证码，验证码验证是个难点。
'''
def autoSelect(index):
    """Fill in the questionnaire once with random answers and submit it.

    Loads the survey page in the module-level ``driver``, clicks random
    options for questions 1-12 (three options each for the multiple-choice
    questions 6-8, one option for every other question), recognises the
    captcha through ``captchaCode()`` when the page shows one, submits the
    form and prints whether the submission succeeded.

    :param index: Sequence number of this submission; only used in the
        printed status message.
    """
    driver.get('https://www.wjx.cn/m/27168497.aspx')

    # (question number, number of options offered, number of options to click)
    questions = (
        (1, 2, 1),
        (2, 3, 1),
        (3, 2, 1),
        (4, 2, 1),
        (5, 2, 1),
        (6, 5, 3),   # multiple choice
        (7, 7, 3),   # multiple choice
        (8, 6, 3),   # multiple choice
        (9, 5, 1),
        (10, 3, 1),
        (11, 5, 1),
        (12, 4, 1),
    )
    for question, option_count, picks in questions:
        # random.sample never repeats a value, so no option is clicked twice.
        for option in random.sample(range(1, option_count + 1), picks):
            xpath = '//*[@id="div%d"]/div[2]/div[%d]' % (question, option)
            driver.find_element(By.XPATH, xpath).click()

    # The captcha container is always in the DOM; only its inline ``display``
    # value tells whether the page actually asks for a captcha. Whitespace and
    # case are normalised so the check does not depend on exact formatting.
    captcha_cell = driver.find_element(By.CSS_SELECTOR, '#tdCode')
    inline_style = captcha_cell.get_attribute('style') or ''
    if 'display:block' in ''.join(inline_style.split()).lower():
        # Clicking the captcha input box makes the captcha image appear.
        captcha_input = driver.find_element(By.CSS_SELECTOR, '#yucinput')
        captcha_input.click()
        img = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#imgCode"))
        )
        driver.get_screenshot_as_file('a.png')

        # Crop the captcha out of the screenshot. The fixed offsets were found
        # by trial and error by the original author.
        # NOTE(review): they presumably depend on the browser window size -
        # confirm before running on a different setup.
        size = img.size
        box = (240, 810, 260 + size['width'], 825 + size['height'])
        with Image.open("a.png") as screenshot:
            screenshot.crop(box).save('b.png')
        captcha_input.send_keys(captchaCode())

    driver.find_element(By.CSS_SELECTOR, '#ctlNext').click()
    try:
        # The link inside #tbAward is treated as the "submission done" marker.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#tbAward > tbody > tr:nth-child(2) > td:nth-child(2) > a"))
        )
    except Exception:
        # Best effort: report the failure and let the caller continue, while
        # still letting KeyboardInterrupt / SystemExit propagate.
        print('————————第%d页失败！————————' % index)
    else:
        print('————————第%d页已完成————————' % index)
        time.sleep(0.5)

def captchaCode():
    """Recognise the captcha saved in ``b.png`` via the ShowAPI (易源) service.

    Reads ``b.png`` from the current directory, posts it base64-encoded as a
    PNG data URI to the recognition endpoint and returns the ``Result`` field
    of the JSON reply.

    :return: ``showapi_res_body.Result`` from the service response
        (presumably the recognised captcha text).
    :raises KeyError: if the JSON reply lacks the expected fields.
    """
    with open("b.png", "rb") as f:
        encoded = base64.b64encode(f.read()).decode()

    # NOTE(review): the credentials are hard-coded and sent over plain HTTP.
    # The original author notes this key has no quota left, so apply for your
    # own key and replace it here.
    formdata = {
        'showapi_appid': '73371',
        'showapi_sign': 'f297a686842a40fb96a8c31ef52a765a',
        'img_base64': 'data:image/png;base64,' + encoded,
        'typeId': '34',
    }
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would hang the whole run.
    response = requests.post(
        url='http://route.showapi.com/184-5',
        data=formdata,
        timeout=30,
    )

    # Parse the JSON text of the reply and pull out the recognition result.
    return json.loads(response.text)["showapi_res_body"]["Result"]

    # chromeOptions = webdriver.ChromeOptions()
    # chromeOptions.add_argument("--proxy-server=https://101.37.79.125:3128")

if __name__ == '__main__':
    # Script entry point: open one Chrome session, bound to the module-level
    # name ``driver`` that autoSelect() reads, and submit the questionnaire
    # repeatedly.
    # driver = webdriver.Chrome(chrome_options=chromeOptions)
    driver = webdriver.Chrome()
    try:
        # Number of questionnaires to submit; change the range as needed.
        for index in range(1, 51):
            autoSelect(index)
    finally:
        # Always shut down the browser and its driver process, even when a
        # submission raises, so no orphaned Chrome instance is left behind.
        driver.quit()

伪调查结果

结果1
结果2
结果3
结果4