python作业6月18日

# 今日内容
# 1 selenium剩余用法
# 2 selenium 万能登录破解
# 3 破解极验
from selenium import webdriver
from selenium.webdriver import ChromeOptions
import time
r'''
步骤:
1、打开文件的查看,显示隐藏文件
2、找到C:\Users\administortra\AppData\Local\Google\Chrome\User Data
删除Default文件
3、重新打开浏览器,并登陆百度账号
- 此时会创建一个新的Default缓存文件
'''
# Script: open Baidu in Chrome with an existing user profile and inject a
# login cookie, then reload the page.

# Options object that carries Chrome's start-up arguments.
options = ChromeOptions()

# Chrome user-data directory whose stored cookies should be reused:
# 'C:\Users\administortra\AppData\Local\Google\Chrome\User Data'
profile_directory = r'--user-data-dir=C:\Users\administortra\AppData\Local\Google\Chrome\User Data'

# Register the user-data directory as a Chrome command-line argument.
options.add_argument(profile_directory)

# Hand the options to the driver. `options=` is the supported keyword;
# the older `chrome_options=` spelling is deprecated in Selenium.
driver = webdriver.Chrome(options=options)

try:
    driver.implicitly_wait(10)
    driver.get('https://www.baidu.com/')

    # Cookie to inject -> BDUSS:*****
    # The dict keys "name" and "value" must be lowercase.
    driver.add_cookie({"name": "BDUSS", "value": "用户session字符串"})

    # Reload so the next request is sent with the cookie added above.
    driver.refresh()

    time.sleep(10)

finally:
    # quit() ends the whole WebDriver session (all windows plus the
    # chromedriver process); close() would only close the current window.
    driver.quit()
# 爬取京东信息
from selenium import webdriver
import PIL
from selenium.webdriver.common.keys import Keys
import time
# Script: search jd.com for "macbook" and append one record per product of the
# first result page to jd.txt.
driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the search box and submit it with Enter.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('macbook')
    input_tag.send_keys(Keys.ENTER)

    # Every product card on the result page carries the class 'gl-item'.
    good_list = driver.find_elements_by_class_name('gl-item')

    # Open the output file once for the whole page instead of once per item.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            good_url = good.find_element_by_css_selector(
                '.p-img a').get_attribute('href')
            good_name = good.find_element_by_css_selector(
                '.p-name em').text.replace("\n", "--")
            good_price = good.find_element_by_class_name(
                'p-price').text.replace("\n", ":")
            good_commit = good.find_element_by_class_name(
                'p-commit').text.replace("\n", " ")
            good_from = good.find_element_by_class_name(
                'J_im_icon').text.replace("\n", " ")

            # The placeholders must be {braces}; with (parentheses) the
            # f-string wrote the variable names instead of their values.
            good_content = (
                '\n'
                f'商品链接:{good_url}\n'
                f'商品名称:{good_name}\n'
                f'商品价格:{good_price}\n'
                f'评价人数:{good_commit}\n'
                f'商品商家:{good_from}\n'
            )
            print(good_content)
            f.write(good_content)

    time.sleep(3)
finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window and leave the chromedriver process running.
    driver.quit()
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time


def get_good(driver, max_pages=None):
    """Scrape JD search-result pages and append every product to jd.txt.

    Starting from the result page currently loaded in *driver*, each page is
    scrolled, every element with class 'gl-item' is turned into a text record
    (link, name, price, review count), printed and appended to 'jd.txt', and
    the 'pn-next' link is clicked to move on to the following page.

    Args:
        driver: A Selenium WebDriver already showing a JD search-result page.
        max_pages: Optional upper bound on the number of pages to scrape.
            ``None`` (the default) keeps going until no usable next-page
            link is found.

    The driver is always shut down before this function returns or raises;
    callers must not use it afterwards.
    """
    pages_done = 0
    try:
        # One file handle for the whole crawl instead of one open() per item.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            # A loop replaces the former self-recursion, which added a stack
            # frame per page and re-ran the cleanup at every level on unwind.
            while max_pages is None or pages_done < max_pages:
                # Scroll down via JS so products further down the page are
                # loaded before they are collected.
                driver.execute_script('window.scrollTo(0,5000);')

                # Give the newly requested data time to load.
                time.sleep(2)

                for good in driver.find_elements_by_class_name('gl-item'):
                    # Product link
                    good_url = good.find_element_by_css_selector(
                        '.p-img a').get_attribute('href')

                    # Product name
                    good_name = good.find_element_by_css_selector(
                        '.p-name em').text.replace("\n", "--")

                    # Product price
                    good_price = good.find_element_by_class_name(
                        'p-price').text.replace("\n", ":")

                    # Number of reviews
                    good_commit = good.find_element_by_class_name(
                        'p-commit').text.replace("\n", " ")

                    good_content = (
                        '\n'
                        f'商品链接: {good_url}\n'
                        f'商品名称: {good_name}\n'
                        f'商品价格: {good_price}\n'
                        f'评价人数: {good_commit}\n'
                        '\n\n'
                    )
                    print(good_content)
                    f.write(good_content)

                # Make each finished page visible on disk right away.
                f.flush()
                pages_done += 1

                # find_elements_* returns an empty list (after the implicit
                # wait) instead of raising when nothing matches.
                next_tags = driver.find_elements_by_class_name('pn-next')
                if not next_tags:
                    break
                next_tag = next_tags[0]
                # NOTE(review): JD appears to keep the link on the last page
                # and mark it with a 'disabled' class - confirm on the live
                # site.
                if 'disabled' in (next_tag.get_attribute('class') or ''):
                    break

                next_tag.click()
                time.sleep(2)
    finally:
        # Single cleanup for the whole crawl. quit() ends the WebDriver
        # session; close() would only close the current window.
        driver.quit()


if __name__ == '__main__':
    # Script entry point: ask for a keyword, search it on jd.com and hand the
    # result page over to get_good().
    good_name = input('请输入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)

        # 1. Open the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the product name into the search box and press Enter.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)
    except BaseException:
        # get_good() closes the driver itself, but it never runs when the
        # setup above fails, so release the browser here before re-raising.
        driver.quit()
        raise

    get_good(driver)

猜你喜欢

转载自www.cnblogs.com/jjjpython1/p/11061855.html