#demo1 有道词典翻译单词
import requests
if __name__ == "__main__":
    # demo1: translate a word via the Youdao dictionary endpoint and save the reply.
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
    # Spoof a browser User-Agent so the server does not reject the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36"}
    # Form data captured from the browser's developer tools.
    # NOTE(review): salt/sign/lts/bv are hard-coded snapshot values; the live API
    # recomputes them per request, so the server may reject this payload — confirm.
    # (Stray leading spaces from the original paste have been removed from the values.)
    data = {
        "i": "爬虫",
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "16005916230854",
        "sign": "52a079bc398239965ce16964323456af",
        "lts": "1600591623085",
        "bv": "656f750600466990f874a839d9f5ad23",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        # Value copied exactly as captured (note the lowercase 'l' in REALTlME).
        "action": "FY_BY_REALTlME"}
    response = requests.post(url, headers=headers, data=data)
    # Persist the raw response body, then echo it to stdout.
    with open('./youdao.html', 'w', encoding='UTF-8') as fp:
        fp.write(response.text)
    print(response.text)
#########################################################################################
#demo2 爬取网易云音乐
import requests
from lxml import etree
##########################################################################################
#demo3 输入任意字符保存返回的网页结果
import requests
if __name__ == "__main__":
    # demo3: query Baidu for an arbitrary word and save the returned page.
    # The search term travels in the 'word' query parameter, e.g.:
    # https://www.baidu.com/s?word=...&tn=site888_3_pg&lm=-1&...
    url = 'https://www.baidu.com/s'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36"}
    kw = input('enter a word :')
    param = {
        'word': kw}
    # params/headers must be passed as keyword arguments:
    # requests.get(url, param, headers) raises a TypeError (get() takes
    # at most 2 positional arguments).
    response = requests.get(url=url, params=param, headers=headers)
    filename = kw + '.html'
    with open(filename, 'w', encoding='UTF-8') as fp:
        fp.write(response.text)
    print(filename, '保存成功!')
##########################################################################################
#demo4 破解百度翻译
import requests
import json
if __name__ == "__main__":
    # demo4: hit Baidu Translate's suggestion XHR endpoint and dump the JSON.
    # URL found via DevTools -> Network -> Headers -> Request URL.
    url = 'https://fanyi.baidu.com/sug'
    # UA spoofing.
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36"}
    # POST request: the form-data body carries the parameters
    # (request method is visible in DevTools -> Network).
    kw = input('enter a word : ')
    data = {
        "kw": kw}
    post_response = requests.post(url=url, data=data, headers=headers)
    # .json() returns a dict; it is only valid when the response
    # Content-Type is JSON (check DevTools -> Network -> Response).
    dic_obj = post_response.json()
    # Persist the result. A with-statement guarantees the file is flushed
    # and closed (the original leaked the handle).
    filename = kw + '.json'
    with open(filename, 'w', encoding='UTF-8') as fp:
        # ensure_ascii=False: json.dump escapes non-ASCII by default, which
        # would mangle the Chinese text — keep it readable instead.
        json.dump(dic_obj, fp=fp, ensure_ascii=False)
####################################################################################################
#demo5 豆瓣电影
import requests
import json
if __name__ == "__main__":
    # demo5: fetch a Douban movie chart page as JSON and save it.
    url = 'https://movie.douban.com/j/chart/top_list'
    # GET request: DevTools "Query String Parameters" become the params dict.
    params = {
        "type": "20",
        "interval_id": "100:90",
        "action": "",
        "start": "20",   # offset of the first movie returned
        "limit": "20"}   # number of movies per page
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36"}
    response = requests.get(url=url, params=params, headers=headers)
    json_resp = response.json()
    # with-statement closes the file reliably (the original leaked the handle).
    with open('./movie.json', 'w', encoding='UTF-8') as fp:
        # ensure_ascii=False keeps Chinese titles readable in the output file.
        json.dump(json_resp, fp=fp, ensure_ascii=False)
    print('电影抓取完成')
#############################################
#demo6 抓取肯德基餐厅地址
import requests
if __name__ == "__main__":
    # demo6: query the KFC store-list API for restaurants in a given city.
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36"}
    kw = input('enter a city : ')
    # For this POST endpoint the parameters are sent as Form Data (the
    # request body), so they must go through data=, not params= — the
    # original passed them as query-string params, contradicting its own
    # comment about Form Data.
    form_data = {
        "cname": "",
        "pid": "",
        "keyword": kw,
        "pageIndex": "1",
        "pageSize": "10"}
    response = requests.post(url=url, data=form_data, headers=headers)
    # Save the raw response (filename typo 'resterant' kept for compatibility).
    with open('./resterant.html', 'w', encoding='UTF-8') as fp:
        fp.write(response.text)
# --- scraped blog-page footer (not code; commented out so the file parses) ---
# 2020Python爬虫学习笔记(一)  [2020 Python scraping study notes, part 1]
# 猜你喜欢  [You may also like]
# 转载自blog.csdn.net/Kaaaakaki/article/details/109103935  [Reposted from this CSDN article]
# 今日推荐  [Today's recommendations]
# 周排行  [Weekly ranking]