import urllib.request


def load_data():
    """Fetch the Baidu home page, print it, and save it to ``baidu.html``.

    The response body is read as ``bytes``, decoded once to ``str`` as
    UTF-8, and the decoded text is written to the output file. The raw
    response object, the raw bytes, and the decoded text are each printed
    along the way.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    url = "http://www.baidu.com/"
    # Plain HTTP GET; `response` is the HTTP response object. The context
    # manager closes the connection even if reading fails.
    with urllib.request.urlopen(url) as response:
        print(response)
        # read() returns the body as bytes.
        data = response.read()
    print(data)
    # Convert the downloaded bytes into a string.
    str_data = data.decode("utf-8")
    print(str_data)
    # The file is opened in text mode, so it must be given str: writing the
    # raw `data` bytes here raises TypeError.
    with open("baidu.html", "w", encoding="utf-8") as f:
        f.write(str_data)
    # The opposite direction: convert a str into bytes.
    str_name = "baidu"
    bytes_name = str_name.encode("utf-8")
    print(bytes_name)


# Scraped content comes back as either str or bytes:
# - received bytes but need to write str  -> decode("utf-8")
# - received str but need to write bytes  -> encode("utf-8")
load_data()
import urllib.request
import urllib.parse
import string


def get_method_params():
    """Run a Baidu search for a Chinese keyword and save the result page.

    Builds the search URL by appending the keyword to the ``wd=`` query
    parameter, percent-encodes the non-ASCII characters, sends a GET
    request, prints the decoded body, and writes it to ``02-encode.html``.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    url = "http://www.baidu.com/s?wd="
    # Append the (Chinese) keyword to the query string.
    # Form the request must end up in:
    # https://www.baidu.com/s?wd=%E7%BE%8E%E5%A5%B3
    name = "美女"
    final_url = url + name
    print(final_url)
    # The URL contains Chinese characters, which ASCII cannot represent.
    # Sending it un-encoded fails with:
    #   UnicodeEncodeError: 'ascii' codec can't encode
    #   characters in position 10-11: ordinal not in range(128)
    # so it is percent-encoded first. `safe=string.printable` leaves every
    # printable ASCII character (scheme, slashes, "?", "=") untouched, so
    # only the non-ASCII characters are escaped.
    encode_new_url = urllib.parse.quote(final_url, safe=string.printable)
    print(encode_new_url)
    # Send the request; the context manager closes the connection even if
    # reading or decoding fails.
    with urllib.request.urlopen(encode_new_url) as response:
        print(response)
        # Read the body and decode it (decode() defaults to UTF-8).
        data = response.read().decode()
    print(data)
    # Save the page locally.
    with open("02-encode.html", "w", encoding="utf-8") as f:
        f.write(data)


get_method_params()