1.TXT文件存储
from pyquery import PyQuery as pq
import requests

# Fetch the page at `url`, extract name / score / quote for every `.info`
# element, and append the results to movies.txt as plain text.
url = 'https://movie.douban.com/top250'
headers = {
    'User-Agent': 'Mozilla/5.0(Windows NT 6.1;Win64;x64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/79.0.3945.88 Safari/537.36'
}

# A timeout stops the script from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an
# error page that contains no `.info` elements.
res = requests.get(url=url, headers=headers, timeout=10)
res.raise_for_status()
doc = pq(res.text)
items = doc('.info').items()

# Build every record first so the output file is opened once, not once per movie.
records = []
for item in items:
    # find() here matches every element that satisfies the selector.
    name = pq(item.find('.hd').html()).find('span:first-child').text()
    score = pq(item.find('.star').html()).find('span:nth-child(2)').text()
    comment = item.find('.quote').text()
    # One field per line, followed by a separator line of 20 '=' characters.
    records.append('\n'.join([name, score, comment]) + '\n' + "=" * 20 + '\n')

# Only touch the file when there is something to append.
if records:
    with open('movies.txt', 'a', encoding='utf-8') as file:
        file.writelines(records)
2.JSON文件存储
JSON(JavaScript Object Notation)是一种轻量级的数据交换格式,通过对象(对应Python的字典)和数组(对应Python的列表)的组合来表示数据结构。JSON数据本身是字符串类型的文本。
json.loads()方法将JSON字符串解析为Python对象,即可操作的数据结构,如字典或列表;
json.dumps()方法将Python对象(如字典或列表)序列化为JSON文本字符串。
2.1读取JSON
import json

# Example: parse a JSON string into Python data with json.loads().
# The variable is named json_text (not `str`) so the builtin str type is
# not shadowed for the rest of the module.
json_text = ''' [{ "name":"aa", "age":18 },{ "name":"bb", "age":20 }] '''
print(type(json_text))  # <class 'str'>
data = json.loads(json_text)
print(data)  # [{'name': 'aa', 'age': 18}, {'name': 'bb', 'age': 20}]
print(data[0]['name'])  # aa
# JSON text must use double quotes; with single quotes json.loads() fails, e.g.:
# json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 3 column 5 (char 8)
2.2写入JSON
import json

# Example: serialise a list of dicts to test.json.
data = [
    {"name": "aa", "age": 18},
    {"name": "bb", "age": 20},
]

# The encoding is given explicitly so the output does not depend on the
# platform's locale; 'w' truncates any previous content of test.json.
with open('test.json', 'w', encoding='utf-8') as file:
    # json.dump() writes the same text as file.write(json.dumps(data)).
    json.dump(data, file)
from pyquery import PyQuery as pq
import requests
import json

# Fetch the page at `url`, extract name / score / quote for every `.info`
# element, and store the results in movies.json as a JSON array of
# [name, score, comment] lists.
url = 'https://movie.douban.com/top250'
headers = {
    'User-Agent': 'Mozilla/5.0(Windows NT 6.1;Win64;x64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/79.0.3945.88 Safari/537.36'
}

# A timeout stops the script from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an
# error page that contains no `.info` elements.
res = requests.get(url=url, headers=headers, timeout=10)
res.raise_for_status()
doc = pq(res.text)
items = doc('.info').items()

movies_list = []
for item in items:
    name = pq(item.find('.hd').html()).find('span:first-child').text()
    score = pq(item.find('.star').html()).find('span:nth-child(2)').text()
    comment = item.find('.quote').text()
    movies_list.append([name, score, comment])

# Open with 'w', not 'a': appending a second JSON array after an existing one
# would leave a file that is no longer a single valid JSON document.
with open('movies.json', 'w', encoding='utf-8') as file:
    # ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX escapes.
    json.dump(movies_list, file, indent=4, ensure_ascii=False)
3.CSV文件存储
import csv

# Example: write a header row and three data rows to test.csv.
# newline='' is required by the csv module: the writer emits its own '\r\n'
# line endings, and without it Windows would translate them again and
# produce a blank line after every row.
with open('test.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['name', 'age'])
    writer.writerows([
        ['aa', 14],
        ['bb', 24],
        ['cc', 25],
    ])