python爬虫9--文件存储之TXT,JSON,CSV

1.TXT文件存储

from pyquery import PyQuery as pq
import requests

# Fetch the Douban Top 250 page and append each movie's name, score and
# comment to movies.txt, one field per line, with a separator after each movie.
url = 'https://movie.douban.com/top250'
headers = {
    'User-Agent': 'Mozilla/5.0(Windows NT 6.1;Win64;x64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/79.0.3945.88 Safari/537.36'
}
# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page.
res.raise_for_status()
doc = pq(res.text)
items = doc('.info').items()
# Open the output file once instead of re-opening it for every movie.
with open('movies.txt', 'a', encoding='utf-8') as file:
    for item in items:
        # find() here matches every element that satisfies the selector
        name = pq(item.find('.hd').html()).find('span:first-child').text()
        score = pq(item.find('.star').html()).find('span:nth-child(2)').text()
        comment = item.find('.quote').text()
        file.write('\n'.join([name, score, comment]))
        file.write('\n' + "=" * 20 + '\n')

2.JSON文件存储

json，JavaScript Object Notation，通过对象（字典）和数组（列表）的组合来表示数据结构，是一种轻量级的数据交换格式。json数据为字符串类型。

loads()方法将JSON字符串转为Python对象，即可操作的数据结构，如字典或列表；

dumps()方法将Python对象（如字典或列表）转化为JSON文本字符串。

2.1读取JSON

import json
# Parse a JSON document held in a Python string.
# The variable is named json_text rather than str so the built-in str type
# is not shadowed for the rest of the script.
json_text = '''
[{
    "name":"aa",
    "age":18
},{
    "name":"bb",
    "age":20
}]
'''
print(type(json_text))   # <class 'str'>
data = json.loads(json_text)
print(data)   # [{'name': 'aa', 'age': 18}, {'name': 'bb', 'age': 20}]
print(data[0]['name'])   # aa
# JSON text must use double quotes; with single quotes json.loads raises:
# json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 3 column 5 (char 8)

2.2写入JSON

import json
# Sample records to serialise: a list of two dicts.
data = [
    {"name": "aa", "age": 18},
    {"name": "bb", "age": 20},
]
# Serialise straight into the file handle; the text written is the same
# as what json.dumps(data) would return.
with open('test.json', 'w') as fp:
    json.dump(data, fp)

from pyquery import PyQuery as pq
import requests
import json

# Fetch the Douban Top 250 page, collect [name, score, comment] for each
# movie, and save the whole list as a single JSON document.
url = 'https://movie.douban.com/top250'
headers = {
    'User-Agent': 'Mozilla/5.0(Windows NT 6.1;Win64;x64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/79.0.3945.88 Safari/537.36'
}
# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page.
res.raise_for_status()
doc = pq(res.text)
items = doc('.info').items()
movies_list = []
for item in items:
    name = pq(item.find('.hd').html()).find('span:first-child').text()
    score = pq(item.find('.star').html()).find('span:nth-child(2)').text()
    comment = item.find('.quote').text()
    movies_list.append([name, score, comment])

# Write once, after the loop. Dumping inside the loop in append mode wrote the
# growing list again on every iteration, leaving several concatenated arrays
# in the file, which is not valid JSON. Mode 'w' is used because appending a
# second document to an existing JSON file would also make it unparsable.
with open('movies.json', 'w', encoding='utf-8') as file:
    json.dump(movies_list, file, indent=4, ensure_ascii=False)

3.CSV文件存储

import csv
# Write a header row and three data rows to test.csv.
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate '\n' on write (e.g. Windows) end up with a blank
# line after every row.
with open('test.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['name', 'age'])
    writer.writerows([
        ['aa', 14],
        ['bb', 24],
        ['cc', 25],
    ])

python爬虫9--文件存储之TXT,JSON,CSV

猜你喜欢