2019.07.22 (day07) learn the job (programming)

day06 HomeWork

1. Web crawler: downloading pictures

import re
import requests

# Download every image referenced on the doutula.com landing page into the
# current working directory.

# The timeout keeps a stalled server from hanging the script indefinitely.
response = requests.get('https://www.doutula.com/', timeout=10)
print(response.status_code)
print(response.encoding)
data = response.text

# Capture the data-original attribute of each matching <img> tag; the loop
# below treats every captured value as an image URL.
res = re.findall(r'<img referrerpolicy=".*?" data-original="(.*?)".*?>', data)

for i in res:
    print(i)

    # The file is saved under the last path segment of the URL. A URL that
    # ends in '/' yields an empty name, which open() cannot use, so skip it.
    res_name = i.split('/')[-1]
    if not res_name:
        continue

    try:
        res_response = requests.get(i, timeout=10)
        res_response.raise_for_status()
    except requests.RequestException as exc:
        # One broken image must not abort the downloads that follow it, and
        # an error page must not be saved as if it were image data.
        print('skipped', i, exc)
        continue

    # The with-block closes the file even if the write fails.
    with open(res_name, 'wb') as f:
        f.write(res_response.content)


2. Web crawler: word frequency statistics

import re
import requests
import jieba


# Fetch one article page, segment the text of its <meta> tags with jieba and
# print the (word, count) pairs ordered from most to least frequent.

# The timeout keeps a stalled server from hanging the script indefinitely.
response = requests.get('http://www.haha56.net/a/2016/12/14140.html', timeout=10)
# Decode the response body as GBK when .text is read.
response.encoding = 'gbk'
data = response.text

# Collect the content attribute of every <meta name="..." content="..."> tag.
res_content = re.findall(r'<meta name=".*?" content="(.*?)">', data)
# Join the captured strings rather than calling str() on the list: the list
# repr would inject brackets, quotes and escape sequences (for example
# '\u3000') into the text that is about to be segmented.
res_content = ' '.join(res_content)
print(type(res_content))

# jieba.lcut is the public API; it returns the segmented words as a list.
res_content_jieba = jieba.lcut(res_content)

# Count every token except single-character ones (mostly punctuation,
# whitespace and particles).
word_counts = {}
for word in res_content_jieba:
    if len(word) == 1:
        continue
    # If the word is already in the dictionary its count grows by one;
    # otherwise it is added with a count of 1.
    word_counts[word] = word_counts.get(word, 0) + 1


def func(i):
    """Return the count from a (word, count) pair; used as the sort key."""
    return i[1]


data_list = list(word_counts.items())
# Ascending sort followed by reverse() gives descending counts.
data_list.sort(key=func)
data_list.reverse()

print(data_list)

3. word cloud

import wordcloud
import re
import requests

# Fetch one article page and render the text of its <meta> tags as a word
# cloud image saved to ciyun.png.

# The timeout keeps a stalled server from hanging the script indefinitely.
response = requests.get('http://www.haha56.net/a/2016/12/14140.html', timeout=10)
# Decode the response body as GBK when .text is read.
response.encoding = 'gbk'
data = response.text

# Collect the content attribute of every <meta name="..." content="..."> tag.
res_content = re.findall(r'<meta name=".*?" content="(.*?)">', data)
print(res_content)

# Join the captured strings rather than calling str() on the list: the list
# repr would inject brackets, quotes and escape sequences (for example
# '\u3000') that could end up drawn as words in the cloud.
dat = ' '.join(res_content)
# NOTE(review): the font path is Windows-specific; presumably simkai.ttf was
# chosen so that Chinese glyphs render - confirm and adjust on other systems.
w = wordcloud.WordCloud(font_path=r'C:\Windows\Fonts\simkai.ttf')
w.generate(dat)
w.to_file('ciyun.png')

Guess you like

Origin www.cnblogs.com/xichenHome/p/11228462.html