첫 번째 블로그

숙제의 마지막 문제

검색기 만들기

  • [1] 표제어(항목) 내용 출력
  • [2] 사용자가 검색 키워드를 입력하도록 허용
  • [3] 목차의 첫 번째 제목 출력
  • [4] 첫 단락 출력
  • [5] 두 번째 제목 아래 첫 번째 단락 출력
  • [6] 제목, 부제 및 소개(즉, 표제어의 내용)를 출력합니다.
  • [7] 코드 단순화

첫 번째 단계

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Fetch a fixed Baidu Baike page and print one <meta> tag's content.

    Downloads the hard-coded URL, parses the HTML with BeautifulSoup and
    prints the ``content`` attribute of the fourth ``<meta>`` tag.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        IndexError: if the page has fewer than four ``<meta>`` tags.
        KeyError: if the selected tag has no ``content`` attribute.
    """
    url = "http://baike.baidu.com/view/284853.htm"
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")  # use Python's built-in parser

    # NOTE(review): index 3 presumably picks the description <meta>; this
    # depends on the page layout - confirm against the live page.
    meta_tags = soup.find_all("meta")
    description = meta_tags[3]["content"]

    print(description)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

두 번째 단계

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Search Baidu Baike for a user-supplied keyword and print a <meta> content.

    Prompts for a keyword, URL-encodes it as the ``word`` query parameter,
    downloads the search URL, and prints the ``content`` attribute of the
    fourth ``<meta>`` tag of the returned page.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        IndexError: if the page has fewer than four ``<meta>`` tags.
        KeyError: if the selected tag has no ``content`` attribute.
    """
    # Imported explicitly: the file only imports urllib.request, and relying
    # on it to expose urllib.parse is an implementation detail.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = f"http://baike.baidu.com/search/word?{query}"
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): index 3 presumably picks the description <meta>; this
    # depends on the page layout - confirm against the live page.
    meta_tags = soup.find_all("meta")
    description = meta_tags[3]["content"]

    print(description)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

세 번째 단계

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Search Baidu Baike for a keyword; print a <meta> content and one <h2>.

    Prompts for a keyword, downloads the search URL, prints the ``content``
    attribute of the fourth ``<meta>`` tag, a separator line, and then the
    third ``<h2>`` element of the page (the tag itself, markup included).

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        IndexError: if the page has too few ``<meta>`` or ``<h2>`` tags.
        KeyError: if the selected ``<meta>`` has no ``content`` attribute.
    """
    # Imported explicitly: the file only imports urllib.request, and relying
    # on it to expose urllib.parse is an implementation detail.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = f"http://baike.baidu.com/search/word?{query}"
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): the fixed indexes (meta 3, h2 2) depend on the page
    # layout - confirm against the live page.
    meta_tags = soup.find_all("meta")
    description = meta_tags[3]["content"]

    print(description)
    print("-->" * 30)
    headings = soup.find_all("h2")
    print(headings[2])

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

네 번째 단계

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Search Baidu Baike for a keyword; print a heading and the first paragraph.

    Prompts for a keyword, downloads the search URL and prints, in order:
    the ``content`` of the fourth ``<meta>`` tag, a separator line, the text
    of the third ``<h2>`` followed by ``:``, and the text of the first
    element whose class is ``para``.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        IndexError: if the page has too few ``<meta>`` or ``<h2>`` tags.
        KeyError: if the selected ``<meta>`` has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Imported explicitly: the file only imports urllib.request, and relying
    # on it to expose urllib.parse is an implementation detail.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = f"http://baike.baidu.com/search/word?{query}"
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): the fixed indexes (meta 3, h2 2) depend on the page
    # layout - confirm against the live page.
    meta_tags = soup.find_all("meta")
    description = meta_tags[3]["content"]

    print(description)
    print("-->" * 30)
    headings = soup.find_all("h2")
    first_heading = headings[2]
    print(first_heading.get_text() + ":")
    first_paragraph = soup.find(class_="para")
    print(first_paragraph.get_text())

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

다섯 번째 단계

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Search Baidu Baike for a keyword; print two headings with a paragraph each.

    Prompts for a keyword, downloads the search URL and prints, in order:
    the ``content`` of the fourth ``<meta>`` tag, a separator line, the text
    of the third ``<h2>`` plus the first ``para`` element, then the text of
    the fourth ``<h2>`` plus the fourth ``para`` element.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        IndexError: if the page has too few ``<meta>``, ``<h2>`` or
            ``para`` elements.
        KeyError: if the selected ``<meta>`` has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Imported explicitly: the file only imports urllib.request, and relying
    # on it to expose urllib.parse is an implementation detail.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = f"http://baike.baidu.com/search/word?{query}"
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): every fixed index below (meta 3, h2 2/3, para 3) depends
    # on the page layout - confirm against the live page.
    meta_tags = soup.find_all("meta")
    description = meta_tags[3]["content"]

    print(description)
    print("-->" * 30)
    headings = soup.find_all("h2")
    first_heading = headings[2]
    print(first_heading.get_text() + ":")
    first_paragraph = soup.find(class_="para")
    print(first_paragraph.get_text())

    second_heading = headings[3]
    print(second_heading.get_text() + ":")
    # Separate name: the original rebound one variable from a tag to a list.
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[3].get_text())
  
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


여섯 번째 단계

import urllib.request
import re
from bs4 import BeautifulSoup

def summary(soup):
    """Print the entry's title (with subtitle, if any) and its summary text.

    Args:
        soup: parsed document exposing ``h1``/``h2`` tag attributes and a
            ``find(class_=...)`` method, e.g. a ``BeautifulSoup`` object.

    Raises:
        AttributeError: if the document has no ``<h1>`` element.
    """
    title = soup.h1.text
    # Append the subtitle BEFORE printing; previously the title was printed
    # first, so the subtitle was computed and then thrown away.
    if soup.h2:
        title += soup.h2.text
    print(title)

    # Look the summary block up once instead of searching the tree twice.
    lemma_summary = soup.find(class_="lemma-summary")
    if lemma_summary:
        print(lemma_summary.text)
def main():
    """Search Baidu Baike for a keyword and print headings, paragraphs, summary.

    Prompts for a keyword, downloads the search URL and prints, in order:
    the ``content`` of the fourth ``<meta>`` tag, a separator line, the text
    of the third ``<h2>`` plus the first ``para`` element, the text of the
    fourth ``<h2>`` plus the fourth ``para`` element, another separator, and
    finally whatever ``summary(soup)`` prints.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        IndexError: if the page has too few ``<meta>``, ``<h2>`` or
            ``para`` elements.
        KeyError: if the selected ``<meta>`` has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Imported explicitly: the file only imports urllib.request, and relying
    # on it to expose urllib.parse is an implementation detail.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = f"http://baike.baidu.com/search/word?{query}"
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): every fixed index below (meta 3, h2 2/3, para 3) depends
    # on the page layout - confirm against the live page.
    meta_tags = soup.find_all("meta")
    description = meta_tags[3]["content"]

    print(description)
    print("-->" * 30)
    headings = soup.find_all("h2")
    first_heading = headings[2]
    print(first_heading.get_text() + ":")
    first_paragraph = soup.find(class_="para")
    print(first_paragraph.get_text())

    second_heading = headings[3]
    print(second_heading.get_text() + ":")
    # Separate name: the original rebound one variable from a tag to a list.
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[3].get_text())
    print("-->" * 30)
    summary(soup)

  
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

일곱 번째 단계

import urllib.request
import re
from bs4 import BeautifulSoup

def summary(soup):
    """Print the entry's title (with subtitle, if any) and its summary text.

    Args:
        soup: parsed document exposing ``h1``/``h2`` tag attributes and a
            ``find(class_=...)`` method, e.g. a ``BeautifulSoup`` object.

    Raises:
        AttributeError: if the document has no ``<h1>`` element.
    """
    title = soup.h1.text
    # Append the subtitle BEFORE printing; previously the title was printed
    # first, so the subtitle was computed and then thrown away.
    if soup.h2:
        title += soup.h2.text
    print(title)

    # Look the summary block up once instead of searching the tree twice.
    lemma_summary = soup.find(class_="lemma-summary")
    if lemma_summary:
        print(lemma_summary.text)
def body():
    """Search Baidu Baike for a keyword and print headings, paragraphs, summary.

    Prompts for a keyword, downloads the search URL and prints, in order:
    the ``content`` of the fourth ``<meta>`` tag, a separator line, the text
    of the third ``<h2>`` plus the first ``para`` element, the text of the
    fourth ``<h2>`` plus the fourth ``para`` element, another separator, and
    finally whatever ``summary(soup)`` prints.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        IndexError: if the page has too few ``<meta>``, ``<h2>`` or
            ``para`` elements.
        KeyError: if the selected ``<meta>`` has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Imported explicitly: the file only imports urllib.request, and relying
    # on it to expose urllib.parse is an implementation detail.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = f"http://baike.baidu.com/search/word?{query}"
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): every fixed index below (meta 3, h2 2/3, para 3) depends
    # on the page layout - confirm against the live page.
    meta_tags = soup.find_all("meta")
    description = meta_tags[3]["content"]

    print(description)
    print("-->" * 30)
    headings = soup.find_all("h2")
    first_heading = headings[2]
    print(first_heading.get_text() + ":")
    first_paragraph = soup.find(class_="para")
    print(first_paragraph.get_text())

    second_heading = headings[3]
    print(second_heading.get_text() + ":")
    # Separate name: the original rebound one variable from a tag to a list.
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[3].get_text())
    print("-->" * 30)

    summary(soup)
def main():
    """Script entry point; delegates all work to ``body()``."""
    body()
   

  
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

추천

출처: blog.csdn.net/qq_51598376/article/details/112361544