My First Blog Post

The Last Question of the Homework

Building a Searcher

  • [1] Print the entry content
  • [2] Let the user enter a search keyword
  • [3] Print the first heading of the table of contents
  • [4] Print the first paragraph
  • [5] Print the first paragraph under the second heading
  • [6] Print the title, subtitle, and introduction (that is, the entry content)
  • [7] Simplify the code

Step 1

import urllib.request
from bs4 import BeautifulSoup

def main():
    url = "http://baike.baidu.com/view/284853.htm"
    response = urllib.request.urlopen(url)        # fetch the page
    html = response.read()
    soup = BeautifulSoup(html, "html.parser")     # use Python's built-in parser
    tr = soup.find_all("meta")                    # all <meta> tags on the page
    ts = tr[3]                                    # on this page, the 4th one describes the entry
    tz = ts["content"]

    print(tz)

if __name__ == "__main__":
    main()
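
The index tr[3] simply picks the fourth <meta> tag, which happens to describe the entry on this page. If you would rather not rely on tag order, BeautifulSoup can also look the description tag up by its name attribute. A minimal sketch under that assumption (same URL as above):

import urllib.request
from bs4 import BeautifulSoup

url = "http://baike.baidu.com/view/284853.htm"
soup = BeautifulSoup(urllib.request.urlopen(url).read(), "html.parser")

# Find the tag by its name attribute instead of by its position in find_all("meta").
desc = soup.find("meta", attrs={"name": "description"})
if desc is not None:
    print(desc["content"])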

Step 2

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

def main():
    keyword = input("Please enter a keyword: ")
    keyword = urllib.parse.urlencode({"word": keyword})     # percent-encode the keyword
    url = "http://baike.baidu.com/search/word?%s" % keyword
    response = urllib.request.urlopen(url)
    html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    tr = soup.find_all("meta")
    ts = tr[3]
    tz = ts["content"]

    print(tz)

if __name__ == "__main__":
    main()
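
urllib.parse.urlencode turns the dictionary into a query string and percent-encodes anything that is not plain ASCII as UTF-8, which is what lets a Chinese keyword travel inside the URL. A quick interpreter check (the keywords here are just illustrations):

import urllib.parse

print(urllib.parse.urlencode({"word": "python"}))
# word=python
print(urllib.parse.urlencode({"word": "猪八戒"}))
# non-ASCII characters come out percent-encoded, e.g. word=%E7%8C%AA%E5%85%AB%E6%88%92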

Step 3

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

def main():
    keyword = input("Please enter a keyword: ")
    keyword = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % keyword
    response = urllib.request.urlopen(url)
    html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    tr = soup.find_all("meta")
    ts = tr[3]
    tz = ts["content"]

    print(tz)
    print("-->"*30)
    ty = soup.find_all("h2")
    print(ty[2])              # on this page, the first heading of the table of contents

if __name__ == "__main__":
    main()

Step 4

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

def main():
    keyword = input("Please enter a keyword: ")
    keyword = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % keyword
    response = urllib.request.urlopen(url)
    html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    tr = soup.find_all("meta")
    ts = tr[3]
    tz = ts["content"]

    print(tz)
    print("-->"*30)
    ty = soup.find_all("h2")
    ta = ty[2]
    print(ta.get_text()+":")
    tb = soup.find(class_="para")     # the first paragraph of the body text
    print(tb.get_text())

if __name__ == "__main__":
    main()

Step 5

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

def main():
    keyword = input("Please enter a keyword: ")
    keyword = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % keyword
    response = urllib.request.urlopen(url)
    html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    tr = soup.find_all("meta")
    ts = tr[3]
    tz = ts["content"]

    print(tz)
    print("-->"*30)
    ty = soup.find_all("h2")
    ta = ty[2]
    print(ta.get_text()+":")
    tb = soup.find(class_="para")
    print(tb.get_text())

    tc = ty[3]                        # the second content heading
    print(tc.get_text()+":")
    tb = soup.find_all(class_="para")
    te = tb[3]                        # on this page, the first paragraph under that heading
    print(te.get_text())
  
if __name__ == "__main__":
    main()
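
Indexing into find_all(class_="para") (the te = tb[3] line) ties the script to this particular page layout. If you want the paragraph that actually follows each heading, BeautifulSoup's find_next can walk forward from the heading tag itself. A sketch of that approach, under the same page-structure assumptions as above:

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

keyword = urllib.parse.urlencode({"word": input("Please enter a keyword: ")})
url = "http://baike.baidu.com/search/word?%s" % keyword
soup = BeautifulSoup(urllib.request.urlopen(url).read(), "html.parser")

for heading in soup.find_all("h2")[2:4]:          # the first two content headings
    para = heading.find_next(class_="para")       # first "para" block after this heading
    print(heading.get_text() + ":")
    if para is not None:
        print(para.get_text())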


Step 6

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

def summary(soup):
    # Print the title, the subtitle (if there is one), and the introduction of the entry.
    word = soup.h1.text
    if soup.h2:
        word += soup.h2.text
    print(word)
    if soup.find(class_="lemma-summary"):
        print(soup.find(class_="lemma-summary").text)

def main():
    keyword = input("Please enter a keyword: ")
    keyword = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % keyword
    response = urllib.request.urlopen(url)
    html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    tr = soup.find_all("meta")
    ts = tr[3]
    tz = ts["content"]

    print(tz)
    print("-->"*30)
    ty = soup.find_all("h2")
    ta = ty[2]
    print(ta.get_text()+":")
    tb = soup.find(class_="para")
    print(tb.get_text())

    tc = ty[3]
    print(tc.get_text()+":")
    tb = soup.find_all(class_="para")
    te = tb[3]
    print(te.get_text())
    print("-->"*30)
    summary(soup)

  
if __name__ == "__main__":
    main()

Step 7

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

def summary(soup):
    # Print the title, the subtitle (if there is one), and the introduction of the entry.
    word = soup.h1.text
    if soup.h2:
        word += soup.h2.text
    print(word)
    if soup.find(class_="lemma-summary"):
        print(soup.find(class_="lemma-summary").text)

def body():
    keyword = input("Please enter a keyword: ")
    keyword = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % keyword
    response = urllib.request.urlopen(url)
    html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    tr = soup.find_all("meta")
    ts = tr[3]
    tz = ts["content"]

    print(tz)
    print("-->"*30)
    ty = soup.find_all("h2")
    ta = ty[2]
    print(ta.get_text()+":")
    tb = soup.find(class_="para")
    print(tb.get_text())

    tc = ty[3]
    print(tc.get_text()+":")
    tb = soup.find_all(class_="para")
    te = tb[3]
    print(te.get_text())
    print("-->"*30)

    summary(soup)

def main():
    body()
   

  
if __name__ == "__main__":
    main()
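
Step 7 only moves the old main() into body(); if you want to simplify further, one option is to separate fetching and parsing from printing so each concern lives in its own function. A minimal sketch along those lines (the fetch() helper and its name are my own, not part of the original post):

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

def fetch(keyword):
    # Download and parse the Baidu Baike page for the given keyword.
    query = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query
    return BeautifulSoup(urllib.request.urlopen(url).read(), "html.parser")

def summary(soup):
    # Print the title, the subtitle (if there is one), and the introduction.
    word = soup.h1.text
    if soup.h2:
        word += soup.h2.text
    print(word)
    intro = soup.find(class_="lemma-summary")
    if intro:
        print(intro.text)

def main():
    summary(fetch(input("Please enter a keyword: ")))

if __name__ == "__main__":
    main()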


Reprinted from: blog.csdn.net/qq_51598376/article/details/112361544