宿題の最後の質問
検索プログラムを作る
- [1] エントリの内容を出力する
- [2] ユーザーが検索キーワードを入力できるようにする
- [3] 目次の最初の見出しを出力する
- [4] 最初の段落を出力する
- [5] 2番目の見出しの下にある最初の段落を出力する
- [6] タイトル、サブタイトル、概要(つまり、エントリの内容)を出力する
- [7] コードを簡素化する
1番目のステップ
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Fetch one fixed Baidu Baike page and print its fourth <meta> tag's content.

    Raises:
        IndexError: if the page has fewer than four <meta> tags.
        KeyError: if the fourth <meta> tag has no ``content`` attribute.
    """
    url = "http://baike.baidu.com/view/284853.htm"
    # Use the response as a context manager so the HTTP connection is closed
    # as soon as the body has been read, instead of being leaked.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")  # use Python's built-in parser

    # NOTE(review): index 3 is presumably the description <meta> tag in the
    # page layout this was written against -- confirm against the live page.
    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])


if __name__ == "__main__":
    main()
2番目のステップ
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Prompt for a keyword, search Baidu Baike, and print the fourth <meta> content.

    Raises:
        IndexError: if the result page has fewer than four <meta> tags.
        KeyError: if the fourth <meta> tag has no ``content`` attribute.
    """
    # Import explicitly: the snippet's imports only name urllib.request, so
    # urllib.parse would otherwise be reachable only as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    # urlencode percent-encodes the (possibly non-ASCII) keyword for the query.
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Context manager closes the HTTP connection once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): index 3 is presumably the description <meta> tag -- confirm.
    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])


if __name__ == "__main__":
    main()
3番目のステップ
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Search Baidu Baike for a keyword; print the fourth <meta> content and third <h2>.

    The third <h2> element is printed as-is (markup included), after a
    separator line.

    Raises:
        IndexError: if the page has fewer than four <meta> or three <h2> tags.
        KeyError: if the fourth <meta> tag has no ``content`` attribute.
    """
    # Import explicitly: the snippet's imports only name urllib.request, so
    # urllib.parse would otherwise be reachable only as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Context manager closes the HTTP connection once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): index 3 is presumably the description <meta> tag -- confirm.
    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): the third <h2> is presumably the first table-of-contents
    # heading on the entry page -- confirm against the live markup.
    headings = soup.find_all("h2")
    print(headings[2])


if __name__ == "__main__":
    main()
4番目のステップ
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Search Baidu Baike for a keyword and print selected parts of the page.

    Printed in order: the fourth <meta> tag's content, a separator line, the
    text of the third <h2> followed by a colon, and the text of the first
    element with class ``para``.

    Raises:
        IndexError: if the page has fewer than four <meta> or three <h2> tags.
        KeyError: if the fourth <meta> tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Import explicitly: the snippet's imports only name urllib.request, so
    # urllib.parse would otherwise be reachable only as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Context manager closes the HTTP connection once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): index 3 is presumably the description <meta> tag -- confirm.
    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): the third <h2> is presumably the first table-of-contents
    # heading on the entry page -- confirm against the live markup.
    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")

    first_paragraph = soup.find(class_="para")
    print(first_paragraph.get_text())


if __name__ == "__main__":
    main()
5番目のステップ
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Search Baidu Baike for a keyword and print selected parts of the page.

    Printed in order: the fourth <meta> tag's content, a separator line, the
    third <h2> text with the first ``para`` element's text, then the fourth
    <h2> text with the fourth ``para`` element's text.

    Raises:
        IndexError: if the page has fewer than four <meta>, <h2> or ``para``
            elements.
        KeyError: if the fourth <meta> tag has no ``content`` attribute.
    """
    # Import explicitly: the snippet's imports only name urllib.request, so
    # urllib.parse would otherwise be reachable only as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Context manager closes the HTTP connection once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): index 3 is presumably the description <meta> tag -- confirm.
    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): headings[2] / headings[3] are presumably the first and
    # second table-of-contents headings -- confirm against the live markup.
    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")

    # Collect the paragraphs once; the first and the fourth are both needed.
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[0].get_text())

    print(headings[3].get_text() + ":")
    # NOTE(review): paragraph index 3 is assumed to be the first paragraph
    # under the second heading -- this depends on the entry's layout.
    print(paragraphs[3].get_text())


if __name__ == "__main__":
    main()
6番目のステップ
import urllib.request
import re
from bs4 import BeautifulSoup
def summary(soup):
    """Print the entry title (with subtitle, if present) and then its summary.

    Args:
        soup: Parsed page. The function reads ``soup.h1``, ``soup.h2`` and
            ``soup.find(class_="lemma-summary")``.

    Raises:
        AttributeError: if the page has no <h1> element.
    """
    heading = soup.h1.text
    # Append the subtitle BEFORE printing. Previously the title was printed
    # first and the subtitle was appended afterwards, so it was never shown.
    if soup.h2:
        heading += soup.h2.text
    print(heading)

    # Look the summary element up once and reuse the result.
    intro = soup.find(class_="lemma-summary")
    if intro:
        print(intro.text)
def main():
    """Search Baidu Baike for a keyword and print selected parts of the page.

    Printed in order: the fourth <meta> tag's content, a separator line, the
    third <h2> text with the first ``para`` element's text, the fourth <h2>
    text with the fourth ``para`` element's text, another separator line, and
    finally whatever ``summary(soup)`` prints.

    Raises:
        IndexError: if the page has fewer than four <meta>, <h2> or ``para``
            elements.
        KeyError: if the fourth <meta> tag has no ``content`` attribute.
    """
    # Import explicitly: the snippet's imports only name urllib.request, so
    # urllib.parse would otherwise be reachable only as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Context manager closes the HTTP connection once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): index 3 is presumably the description <meta> tag -- confirm.
    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): headings[2] / headings[3] are presumably the first and
    # second table-of-contents headings -- confirm against the live markup.
    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")

    # Collect the paragraphs once; the first and the fourth are both needed.
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[0].get_text())

    print(headings[3].get_text() + ":")
    # NOTE(review): paragraph index 3 is assumed to be the first paragraph
    # under the second heading -- this depends on the entry's layout.
    print(paragraphs[3].get_text())

    print("-->" * 30)
    summary(soup)


if __name__ == "__main__":
    main()
7番目のステップ
import urllib.request
import re
from bs4 import BeautifulSoup
def summary(soup):
    """Print the entry title (with subtitle, if present) and then its summary.

    Args:
        soup: Parsed page. The function reads ``soup.h1``, ``soup.h2`` and
            ``soup.find(class_="lemma-summary")``.

    Raises:
        AttributeError: if the page has no <h1> element.
    """
    heading = soup.h1.text
    # Append the subtitle BEFORE printing. Previously the title was printed
    # first and the subtitle was appended afterwards, so it was never shown.
    if soup.h2:
        heading += soup.h2.text
    print(heading)

    # Look the summary element up once and reuse the result.
    intro = soup.find(class_="lemma-summary")
    if intro:
        print(intro.text)
def body():
    """Search Baidu Baike for a keyword and print selected parts of the page.

    Printed in order: the fourth <meta> tag's content, a separator line, the
    third <h2> text with the first ``para`` element's text, the fourth <h2>
    text with the fourth ``para`` element's text, another separator line, and
    finally whatever ``summary(soup)`` prints.

    Raises:
        IndexError: if the page has fewer than four <meta>, <h2> or ``para``
            elements.
        KeyError: if the fourth <meta> tag has no ``content`` attribute.
    """
    # Import explicitly: the snippet's imports only name urllib.request, so
    # urllib.parse would otherwise be reachable only as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Context manager closes the HTTP connection once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): index 3 is presumably the description <meta> tag -- confirm.
    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): headings[2] / headings[3] are presumably the first and
    # second table-of-contents headings -- confirm against the live markup.
    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")

    # Collect the paragraphs once; the first and the fourth are both needed.
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[0].get_text())

    print(headings[3].get_text() + ":")
    # NOTE(review): paragraph index 3 is assumed to be the first paragraph
    # under the second heading -- this depends on the entry's layout.
    print(paragraphs[3].get_text())

    print("-->" * 30)
    summary(soup)
def main():
    """Script entry point; all the work is delegated to body()."""
    body()


# Only run the scraper when executed as a script, not when imported.
if __name__ == "__main__":
    main()