숙제의 마지막 문제
검색기 만들기
- [1] 항목 내용 출력
- [2] 사용자가 검색 키워드를 입력하도록 허용
- [3] 목차의 첫 번째 제목 출력
- [4] 첫 단락 출력
- [5] 두 번째 제목 아래 첫 번째 단락 출력
- [6] 제목, 부제 및 소개(즉, 항목의 내용)를 출력합니다.
- [7] 코드 단순화
- [7] 코드 단순화
첫 번째 단계
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Fetch a fixed Baidu Baike page and print the content of its fourth <meta> tag.

    Raises:
        IndexError: If the page contains fewer than four ``<meta>`` tags.
    """
    url = "http://baike.baidu.com/view/284853.htm"
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")  # use Python's built-in HTML parser
    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 is presumably the description <meta>; confirm
    # against the live page layout.
    print(meta_tags[3]["content"])


if __name__ == "__main__":
    main()
두 번째 단계
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Prompt for a keyword, look it up on Baidu Baike, and print the fourth <meta> content.

    Raises:
        IndexError: If the page contains fewer than four ``<meta>`` tags.
    """
    # Import explicitly instead of relying on urllib.request pulling in
    # urllib.parse as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Close the HTTP response deterministically once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")
    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 is presumably the description <meta>; confirm
    # against the live page layout.
    print(meta_tags[3]["content"])


if __name__ == "__main__":
    main()
세 번째 단계
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Prompt for a keyword, fetch its Baidu Baike page, and print two items.

    Output, in order: the ``content`` of the fourth ``<meta>`` tag, a
    separator line, and the third ``<h2>`` element (printed as a tag, markup
    included).

    Raises:
        IndexError: If the page has fewer ``<meta>`` or ``<h2>`` elements
            than the fixed indices require.
    """
    # Import explicitly instead of relying on urllib.request pulling in
    # urllib.parse as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Close the HTTP response deterministically once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): the third <h2> is assumed to be the first heading of the
    # table of contents; confirm against the live page layout.
    headings = soup.find_all("h2")
    print(headings[2])


if __name__ == "__main__":
    main()
네 번째 단계
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Prompt for a keyword, fetch its Baidu Baike page, and print selected parts.

    Output, in order: the ``content`` of the fourth ``<meta>`` tag, a
    separator line, the text of the third ``<h2>`` followed by a colon, and
    the text of the first element with class ``para``.

    Raises:
        IndexError: If the page has fewer ``<meta>`` or ``<h2>`` elements
            than the fixed indices require.
        AttributeError: If no element with class ``para`` exists
            (``soup.find`` then returns ``None``).
    """
    # Import explicitly instead of relying on urllib.request pulling in
    # urllib.parse as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Close the HTTP response deterministically once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): the third <h2> is assumed to be the first heading of the
    # table of contents; confirm against the live page layout.
    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")

    first_para = soup.find(class_="para")
    print(first_para.get_text())


if __name__ == "__main__":
    main()
다섯 번째 단계
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Prompt for a keyword, fetch its Baidu Baike page, and print selected parts.

    Output, in order: the ``content`` of the fourth ``<meta>`` tag, a
    separator line, the third ``<h2>`` text with the first ``para`` element's
    text, then the fourth ``<h2>`` text with the fourth ``para`` element's
    text.

    Raises:
        IndexError: If the page has fewer ``<meta>``, ``<h2>`` or ``para``
            elements than the fixed indices require.
        AttributeError: If no element with class ``para`` exists
            (``soup.find`` then returns ``None``).
    """
    # Import explicitly instead of relying on urllib.request pulling in
    # urllib.parse as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Close the HTTP response deterministically once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): the fixed indices below mirror one specific page layout;
    # confirm against the live page.
    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    print(headings[3].get_text() + ":")
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[3].get_text())


if __name__ == "__main__":
    main()
여섯 번째 단계
import urllib.request
import re
from bs4 import BeautifulSoup
def summary(soup):
    """Print the page title (with subtitle, if any) and then its summary text.

    Args:
        soup: Parsed document exposing ``h1``, ``h2`` and
            ``find(class_=...)`` the way a BeautifulSoup object does.
    """
    title = soup.h1.text
    # Append the subtitle before printing so it is part of the printed
    # heading rather than being computed and then discarded.
    if soup.h2:
        title += soup.h2.text
    print(title)

    # Look the summary element up once and reuse the result.
    intro = soup.find(class_="lemma-summary")
    if intro:
        print(intro.text)
def main():
    """Prompt for a keyword, fetch its Baidu Baike page, and print selected parts.

    Output, in order: the ``content`` of the fourth ``<meta>`` tag, a
    separator line, the third ``<h2>`` text with the first ``para`` element's
    text, the fourth ``<h2>`` text with the fourth ``para`` element's text,
    another separator line, and finally whatever ``summary(soup)`` prints.

    Raises:
        IndexError: If the page has fewer ``<meta>``, ``<h2>`` or ``para``
            elements than the fixed indices require.
        AttributeError: If no element with class ``para`` exists
            (``soup.find`` then returns ``None``).
    """
    # Import explicitly instead of relying on urllib.request pulling in
    # urllib.parse as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Close the HTTP response deterministically once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): the fixed indices below mirror one specific page layout;
    # confirm against the live page.
    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    print(headings[3].get_text() + ":")
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[3].get_text())

    print("-->" * 30)
    summary(soup)


if __name__ == "__main__":
    main()
일곱 번째 단계
import urllib.request
import re
from bs4 import BeautifulSoup
def summary(soup):
    """Print the page title (with subtitle, if any) and then its summary text.

    Args:
        soup: Parsed document exposing ``h1``, ``h2`` and
            ``find(class_=...)`` the way a BeautifulSoup object does.
    """
    heading = soup.h1.text
    # Append the subtitle before printing so it is part of the printed
    # heading rather than being computed and then discarded.
    if soup.h2:
        heading += soup.h2.text
    print(heading)

    # Look the summary element up once and reuse the result.
    lemma_summary = soup.find(class_="lemma-summary")
    if lemma_summary:
        print(lemma_summary.text)
def body():
    """Prompt for a keyword, fetch its Baidu Baike page, and print selected parts.

    Output, in order: the ``content`` of the fourth ``<meta>`` tag, a
    separator line, the third ``<h2>`` text with the first ``para`` element's
    text, the fourth ``<h2>`` text with the fourth ``para`` element's text,
    another separator line, and finally whatever ``summary(soup)`` prints.

    Raises:
        IndexError: If the page has fewer ``<meta>``, ``<h2>`` or ``para``
            elements than the fixed indices require.
        AttributeError: If no element with class ``para`` exists
            (``soup.find`` then returns ``None``).
    """
    # Import explicitly instead of relying on urllib.request pulling in
    # urllib.parse as a side effect.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Close the HTTP response deterministically once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    print(meta_tags[3]["content"])
    print("-->" * 30)

    # NOTE(review): the fixed indices below mirror one specific page layout;
    # confirm against the live page.
    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    print(headings[3].get_text() + ":")
    paragraphs = soup.find_all(class_="para")
    print(paragraphs[3].get_text())

    print("-->" * 30)
    summary(soup)
def main():
    """Script entry point: run the interactive lookup implemented by ``body``."""
    body()


if __name__ == "__main__":
    main()