robots.txt: check a site's robots.txt before fetching, to reduce the risk of
your crawler being blocked by the website's anti-crawler mechanism.
```RobotParser.py
import urllib
import urllib.parse
import urllib.request
import urllib.robotparser as urobot

import requests
# Method 1: inline script — consult robots.txt before fetching the page.
url = "https://www.taobao.com/"
rp = urobot.RobotFileParser()
# Strip the trailing slash first: `url + "/robots.txt"` would otherwise
# build "https://www.taobao.com//robots.txt" (double slash).
rp.set_url(url.rstrip("/") + "/robots.txt")
rp.read()  # download and parse the robots.txt file
user_agent = 'Googlebot'
# Only fetch if robots.txt allows this user agent on the target path.
if rp.can_fetch(user_agent, 'https://www.taobao.com/item/'):
    site = requests.get(url)
    print("seem good")
else:
    print("cannot scrap because robots.txt banned you!")
# Method 2: the same check wrapped in a reusable function.
def url_robots(url, newurl, user_agent):
    """Fetch ``newurl`` only if the site's robots.txt allows ``user_agent``.

    Parameters:
        url: base URL of the site (e.g. "https://www.taobao.com/").
        newurl: the concrete URL we want to crawl.
        user_agent: user-agent string checked against the robots.txt rules.

    Prints "seem good" and performs the fetch when allowed; otherwise
    prints a refusal message. Returns None.
    """
    rp = urobot.RobotFileParser()
    # urljoin avoids the double slash that `url + "/robots.txt"` produces
    # when `url` already ends with "/".
    rp.set_url(urllib.parse.urljoin(url, "/robots.txt"))
    rp.read()  # download and parse robots.txt
    if rp.can_fetch(user_agent, newurl):
        urllib.request.urlopen(newurl)
        print("seem good")
    else:
        print("cannot scrap because robots.txt banned you!")
# Drive method 2 with the same Taobao example as method 1.
url = "https://www.taobao.com/"
newurl = 'https://www.taobao.com/item/'
user_agent = 'Googlebot'
# url_robots returns None; `test` just records that the call completed.
test = url_robots(url=url, newurl=newurl, user_agent=user_agent)
Running result: "seem good" is printed twice (once by each method).