When I first started crawling I did not disguise my request headers as a browser's, which got my IP banned while scraping the Beijing and Fuzhou districts. Fortunately I found the cause before the Xiamen area was blocked as well. The code I ended up with is as follows:
# -*- coding: utf-8 -*-
"""Crawl community listings from xm.maitian.cn and collect per-community details."""
import requests
from bs4 import BeautifulSoup

# Index page listing all communities in Xiamen.
page_url = "http://xm.maitian.cn/xqall"

# Browser-like headers so the site does not ban the crawler's IP.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
    "Referer": "http://xm.maitian.cn/esfall",
    "Connection": "keep-alive",
    "Content-Type": "text/plain; charset=utf-8",
}


def get_communities_url():
    """Fetch the community index page and scrape every linked detail page.

    Returns:
        list[dict]: one dict per community, as produced by get_target_info().

    Raises:
        Exception: re-raised if the HTTP request for the index page fails.
    """
    all_data = []
    try:
        response = requests.get(url=page_url, headers=headers)
    except Exception as e:
        print("请求连接错误")  # "connection/request error"
        raise e
    soup = BeautifulSoup(response.text, "lxml")
    soup = soup.find("div", "list_wrap")
    for tag_li in soup.find_all("li"):
        # Each <li> holds an <h1><a href=...> link to one community's page.
        href = tag_li.h1.a["href"]
        new_url = page_url.replace("/xqall", href)
        dict_data = get_target_info(new_url)
        if dict_data:
            all_data.append(dict_data)
    return all_data


def get_target_info(new_url):
    """Scrape a single community detail page into a dict of named fields.

    Args:
        new_url: absolute URL of the community's detail page.

    Returns:
        dict: scraped fields (average price, listing counts, developer,
        construction/area/ratio details).

    Raises:
        Exception: re-raised if the HTTP request fails.
    """
    info = {}  # named `info` to avoid shadowing the builtin `dict`
    try:
        response = requests.get(url=new_url, headers=headers)
    except Exception as e:
        print("请求连接错误")  # "connection/request error"
        raise e
    soup = BeautifulSoup(response.text, "lxml")
    soup1 = soup.find("section", "home_main")
    ps = soup1.find_all("p")
    # Average price per square meter for the community.
    info["community_avg"] = ps[0].b.string.strip()
    # Counts of homes for sale / for rent; "套" means "units".
    # NOTE(review): the unit suffix was garbled in the pasted source — confirm "套".
    info["unsold_homes"] = ps[1].find_all("em")[0].a.string + "套"
    info["rent_homes"] = ps[1].find_all("em")[1].a.string + "套"
    # Business district the community belongs to.
    info["business_circle"] = ps[2].label.string
    # Developer of the community.
    info["developers"] = ps[2].em.string
    # The three <li> columns of the details list each carry three <p> rows.
    soup2 = soup.find("ul", "home_details")
    for tag_li in soup2.find_all("li"):
        p = tag_li.find_all("p")
        if tag_li["class"] == ["li_left"]:
            info["area"] = p[0].em.string              # building area
            info["property_company"] = p[1].em.string  # property-management company
            info["industry_fee"] = p[2].em.string      # property fee
        elif tag_li["class"] == ["li_center"]:
            info["built_year"] = p[0].em.string        # year built
            info["total_houses"] = p[1].em.string      # total number of homes
            info["green_rates"] = p[2].em.string       # greening rate
        elif tag_li["class"] == ["li_right"]:
            info["cover_area"] = p[0].em.string        # land area
            info["total_built"] = p[1].em.string       # total number of buildings
            info["product_rates"] = p[2].em.string     # plot (floor-area) ratio
    return info


if __name__ == '__main__':
    data_all = get_communities_url()
    print(data_all)