# coding:utf-8
import re
import requests
import os
import sys
from requests.exceptions import ReadTimeout,HTTPError,RequestException
# Author contact (from the original post): V: jycg789, QQ: 29295842
# Seconds to wait for a server before giving up on a request.  Without a
# timeout, requests waits indefinitely and a single stalled host would hang
# the whole run.
REQUEST_TIMEOUT = 10

# An 11-digit run starting with 13/14/15/17/18 that is not part of a longer
# digit run (same result as finding every digit run and keeping those of
# length 11 with a matching prefix).
PHONE_PATTERN = re.compile(r"(?<!\d)1[34578]\d{9}(?!\d)")

# The value of every double-quoted href attribute.
HREF_PATTERN = re.compile(r"(?<=href=\").*?(?=\")")


def find_phone_numbers(text):
    """Return every 11-digit phone number found in *text*, in order.

    A number qualifies when it is a standalone run of exactly 11 digits
    whose first digit is ``1`` and whose second digit is 3, 4, 5, 7 or 8.
    Duplicates are kept.
    """
    return PHONE_PATTERN.findall(text)


def find_links(text):
    """Return the raw value of every ``href="..."`` attribute in *text*."""
    return HREF_PATTERN.findall(text)


def fetch_text(url):
    """Download *url* and return the response body as text.

    Returns ``None`` (after printing a notice) when the request fails for
    any reason reported by requests: timeouts, connection errors, invalid
    or relative URLs, and so on.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except RequestException as exc:
        # RequestException is the base class of ReadTimeout and HTTPError,
        # so one handler covers all three original clauses.
        print("Request failed for %s: %s" % (url, exc))
        return None
    return response.text


def save_phone_numbers(text, out, separator):
    """Print and write each phone number in *text*, then write *separator*.

    *out* is any object with a ``write`` method.  The separator is written
    even when no number was found, marking the end of one scanned page.
    """
    for number in find_phone_numbers(text):
        print(number + "\n")
        out.write(number + "\n")
    out.write(separator)


def main():
    """Scan every URL listed in urls.txt and the pages they link to.

    Phone numbers found on each page, and on each page reachable through
    one of its href attributes, are printed and appended to telphone.txt.
    """
    # The with-statement closes both files even if scanning raises.
    with open("urls.txt", "r") as url_file, open("telphone.txt", "w") as out:
        for line in url_file:
            # Drop the trailing newline so it is not sent as part of the URL.
            url = line.strip()
            if not url:
                continue
            print(url)

            page = fetch_text(url)
            if page is None:
                continue
            save_phone_numbers(page, out, "\n")

            # Follow links one level deep only.
            for link in find_links(page):
                linked_page = fetch_text(link)
                if linked_page is not None:
                    save_phone_numbers(linked_page, out, "\n\n")

    # "pause" is a Windows shell built-in that keeps the console open.
    if os.name == "nt":
        os.system("pause")


if __name__ == "__main__":
    main()
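# Method for scraping mobile phone numbers from web pages.
# Reposted from blog.csdn.net/jingzhunhuoke9/article/details/109171599
# (Blog page navigation labels followed here: "You may also like", "Today's picks", "Weekly ranking".)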