www.alta.ru/taksa-online 爬虫代码

#encoding=utf-8
def printred(kw):
    """Print ``kw`` to stdout wrapped in ANSI bold-red escape sequences.

    The text is preceded by the ``ESC[1;31m`` sequence plus one space and
    followed by the ``ESC[0m`` reset sequence.
    """
    template = "\033[1;31m {}\033[0m"
    colored = template.format(kw)
    print(colored)
def getHeaderByCopy(str):
    """Parse pasted "Name: value" header text into a dict.

    The text is split on newlines; every line containing the separator
    ``": "`` contributes one entry. Lines without the separator are ignored.
    Leading whitespace is stripped from each value.

    Args:
        str: Raw header text, one ``Name: value`` pair per line. (The
            parameter name shadows the builtin; it is kept so existing
            keyword callers keep working.)

    Returns:
        dict mapping header names to their values. If a name occurs more
        than once, the last occurrence wins.
    """
    header = {}
    for line in str.split("\n"):
        # Split on the FIRST ": " only, so a value that itself contains
        # ": " is kept whole instead of the line being dropped.
        name, sep, value = line.partition(": ")
        if sep:
            header[name] = value.lstrip()
    return header
def loger(contents):
    """Print ``contents`` and append it, plus a newline, to ``info.txt``.

    ``info.txt`` is opened relative to the current working directory in
    append mode with UTF-8 encoding.

    Args:
        contents: The text to log; it is concatenated with ``"\\n"``, so it
            must be a ``str``.
    """
    # The context manager closes the file even if print() or write() raises
    # (e.g. a non-str ``contents`` makes the concatenation fail).
    with open('info.txt', 'a+', encoding='utf-8') as fh:
        print(contents)
        fh.write(contents + "\n")
def logerred(contents):
    """Print ``contents`` via ``printred`` and append it to ``info.txt``.

    ``info.txt`` is opened relative to the current working directory in
    append mode with UTF-8 encoding. Only the console output goes through
    ``printred``; the text written to the file is ``contents`` plus a
    newline.

    Args:
        contents: The text to log; it is concatenated with ``"\\n"``, so it
            must be a ``str``.
    """
    # The context manager closes the file even if printing or writing raises.
    with open('info.txt', 'a+', encoding='utf-8') as fh:
        printred(contents)
        fh.write(contents + "\n")

def get_html_by_url(url, encoding='utf-8', timeout=30):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        encoding: Encoding assigned to the response before its text is
            read, overriding whatever ``requests`` detected.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument. Pass ``None`` to wait indefinitely, which was the
            behaviour before this parameter existed.

    Returns:
        The response body decoded with ``encoding``.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection errors, timeouts, ...).
    """
    import requests

    # Without a timeout an unresponsive server would block the crawler
    # indefinitely.
    r = requests.get(url, timeout=timeout)
    r.encoding = encoding
    return r.text

def get_soup_by_html(html):
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(h

猜你喜欢

转载自blog.csdn.net/Allure_LoveU/article/details/121147386
今日推荐