Generating a random User-Agent for Python web crawlers

Some sites use anti-scraping techniques. One of the more basic ones is to inspect the User-Agent field of the request headers to decide whether the request came from a real browser.

When crawling such sites, the crawler needs to send a realistic browser User-Agent.

import random
import re
from typing import Dict, Iterator, List, Union


class UserAgent:
    """Singleton pool of browser User-Agent strings read from a text file.

    The file is expected to hold one User-Agent string per line. Each
    non-blank line is kept in file order and, when it contains one of the
    words ``firefox``, ``chrome``, ``msie`` or ``opera`` (case-insensitive,
    leftmost match wins), it is also grouped under that lower-cased word.

    The object supports ``for ua in pool``, ``pool[i]`` / ``pool[a:b]`` and
    ``pool.random()``; the ``chrome``, ``firefox``, ``ie`` and ``opera``
    properties each return a random string from the matching group.
    """

    # Path of the User-Agent list (one string per line).
    __filepath = 'user-agent.txt'

    # The single shared instance, created by the first UserAgent() call.
    __instance = None

    # Lower-cased browser keyword -> User-Agent strings containing it.
    __dict: Dict[str, List[str]] = {}

    # Every User-Agent string, in file order.
    __list: List[str] = []

    # Cursor used by __next__ when the object itself is advanced with next().
    __iter = None

    # Becomes True once the file has been read, so that later UserAgent()
    # calls (which re-run __init__ on the singleton) do not reload it.
    __loaded = False

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        if cls.__instance is None:
            cls.__instance = super().__new__(cls)
        return cls.__instance

    def __init__(self):
        """Load and classify the User-Agent file the first time only.

        Raises:
            OSError: if the User-Agent file cannot be opened.
        """
        if self.__loaded:
            return

        pattern = re.compile(r'firefox|chrome|msie|opera', re.I)
        by_browser: Dict[str, List[str]] = {}
        everything: List[str] = []

        # utf_8_sig transparently drops a leading BOM if the file has one.
        with open(self.__filepath, 'r', encoding='utf_8_sig') as f:
            for raw in f:
                agent = raw.strip()
                if not agent:
                    # A blank line is not a usable User-Agent.
                    continue
                everything.append(agent)
                found = pattern.search(agent)
                if found:
                    key = found.group().lower()
                    by_browser.setdefault(key, []).append(agent)

        # Publish only after a complete, successful read.
        self.__dict = by_browser
        self.__list = everything
        self.__iter = None
        self.__loaded = True

    def __pick(self, family: str) -> str:
        """Return a random User-Agent from the group named ``family``.

        Raises:
            KeyError: if no loaded string matched ``family``.
        """
        try:
            pool = self.__dict[family]
        except KeyError:
            raise KeyError(
                f'no {family!r} User-Agent strings were loaded '
                f'from {self.__filepath!r}'
            ) from None
        return random.choice(pool)

    @property
    def chrome(self) -> str:
        """A random User-Agent from the ``chrome`` group."""
        return self.__pick('chrome')

    @property
    def firefox(self) -> str:
        """A random User-Agent from the ``firefox`` group."""
        return self.__pick('firefox')

    @property
    def ie(self) -> str:
        """A random User-Agent from the ``msie`` group."""
        return self.__pick('msie')

    @property
    def opera(self) -> str:
        """A random User-Agent from the ``opera`` group."""
        return self.__pick('opera')

    def random(self) -> str:
        """Return a random User-Agent from the whole list.

        Raises:
            IndexError: if the list is empty.
        """
        return random.choice(self.__list)

    def __iter__(self) -> Iterator[str]:
        """Return a fresh iterator over all User-Agent strings.

        Each call yields an independent iterator, so nested or interleaved
        loops over the singleton do not disturb one another. The cursor used
        by ``next(obj)`` is also rewound to the start.
        """
        self.__iter = iter(self.__list)
        return iter(self.__list)

    def __next__(self) -> str:
        """Advance the object's own cursor and return the next string.

        The cursor starts at the beginning and is rewound by ``iter(obj)``.

        Raises:
            StopIteration: when the cursor is exhausted.
        """
        if self.__iter is None:
            self.__iter = iter(self.__list)
        return next(self.__iter)

    def __getitem__(self, index: Union[int, slice]) -> Union[str, List[str]]:
        """Return the string at ``index``, or a list of strings for a slice."""
        return self.__list[index]


# Demo: obtain the UserAgent object, draw one random User-Agent string from
# it, and print that string.
useragent = UserAgent()
chosen = useragent.random()
print(chosen)

'''
for n in useragent:
    print(n)
'''

 

user-agent.txt

Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36
Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36
Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36
Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36
Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36
Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36
Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36
Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36
Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36
...... Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; zh-cn) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5

 

Baidu network disk

Link: https://pan.baidu.com/s/1ramkIyjVSI2_GXbxypj1Dg
extraction code: hak8

Guess you like

Origin www.cnblogs.com/whnba/p/11618438.html