Movie resource search assistant 2

New in this version: search results can be saved sorted by a chosen criterion, and resources are searched on multiple sites.

# -*- coding: utf-8 -*-
"""
Created on Tue Jan 30 17:01:26 2018

@author: gzs10227

Search for movie resources
"""
import re,os
import requests
import time,datetime
import urllib
import sys
# Python 2 only: force UTF-8 as the implicit str <-> unicode codec.
# sys.setdefaultencoding is not reachable on the sys module as first imported,
# so the module is reloaded to get it back. The standard streams are saved
# beforehand and restored afterwards, presumably because reload(sys) can
# rebind them (e.g. when an IDE has redirected them) - TODO confirm.
stderr = sys.stderr
stdout = sys.stdout
reload(sys)
sys.setdefaultencoding('utf8')
sys.stderr = stderr
sys.stdout = stdout
# Replace urllib's registry proxy lookup with a stub that reports no proxies;
# presumably this stops system (Windows registry) proxy settings being used.
urllib.getproxies_registry = lambda: {}
# NOTE(review): `null` is not referenced anywhere in the visible code; it looks
# like a leftover for evaluating JSON-like text - confirm before removing.
null = ''
import locale

from lxml import etree

# HTTP headers attached to every request made by open_url(): the
# X-Requested-With value conventionally sent by browser AJAX calls plus a
# desktop Chrome 56 User-Agent string.
HEADERS = {}
HEADERS['X-Requested-With'] = 'XMLHttpRequest'
HEADERS['User-Agent'] = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
)

print u'Please enter the movie you want to search:'
keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))

print u'\nPlease enter the path where you want to save the file:'
save_path = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))

print u'\nWhat sort of order do you want to save the file in:'
print u' 1, file size 2, creation time 3, number of downloads 4, no requirement. Save search results in txt format'
GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)))
while GS_num > 4:
    print u'The input requirements are wrong, please re-enter: 1, file size 2, creation time 3, download times 4, no requirements'
    GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)))

if GS_num == 4:
    save = os.path.join(save_path+'\\',keyword+'.txt')
    fle = open(save,'w')



def open_url(url, timeout=15):
    """Download *url* and parse the response body into an lxml HTML tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout requests waits indefinitely, so a single stalled server
            would freeze the whole search.

    Returns:
        The value returned by ``etree.HTML`` for the response body.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the timeout expires.
    """
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    return etree.HTML(response.content)


def get_url(keyword):
    """Return the absolute detail-page URLs listed on the btyunsou search page.

    The search page for *keyword* is fetched with open_url(); every href
    found under ``li.media h4 a`` is prefixed with the site root.
    """
    search_page = open_url('http://www.btyunsou.me/search/%s_ctime_1.html' % keyword)
    detail_links = []
    for href in search_page.xpath('//li[@class="media"]//h4//a/@href'):
        detail_links.append('http://www.btyunsou.me' + href)
    return detail_links


def get_info(url):    
    web_data = open_url(url)
    try:
        title = web_data.xpath(r'//div[@class="row-flbtd tor-title"]/h2/text()')[0]
    except:
        title = ''
        
    if keyword in title:        
        print u'movie name:',title
        mange_link = 'magnet:? xt = urn: btih:' + url [23: -5]
        print u'Magnetic link: ',mange_link
        if GS_num == 4:
            fle.write(u'movie name:'+ title + '\n')
            fle.write(u'magnet link: ' + mange_link +'\n')
            datalist = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')[:10]
            for i in range(0,len(datalist),2):
                print datalist[i],datalist[i+1]
                fle.write(datalist[i] + datalist[i+1] + '\n')
        else:
            titles.append(title)
            cls.append(mange_link)
            datalist = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')[4:10]
            for i in range(0,len(datalist),2):
                print datalist[i],datalist[i+1]     
                if i == 0:
                    size.append(datalist[1]) # file size
                if i == 2:
                    ctime.append(datalist[3]) # time
                if i == 4:
                    loadnum.append(datalist[5]) # download times
            
    else:
        print 'Sorry! None Search,Please change one: '


def get_info2(keyword):
    url = 'https://www.ciliba.org/s/%s.html'%keyword
    web_data = open_url(url)
    hrefs = web_data.xpath(r'//div[@class="item-title"]/h3/a/@href')
    for href in hrefs:
        try:
            web_data = open_url(href)
        except:
            continue
        try:
            title = web_data.xpath(r'//*[@id="wall"]/h1/text()')[0]
        except:
            title = ''
        if keyword in title:
            print u'movie name: ',title
            xl_link = web_data.xpath(r'//*[@id="wall"]/div[1]/p[6]/a[2]/@href')[0]
            print u'Xunlei link: ',xl_link
            data1 = web_data.xpath('//*[@id="wall"]/div[1]/p[2]/text()')[0]
            data2 = web_data.xpath('//*[@id="wall"]/div[1]/p[3]/text()')[0]
            print data1
            print data2
            if GS_num == 4:
                fle.write(u'movie name:'+ title + '\n')
                fle.write(u'Thunder link: ' + xl_link +'\n')
                fle.write(data1)
                fle.write('\n')
                fle.write(data2)
                fle.write('\n')
            else:
                titles.append(title)
                cls.append(xl_link)
                size.append(data1.split(':')[1])
                ctime.append(data2.split(':')[1])
                loadnum.append(1)
        else:
            print 'Sorry! None Search,Please change one: '


# Multipliers that turn a size expressed in the given unit into megabytes.
# 'b' must stay last because every other suffix also ends in 'b'.
_SIZE_UNIT_TO_MB = (
    ('tb', 1024.0 * 1024.0),
    ('gb', 1024.0),
    ('mb', 1.0),
    ('kb', 1.0 / 1024.0),
    ('b', 1.0 / (1024.0 * 1024.0)),
)


def clear(i):
    """Convert a human-readable file size into whole megabytes.

    The unit suffix is matched case-insensitively (TB, GB, MB, KB, B); text
    without a unit is taken to be megabytes already. As before, the number is
    rounded to two decimals before scaling and the result is truncated.

    Args:
        i: Size text such as ``'1.5 GB'`` or ``'700Mb'``.

    Returns:
        The size in megabytes as an int.

    Raises:
        ValueError: if the numeric part cannot be parsed.
    """
    text = i.replace(' ', '').strip()
    lowered = text.lower()
    factor = 1.0
    for suffix, scale in _SIZE_UNIT_TO_MB:
        if lowered.endswith(suffix):
            text = text[:-len(suffix)]
            factor = scale
            break
    return int(round(float(text), 2) * factor)
        

if __name__ == '__main__':       
    i = 1
    while True:    
        if i > 1:
            print u'Please enter the movie you want to search for:'
            keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))
            print u'\nWhich sorting method do you want to save the files in: 1, file size 2, creation time 3, download times 4, no requirement, save in txt'
            GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)))
            while GS_num > 4:
                print u'The input requirement is wrong, please re-enter [1-4]: 1, file size 2, creation time 3, download times 4, no requirement, save as txt'
                GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)))            
            if GS_num == 4:
                save = os.path.join(save_path,keyword+'.txt')
                fle = open(save,'w')
                
        links = get_url(keyword)
        #df = pd.DataFrame()
        titles = [];cls = [];size = [];ctime = [];loadnum = []
        for url in links:
            try:            
                get_info(url)
            except:
                continue
            if GS_num == 4:
                fle.write('--------------------------------------------------')
                fle.write('\n')
                
        try:
            get_info2(keyword)
        except:
            pass
        
        if GS_num != 4:
# df[u'title'] = titles
# df[u'creation time'] = ctime
# df[u'file size in MB'] = size
# df[u'download count'] = loadnum
# df[u'download link'] = cls
# df[u'downloads'] = df[u'downloads'].astype(int)
# df[u'file size MB'] = map(clear,df[u'file size MB'])
            size = map(clear,size)
            df_list = []
            for ii in range(len(titles)):
                df_list.append([titles[ii],cls[ii],size[ii],ctime[ii],loadnum[ii]])
            save = os.path.join(save_path,keyword+'.txt')
            fle2 = open(save,'w')
            if GS_num == 1:
                #df2 = df.sort_values(by = u'file size in MB', ascending = False)
                df2 = sorted(df_list, key=lambda x: x[2],reverse = True)
            if GS_num == 2:
                #df2 = df.sort_values(by = u'creation time', ascending = False)
                df2 = sorted(df_list, key=lambda x: x[3],reverse = True)
            else:
                #df2 = df.sort_values(by = u'downloads', ascending = False)    
                df2 = sorted(df_list, key=lambda x: x[4],reverse = True)
            #df2.to_excel(save,index = False,encoding = 'gbk')
            
            for sl in df2:
                for s in range(len(sl)):
                    if s == 0:
                        ss = u'movie name:' + sl[s]
                    if s == 1:
                        ss = u'magnet link:' + sl[s]
                    if s == 2:
                        ss = u'file size MB:' + str(sl[s])
                    if s == 3:
                        ss = u' creation time: ' + str(sl[s])
                    if s == 4:
                        ss = u'hotness: ' + str(sl[s])
                    fle2.write(str(ss))
                    fle2.write('\n')
                fle2.write('--------------------------------------------------')
                fle2.write('\n')  
                
            fle2.close()
                
        else:                
            fle.close()
        i = i + 1
        print u'\nIf you want to search again, enter the movie name! Otherwise please close the window manually.\n'
    
    
    
    
    


Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325704235&siteId=291194637