Newly added features: save the results sorted by a chosen criterion; search multiple sites for resources
# -*- coding: utf-8 -*- """ Created on Tue Jan 30 17:01:26 2018 @author: gzs10227 Search for movie resources """ import re,os import requests import time,datetime import urllib import sys stderr = sys.stderr stdout = sys.stdout reload(sys) sys.setdefaultencoding('utf8') sys.stderr = stderr sys.stdout = stdout urllib.getproxies_registry = lambda: {} null = '' from lxml import etree local import HEADERS = { 'X-Requested-With': 'XMLHttpRequest', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 ' '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' } print u'Please enter the movie you want to search:' keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) print u'\nPlease enter the path where you want to save the file:' save_path = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) print u'\nWhat sort of order do you want to save the file in:' print u' 1, file size 2, creation time 3, number of downloads 4, no requirement. 
Save search results in txt format' GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))) while GS_num > 4: print u'The input requirements are wrong, please re-enter: 1, file size 2, creation time 3, download times 4, no requirements' GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))) if GS_num == 4: save = os.path.join(save_path+'\\',keyword+'.txt') fle = open(save,'w') def open_url(url): html = requests.get(url,headers = HEADERS).content web_data = etree.HTML(html) return web_data def get_url(keyword): main_url = 'http://www.btyunsou.me/search/%s_ctime_1.html'%keyword web_data = open_url(main_url) links = web_data.xpath('//li[@class="media"]//h4//a/@href') # Get links links = ['http://www.btyunsou.me'+i for i in links] return links def get_info(url): web_data = open_url(url) try: title = web_data.xpath(r'//div[@class="row-flbtd tor-title"]/h2/text()')[0] except: title = '' if keyword in title: print u'movie name:',title mange_link = 'magnet:? xt = urn: btih:' + url [23: -5] print u'Magnetic link: ',mange_link if GS_num == 4: fle.write(u'movie name:'+ title + '\n') fle.write(u'magnet link: ' + mange_link +'\n') datalist = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')[:10] for i in range(0,len(datalist),2): print datalist[i],datalist[i+1] fle.write(datalist[i] + datalist[i+1] + '\n') else: titles.append(title) cls.append(mange_link) datalist = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')[4:10] for i in range(0,len(datalist),2): print datalist[i],datalist[i+1] if i == 0: size.append(datalist[1]) # file size if i == 2: ctime.append(datalist[3]) # time if i == 4: loadnum.append(datalist[5]) # download times else: print 'Sorry! 
None Search,Please change one: ' def get_info2(keyword): url = 'https://www.ciliba.org/s/%s.html'%keyword web_data = open_url(url) hrefs = web_data.xpath(r'//div[@class="item-title"]/h3/a/@href') for href in hrefs: try: web_data = open_url(href) except: continue try: title = web_data.xpath(r'//*[@id="wall"]/h1/text()')[0] except: title = '' if keyword in title: print u'movie name: ',title xl_link = web_data.xpath(r'//*[@id="wall"]/div[1]/p[6]/a[2]/@href')[0] print u'Xunlei link: ',xl_link data1 = web_data.xpath('//*[@id="wall"]/div[1]/p[2]/text()')[0] data2 = web_data.xpath('//*[@id="wall"]/div[1]/p[3]/text()')[0] print data1 print data2 if GS_num == 4: fle.write(u'movie name:'+ title + '\n') fle.write(u'Thunder link: ' + xl_link +'\n') fle.write(data1) fle.write('\n') fle.write(data2) fle.write('\n') else: titles.append(title) cls.append(xl_link) size.append(data1.split(':')[1]) ctime.append(data2.split(':')[1]) loadnum.append(1) else: print 'Sorry! None Search,Please change one: ' def clear(i): if 'Gb' in i or 'GB' in i: inum = round(float(i.replace('GB','').replace('Gb','').replace(' ','')),2) return int (inum * 1024) else: inum = round(float(i.replace('Mb','').replace('MB','').replace(' ','')),2) return int(inum) if __name__ == '__main__': i = 1 while True: if i > 1: print u'Please enter the movie you want to search for:' keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) print u'\nWhich sorting method do you want to save the files in: 1, file size 2, creation time 3, download times 4, no requirement, save in txt' GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))) while GS_num > 4: print u'The input requirement is wrong, please re-enter [1-4]: 1, file size 2, creation time 3, download times 4, no requirement, save as txt' GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))) if GS_num == 4: save = os.path.join(save_path,keyword+'.txt') fle = 
open(save,'w') links = get_url(keyword) #df = pd.DataFrame() titles = [];cls = [];size = [];ctime = [];loadnum = [] for url in links: try: get_info(url) except: continue if GS_num == 4: fle.write('--------------------------------------------------') fle.write('\n') try: get_info2(keyword) except: pass if GS_num != 4: # df[u'title'] = titles # df[u'creation time'] = ctime # df[u'file size in MB'] = size # df[u'download count'] = loadnum # df[u'download link'] = cls # df[u'downloads'] = df[u'downloads'].astype(int) # df[u'file size MB'] = map(clear,df[u'file size MB']) size = map(clear,size) df_list = [] for ii in range(len(titles)): df_list.append([titles[ii],cls[ii],size[ii],ctime[ii],loadnum[ii]]) save = os.path.join(save_path,keyword+'.txt') fle2 = open(save,'w') if GS_num == 1: #df2 = df.sort_values(by = u'file size in MB', ascending = False) df2 = sorted(df_list, key=lambda x: x[2],reverse = True) if GS_num == 2: #df2 = df.sort_values(by = u'creation time', ascending = False) df2 = sorted(df_list, key=lambda x: x[3],reverse = True) else: #df2 = df.sort_values(by = u'downloads', ascending = False) df2 = sorted(df_list, key=lambda x: x[4],reverse = True) #df2.to_excel(save,index = False,encoding = 'gbk') for sl in df2: for s in range(len(sl)): if s == 0: ss = u'movie name:' + sl[s] if s == 1: ss = u'magnet link:' + sl[s] if s == 2: ss = u'file size MB:' + str(sl[s]) if s == 3: ss = u' creation time: ' + str(sl[s]) if s == 4: ss = u'hotness: ' + str(sl[s]) fle2.write(str(ss)) fle2.write('\n') fle2.write('--------------------------------------------------') fle2.write('\n') fle2.close() else: fle.close() i = i + 1 print u'\nIf you want to search again, enter the movie name! Otherwise please close the window manually.\n'