此次内容:
使用 Selenium WebDriver 访问 Instagram 网站,打开周杰伦的主页并向下滚动,然后返回已加载出来的图片 URL 以及周杰伦的粉丝数:
import os
import re
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

_INSTAGRAM_URL = 'https://www.instagram.com/'

# Patterns applied to the rendered profile HTML.
# NOTE(review): both depend on the exact markup served by Instagram (the
# "g47SY " class name in particular) -- confirm they still match.
# The follower pattern stops at the first '>' after title=", so the capture
# still contains the closing quote; it is stripped in getPhotourl().
_FANS_PATTERN = re.compile(r'<span class="g47SY " title="(.*?)>')
_PHOTO_PATTERN = re.compile(r'<a href="(.*?)">')


class Ins_user:
    """Drive a Chrome session that logs in to Instagram and scrapes a profile.

    Attributes are plain values set by ``__init__``:
    ``username`` / ``password`` are the login credentials, ``starname`` is the
    profile name appended to the Instagram base URL, and ``driver`` is the
    Chrome WebDriver instance used for every browser action.
    """

    def __init__(self, username, password, starname):
        """Store the credentials and target profile, and launch Chrome."""
        self.username = username
        self.password = password
        self.starname = starname
        self.driver = webdriver.Chrome()

    def closeBrowser(self):
        """Shut the browser down.

        Uses ``quit()`` rather than ``close()`` so that the driver session is
        ended as well, not just the current window.
        """
        self.driver.quit()

    def start(self):
        """Log in, dismiss the follow-up prompt, and open the profile via search.

        The XPath expressions below are tied to specific attribute values in
        the page markup; a missing element raises the Selenium lookup error
        to the caller, except for the optional prompt handled explicitly.
        The fixed sleeps give the page time to render between steps.
        """
        driver = self.driver
        driver.get(_INSTAGRAM_URL)
        time.sleep(3)

        # Click the "log in" link on the landing page.
        login_button = driver.find_element(
            By.XPATH, "//a[@href='/accounts/login/?source=auth_switcher']")
        login_button.click()
        time.sleep(3)

        # Fill in the user name.
        username_elem = driver.find_element(By.XPATH, "//input[@name='username']")
        username_elem.clear()
        username_elem.send_keys(self.username)
        time.sleep(2)

        # Fill in the password.
        password_elem = driver.find_element(By.XPATH, "//input[@name='password']")
        password_elem.clear()
        password_elem.send_keys(self.password)

        # Submit the login form.
        submit_button = driver.find_element(
            By.XPATH, "//button[@class='sqdOP L3NKy y3zKF ']")
        submit_button.click()
        time.sleep(2)

        # Dismiss the "not now" prompt. It is optional, so a failure to find
        # or click it is ignored -- but only Selenium errors are swallowed,
        # so Ctrl-C and programming errors still surface.
        try:
            cancel_button = driver.find_element(
                By.XPATH, "//button[@class='aOOlW HoLwm ']")
            cancel_button.click()
        except WebDriverException:
            pass

        # Type the profile name into the search box.
        search_elem = driver.find_element(By.XPATH, "//input[@placeholder='搜索']")
        search_elem.clear()
        search_elem.send_keys(self.starname)
        time.sleep(2)

        # Click the first matching result to enter the profile page.
        star_elem_profile = driver.find_element(By.XPATH, "//div[@class='Fy4o8']")
        star_elem_profile.click()
        time.sleep(2)

    def getPhotourl(self, scroll_times=2):
        """Open the profile page, scroll down, and extract links and followers.

        Args:
            scroll_times: How many times to scroll to the bottom of the page
                before reading its source (default 2, the original count).

        Returns:
            A ``(photo_urls, fans_num)`` tuple: the list of URLs built from
            every ``<a href="...">`` match in the page source, and the
            follower text with double quotes removed (empty if no match).
            Both values are also printed.
        """
        driver = self.driver
        profile_url = _INSTAGRAM_URL + self.starname
        driver.get(profile_url)
        time.sleep(2)

        for _ in range(scroll_times):
            driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
            time.sleep(2)

        page_html = driver.page_source

        # Build the URLs from the configured profile name instead of a
        # hard-coded one, so the method works for any ``starname``.
        photo_urllist = [
            profile_url + href for href in _PHOTO_PATTERN.findall(page_html)
        ]
        fans_num = ''.join(_FANS_PATTERN.findall(page_html)).replace('"', '')

        print(photo_urllist)
        print(self.starname+"粉丝数:"+fans_num)
        return photo_urllist, fans_num


if __name__ == '__main__':
    # Credentials are read from the environment; never commit them to source.
    login_name = os.environ.get('INS_USERNAME')
    login_password = os.environ.get('INS_PASSWORD')
    if not login_name or not login_password:
        raise SystemExit(
            'Set the INS_USERNAME and INS_PASSWORD environment variables first.')

    inst = Ins_user(login_name, login_password, 'jaychou')
    try:
        inst.start()
        inst.getPhotourl()
    finally:
        # Always release the browser, even if a step above fails.
        inst.closeBrowser()