python crawler learning 16

python crawler learning 16

  • Advanced usage of the requests library

    • File Upload
      # Remember the .ico file we fetched earlier? Use it for a file-upload request.
      # The file to upload must sit in the same directory as this script.
      import requests

      url = 'https://www.httpbin.org/post'

      # Open the file with a context manager so the handle is closed even if
      # the request raises (the original leaked the open file object).
      with open('favicon.ico', 'rb') as f:
          files = {'file': f}
          r = requests.post(url, files=files)
      print(r.text)
      

      Running result: compared with a regular response, the returned JSON contains additional `files` and `form` fields

      insert image description here

    • Cookie settings
      # Fetch a page and inspect the cookies the server sets on a plain GET.
      import requests

      resp = requests.get('https://www.baidu.com')
      print(resp.cookies)
      # RequestsCookieJar supports dict-style iteration over (name, value) pairs.
      for name, val in resp.cookies.items():
          print(name, '=', val)
      

      Running result:

      insert image description here

      # 直接使用Cookie维持登录状态(以GitHub为例)
      # 登录 GitHub 获取 cookie
      # 下图是我登录GitHub后找到的cookie
      

      insert image description here

      # Maintain the logged-in session by placing the raw cookie string
      # directly into the request headers.
      import requests

      target_url = 'https://github.com/'
      request_headers = {
          'cookie': '_octo=GH1.1.1662807162.1635003280; _device_id=172b02e053edbd3c0916c3d9e18629c6; tz=Asia%2FShanghai; has_recent_activity=1; tz=Asia%2FShanghai; color_mode=%7B%22color_mode%22%3A%22auto%22%2C%22light_theme%22%3A%7B%22name%22%3A%22light%22%2C%22color_mode%22%3A%22light%22%7D%2C%22dark_theme%22%3A%7B%22name%22%3A%22dark%22%2C%22color_mode%22%3A%22dark%22%7D%7D; logged_in=yes; dotcom_user=suchangGuo; user_session=CdRxR9aFlRTeOzI2tgIIF0EkkT_zZorFKwLaGWE8zFIyRox3; _gh_sess=oXO4mMftS%2Bn1S5bGvnpAv9Ih0W65ms5g3ooJv77X7g5U6HANAlnymeuYDDQnCEYExAsnpj5r37WtRZFcwTJBso3VxC4RpNs9E%2BKIaI8s9RgQuxak8m8VQbs8kmPXLZM%2BN99cd%2BZzu%2BxFS%2BL5LdqkImEB9YXAW6FYqbeUdEm8KkdsZ2jXpypBNT1ZM%2ByCkQZ1y8iPLq30AYKLAVlHh7ppwk%2FXdWKUWQRuh2Tb3G1ruVTPBwp%2Fj6zzfbPLvMYNckCArSciZXSOQWNNAJZsGpuGHP%2BSeI0x7yMznVYRqayES1QhwjgCy%2FRtUxry370pEuOgXbvzNm%2BMnvBDXLVtcI1%2BibDiaOhkQVs4LCYe1K7chHcwOzZqbb44uU0dwE9cOfR0l5Fe%2FwucPkeIpMk8O6m0y37VInQ2aCymnwzYotoaGU5Oz1B%2Fn4qy736aP66T0MgxgEFeO2SJu3XaRlUkwKeD5MdlUOMfrQ5IGJtjprkLhQeFwnOM85m650d9DzJQV4LI2oN8bnAKxmh%2BAsvTQyNxbw%2B4FSGRtc2v5OPZ1GFXIL%2BqJYSNYavRwVKIf%2B4wKQfx4QMtW4jvsvtsDiUaUQNhzitTHdoDSXS6iyga2cGgesUlqxNoElx19R5wI7OuLfly14Hxf%2BoxkN04TYxECe7i34fh6o5heVBuOXlM4MGwD9n8xb3hd91%2F0EpfTL1EQ6AlfOKO17sJkxh9ZblbRkv4Qxrsnj57vl4BftbasZ%2Fa0FzkAqj%2F8ci8e9Kajy7uZzpjoNFHkuFCKIQhIKZoLbuEXwcMmF5rjwUDUeH6yly0hAS4YE1R70Qf2tbUcV%2FJsDCH49wtnu03dhPniytIsayrk6EtM2jmWeWDrnGNao1Uu3T3cK4ASys4VSH8hsvfmzkiZHELAnJ%2BzXKwpMSdZDb1PDuu0Xou%2FOiJoyQtAw%3D%3D--Y11NmLqVVLv9rvgo--DvkpSyI94WND4WSVneLRuw%3D%3D',
          'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.2242 SLBChan/11',
      }

      response = requests.get(target_url, headers=request_headers)
      # Append the page body to receive.txt (the file is created if missing).
      with open('receive.txt', 'a+', encoding='utf-8') as out:
          out.write(response.text)
      

      Running result: a text file named `receive.txt` appears in the working directory, containing the page's HTML

      # Pass the cookie to requests via the dedicated `cookies` parameter
      # instead of placing it in the headers.
      import requests as req

      url = 'https://github.com/'
      cookies = '_octo=GH1.1.1662807162.1635003280; _device_id=172b02e053edbd3c0916c3d9e18629c6; tz=Asia%2FShanghai; has_recent_activity=1; tz=Asia%2FShanghai; color_mode=%7B%22color_mode%22%3A%22auto%22%2C%22light_theme%22%3A%7B%22name%22%3A%22light%22%2C%22color_mode%22%3A%22light%22%7D%2C%22dark_theme%22%3A%7B%22name%22%3A%22dark%22%2C%22color_mode%22%3A%22dark%22%7D%7D; logged_in=yes; dotcom_user=suchangGuo; user_session=CdRxR9aFlRTeOzI2tgIIF0EkkT_zZorFKwLaGWE8zFIyRox3; _gh_sess=oXO4mMftS%2Bn1S5bGvnpAv9Ih0W65ms5g3ooJv77X7g5U6HANAlnymeuYDDQnCEYExAsnpj5r37WtRZFcwTJBso3VxC4RpNs9E%2BKIaI8s9RgQuxak8m8VQbs8kmPXLZM%2BN99cd%2BZzu%2BxFS%2BL5LdqkImEB9YXAW6FYqbeUdEm8KkdsZ2jXpypBNT1ZM%2ByCkQZ1y8iPLq30AYKLAVlHh7ppwk%2FXdWKUWQRuh2Tb3G1ruVTPBwp%2Fj6zzfbPLvMYNckCArSciZXSOQWNNAJZsGpuGHP%2BSeI0x7yMznVYRqayES1QhwjgCy%2FRtUxry370pEuOgXbvzNm%2BMnvBDXLVtcI1%2BibDiaOhkQVs4LCYe1K7chHcwOzZqbb44uU0dwE9cOfR0l5Fe%2FwucPkeIpMk8O6m0y37VInQ2aCymnwzYotoaGU5Oz1B%2Fn4qy736aP66T0MgxgEFeO2SJu3XaRlUkwKeD5MdlUOMfrQ5IGJtjprkLhQeFwnOM85m650d9DzJQV4LI2oN8bnAKxmh%2BAsvTQyNxbw%2B4FSGRtc2v5OPZ1GFXIL%2BqJYSNYavRwVKIf%2B4wKQfx4QMtW4jvsvtsDiUaUQNhzitTHdoDSXS6iyga2cGgesUlqxNoElx19R5wI7OuLfly14Hxf%2BoxkN04TYxECe7i34fh6o5heVBuOXlM4MGwD9n8xb3hd91%2F0EpfTL1EQ6AlfOKO17sJkxh9ZblbRkv4Qxrsnj57vl4BftbasZ%2Fa0FzkAqj%2F8ci8e9Kajy7uZzpjoNFHkuFCKIQhIKZoLbuEXwcMmF5rjwUDUeH6yly0hAS4YE1R70Qf2tbUcV%2FJsDCH49wtnu03dhPniytIsayrk6EtM2jmWeWDrnGNao1Uu3T3cK4ASys4VSH8hsvfmzkiZHELAnJ%2BzXKwpMSdZDb1PDuu0Xou%2FOiJoyQtAw%3D%3D--Y11NmLqVVLv9rvgo--DvkpSyI94WND4WSVneLRuw%3D%3D'
      headers = {
          'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.2242 SLBChan/11'
      }

      # Build a RequestsCookieJar from the raw "name=value; name=value" string.
      jar = req.cookies.RequestsCookieJar()
      for cookie in cookies.split(';'):
          # split(';') leaves a leading space on every entry after the first,
          # which would corrupt the cookie names — strip each entry first.
          # maxsplit=1 keeps any '=' characters inside the value intact.
          key, value = cookie.strip().split('=', 1)
          jar.set(key, value)

      r = req.get(url, cookies=jar, headers=headers)
      print(r.text)
      
      

      Running result:

      A screenshot showing the page's HTML source is printed to the console.

That's all for today — it may continue tomorrow...

Guess you like

Origin blog.csdn.net/szshiquan/article/details/123534036