Traceback (most recent call last):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 435, in _update_chunk_length
self.chunk_left = int(line, 16)
ValueError: invalid literal for int() with base 16: b''
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 226, in _error_catcher
yield
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 486, in read_chunked
self._update_chunk_length()
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 439, in _update_chunk_length
raise httplib.IncompleteRead(line)
http.client.IncompleteRead: IncompleteRead(0 bytes read)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/models.py", line 660, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 340, in stream
for line in self.read_chunked(amt, decode_content=decode_content):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 514, in read_chunked
self._original_response.close()
File "/usr/lib/python3.5/contextlib.py", line 77, in __exit__
self.gen.throw(type, value, traceback)
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 244, in _error_catcher
raise ProtocolError('Connection broken: %r' % e, e)
requests.packages.urllib3.exceptions.ProtocolError: ('Connection broken: IncompleteRead(0 bytes read)', IncompleteRead(0 bytes read))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "spider.py", line 40, in main
poster.get(callback=lambda posts, forum=forum_name: store_to_db(posts, forum))
File "/home/ubuntu/workspace/dcard-spider/dcard/posts.py", line 51, in get
return PostsResult(self.ids, bundle, callback)
File "/home/ubuntu/workspace/dcard-spider/dcard/posts.py", line 73, in __init__
self.results = self.format(bundle, callback)
File "/home/ubuntu/workspace/dcard-spider/dcard/posts.py", line 97, in format
comments = comments.get() if comments else []
File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get
raise self._value
File "/usr/lib/python3.5/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.5/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
File "/home/ubuntu/workspace/dcard-spider/dcard/posts.py", line 60, in _serially_get_comments
_comments = client.get(comments_url, params=params)
File "/home/ubuntu/workspace/dcard-spider/dcard/utils.py", line 31, in get
response = self.req_session.get(url, **kwargs)
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/sessions.py", line 480, in get
return self.request('GET', url, **kwargs)
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/sessions.py", line 608, in send
r.content
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/models.py", line 737, in content
self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes()
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/models.py", line 663, in generate
raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ('Connection broken: IncompleteRead(0 bytes read)', IncompleteRead(0 bytes read))
requests.exceptions.ChunkedEncodingError: ('Connection broken: IncompleteRead(0 bytes read)', IncompleteRead(0 bytes read))
Cause: the server responds using HTTP/1.0, while Python's HTTP client speaks HTTP/1.1 by default, so the client fails while reading the response as a chunked body. Since we cannot change the server code, the fix is to make the client use HTTP/1.0 explicitly.
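Solution: run the following once at startup, before any request is sent. These are class attributes, so the override applies to every HTTP connection in the process.
- Python 3: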
import http.client
http.client.HTTPConnection._http_vsn = 10
http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0'
- Python 2:
import httplib
httplib.HTTPConnection._http_vsn = 10
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'