When performing WebHDFS operations, use the following code:
from hdfs import InsecureClient
client = InsecureClient('http://host:port', user='ann')
Get the list of files in the remote /tmp directory (this step only needs to contact the NameNode):
# Listing all files inside a directory.
list_content = client.list('/tmp')
Call the upload method to upload a file (note: the keyword argument is `overwrite`, not `overwriten`):
client.upload(remote_dir, local_dir, overwrite=True)
The following exception message appears
[E 180201 14:21:10 client:599] Error while uploading. Attempting cleanup.
Traceback (most recent call last):
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\connection.py", line 141, in _new_conn
(self.host, self.port), self.timeout, **extra_kw)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\util\connection.py", line 61, in create_connection
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
File "C:\Program Files (x86)\Python36-32\lib\socket.py", line 743, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 594, in upload
_upload(path_tuple)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 524, in _upload
self.write(_temp_path, wrap(reader, chunk_size, progress), **kwargs)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 470, in write
consumer(data)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 464, in consumer
data=(c.encode(encoding) for c in _data) if encoding else _data,
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 207, in _request
**kwargs
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\sessions.py", line 488, in request
resp = self.send(prep, **send_kwargs)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\sessions.py", line 609, in send
r = adapter.send(request, **kwargs)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\adapters.py", line 441, in send
low_conn.endheaders()
File "C:\Program Files (x86)\Python36-32\lib\http\client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Program Files (x86)\Python36-32\lib\http\client.py", line 1026, in _send_output
self.send(msg)
File "C:\Program Files (x86)\Python36-32\lib\http\client.py", line 964, in send
self.connect()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\connection.py", line 166, in connect
conn = self._new_conn()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\connection.py", line 150, in _new_conn
self, "Failed to establish a new connection: %s" % e)
requests.packages.urllib3.exceptions.NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x046C9BD0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed
[I 180201 14:21:10 client:848] Deleting '/tmp/data1/tensorflow' recursively.
[E 180201 14:21:10 web:1548] Uncaught exception POST /api/job/create (127.0.0.1)
HTTPServerRequest(protocol='http', host='localhost:8081', method='POST', uri='/api/job/create', version='HTTP/1.1', remote_ip='127.0.0.1', headers={'Connection': 'close', 'Cookie': 'Pycharm-c9b2eeaf=d1c21794-2128-4ae7-9a97-2f9a04f8749c', 'Content-Length': '34', 'Referer': 'http://localhost:8082/', 'Content-Type': 'application/json;charset=utf-8', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3', 'Accept': 'application/json, text/plain, */*', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0', 'Host': 'localhost:8081'})
Traceback (most recent call last):
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\connection.py", line 141, in _new_conn
(self.host, self.port), self.timeout, **extra_kw)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\util\connection.py", line 61, in create_connection
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
File "C:\Program Files (x86)\Python36-32\lib\socket.py", line 743, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Program Files (x86)\Python36-32\lib\site-packages\tornado\web.py", line 1469, in _execute
result = yield result
File "C:\Program Files (x86)\Python36-32\lib\site-packages\tornado\gen.py", line 1015, in run
value = future.result()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\tornado\concurrent.py", line 237, in result
raise_exc_info(self._exc_info)
File "<string>", line 3, in raise_exc_info
File "C:\Program Files (x86)\Python36-32\lib\site-packages\tornado\gen.py", line 1024, in run
yielded = self.gen.send(value)
File "app.py", line 79, in post
hdfs_client.upload(remote_hdfs_model_dir,model_dir,overwrite=True)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 605, in upload
raise err
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 594, in upload
_upload(path_tuple)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 524, in _upload
self.write(_temp_path, wrap(reader, chunk_size, progress), **kwargs)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 470, in write
consumer(data)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 464, in consumer
data=(c.encode(encoding) for c in _data) if encoding else _data,
File "C:\Program Files (x86)\Python36-32\lib\site-packages\hdfs\client.py", line 207, in _request
**kwargs
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\sessions.py", line 488, in request
resp = self.send(prep, **send_kwargs)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\sessions.py", line 609, in send
r = adapter.send(request, **kwargs)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\adapters.py", line 441, in send
low_conn.endheaders()
File "C:\Program Files (x86)\Python36-32\lib\http\client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Program Files (x86)\Python36-32\lib\http\client.py", line 1026, in _send_output
self.send(msg)
File "C:\Program Files (x86)\Python36-32\lib\http\client.py", line 964, in send
self.connect()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\connection.py", line 166, in connect
conn = self._new_conn()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\connection.py", line 150, in _new_conn
self, "Failed to establish a new connection: %s" % e)
requests.packages.urllib3.exceptions.NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x046C9BD0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed
[E 180201 14:21:10 web:1971] 500 POST /api/job/create (127.0.0.1) 128.01ms
[I 180201 14:23:35 autoreload:204] C:\Program Files (x86)\Python36-32\lib\site-packages\requests\packages\urllib3\util\connection.py modified; restarting server
169.24.2.194
50070
bigdata6.chinasws.com
50075
After various tests, the cause is that uploading a file requires connecting directly to a DataNode to write the data (the NameNode redirects the write request to a DataNode), so the machine running the hdfs `upload` code must have network connectivity to every DataNode in the cluster. If your HDFS cluster identifies its DataNodes by hostname, you must either register those hostnames on your DNS server or add the corresponding IP-address-to-hostname mappings to the client's local hosts file: /etc/hosts on Linux, or C:\windows\system32\drivers\etc\hosts on Windows.