Python 读写 HDFS 上的 txt 文件

from hdfs import Client
# WebHDFS client used by every read/write call below.
hdfsConn = Client(
    'http://111111:111',
    root='/111/111',
    timeout=1000,
    session=False,
)

# Load every '\n'-delimited record of the file at NearLinearIds_path,
# trimming surrounding whitespace from each record.
NearLinearIds = []
with hdfsConn.read(NearLinearIds_path, encoding='utf-8', delimiter='\n') as records:
    NearLinearIds.extend(record.strip() for record in records)
创建 txt 文件（先在 shell 中创建空文件，再用 Python 追加写入并读取）：
# Shell command (run in a terminal, not in Python): creates an empty file at this path.
# NOTE(review): presumably needed because the writes below use append=True — confirm.
hdfs dfs -touchz /user/111/111/data/7.4/interpolation.txt
# Append three sample records, one per line, to the interpolation file.
_interpolation_path = "/user/111/111/data/7.4/interpolation.txt"
for _record in ("1234567", "333", "101010"):
    hdfsConn.write(_interpolation_path, data=_record + '\n', overwrite=False, append=True)

# Read the file back as '\n'-delimited records, trim each one, and print the list.
ss = []
with hdfsConn.read('/user/111/111/data/7.4/interpolation.txt', encoding='utf-8', delimiter='\n') as records:
    ss.extend(record.strip() for record in records)

print(ss)
# Append the ids in worthless_ecpm_ids to worthlessIds_path as comma-separated text.
worthless_ecpm_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
worthless_ecpm_ids_str = ','.join(map(str, worthless_ecpm_ids))
# Terminate the batch with the delimiter: the write is in append mode, so
# without it the last id of this batch would fuse with the first id of the
# next append (e.g. "...,10" + "1,2,..." -> "...,101,2,..."). Readers must
# skip the resulting empty trailing token.
hdfsConn.write(worthlessIds_path, data=worthless_ecpm_ids_str + ',', overwrite=False, append=True)

# Collect the non-empty ','-delimited tokens stored at worthlessIds_path.
worthlessIds = []
with hdfsConn.read(worthlessIds_path, encoding='utf-8', delimiter=',') as tokens:
    worthlessIds.extend(token for token in tokens if token)

参考文档（HdfsCLI API）：https://hdfscli.readthedocs.io/en/latest/api.html

Guess you like

Origin: https://blog.csdn.net/qq_42363032/article/details/118494354