【python 操作hdfs】python操作分布式文件系统hdfs

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u013421629/article/details/82770140

1、安装hdfs包

pip install hdfs

2、python 操作分布式文件系统hdfs

# -*- encoding=utf-8 -*-


from hdfs.client import Client


# Shared HDFS client used by the helpers below; the masked host in the URL is a
# placeholder and must be replaced with the real address before running.
client = Client(url="http://XXX.XXX.XX.XX:50070")



def mkdirs(client, hdfs_path):
    """Create a directory on HDFS.

    :param client: HDFS client object providing ``makedirs``.
    :param hdfs_path: Remote path of the directory to create.
    """
    client.makedirs(hdfs_path=hdfs_path)


def delete_hdfs_file(client, hdfs_path):
    """Delete a file on HDFS.

    Whatever ``client.delete`` returns is discarded.

    :param client: HDFS client object providing ``delete``.
    :param hdfs_path: Remote path of the file to delete.
    """
    client.delete(hdfs_path=hdfs_path)


def put_to_hdfs(client, local_path, hdfs_path):
    """Upload a local file to HDFS.

    Note the argument order: this helper takes the local path first, while the
    underlying ``client.upload`` takes the HDFS path first.

    :param client: HDFS client object providing ``upload``.
    :param local_path: Path of the local file to upload.
    :param hdfs_path: Destination path on HDFS.
    """
    client.upload(
        hdfs_path=hdfs_path,
        local_path=local_path,
        cleanup=True,
    )


def get_from_hdfs(client, hdfs_path, local_path):
    """Download a file from HDFS to the local file system.

    ``overwrite=False`` is forwarded to the client, i.e. the download is
    requested without overwriting.

    :param client: HDFS client object providing ``download``.
    :param hdfs_path: Source path on HDFS.
    :param local_path: Destination path on the local file system.
    """
    client.download(
        hdfs_path=hdfs_path,
        local_path=local_path,
        overwrite=False,
    )


def append_to_hdfs(client, hdfs_path, data):
    """Append data to a file on HDFS.

    Calls ``client.write`` with ``append=True`` and ``overwrite=False``.

    :param client: HDFS client object providing ``write``.
    :param hdfs_path: Remote path of the file to append to.
    :param data: Content to append; passed to the client unchanged.
    """
    client.write(
        hdfs_path=hdfs_path,
        data=data,
        overwrite=False,
        append=True,
    )


def write_to_hdfs(client, hdfs_path, data):
    """Write data to a file on HDFS, overwriting its previous content.

    Calls ``client.write`` with ``overwrite=True`` and ``append=False``.

    :param client: HDFS client object providing ``write``.
    :param hdfs_path: Remote path of the file to write.
    :param data: Content to write; passed to the client unchanged.
    """
    client.write(
        hdfs_path=hdfs_path,
        data=data,
        overwrite=True,
        append=False,
    )


def move_or_rename(client, hdfs_src_path, hdfs_dst_path):
    """Move or rename a file on HDFS.

    :param client: HDFS client object providing ``rename``.
    :param hdfs_src_path: Current remote path.
    :param hdfs_dst_path: New remote path.
    """
    client.rename(
        hdfs_src_path=hdfs_src_path,
        hdfs_dst_path=hdfs_dst_path,
    )


def list(client, hdfs_path):
    """Return the files found under an HDFS directory.

    NOTE: this function shadows the builtin ``list`` inside this module; the
    name is kept because existing callers use it.

    :param client: HDFS client object providing ``list``.
    :param hdfs_path: Remote directory to inspect.
    :return: Whatever ``client.list`` returns when called with ``status=False``.
    """
    entries = client.list(hdfs_path=hdfs_path, status=False)
    return entries


if __name__ == '__main__':
    # Example usage: print every entry found under the table's HDFS directory,
    # one per line.
    for entry in list(client, "/user/admin/deploy/user_lable_dimension/"):
        print(entry)






3、cmd-put 方式

-- Table creation statement.
-- IF EXISTS keeps the script runnable when the table has not been created yet.
-- Columns are aligned with spaces only: literal tab characters in a pasted
-- statement can trigger auto-completion in the interactive Hive CLI.

drop table if exists dm.ml_user_lable_dimension;

create external table dm.ml_user_lable_dimension(
app_id      string comment '平台',
user_id     string comment '用户id',
login_name  string comment '登录名',
cert_no     string comment '身份证',
type        string comment '标签类型',
lable       string comment '用户标签',
value       string comment '标签值',
record_date string comment '更新时间'
)comment '用户标签维度表'
partitioned by(partition_type string comment '分区标签类型')
row format delimited
fields terminated by '\t'
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
stored as textfile
location '/user/admin/deploy/user_lable_dimension';


# Create the partition directory; -p avoids an error when it already exists.
hdfs dfs -mkdir -p /user/admin/deploy/user_lable_dimension/partition_type=brush

-- Map the partition to its HDFS directory; IF NOT EXISTS makes the statement safe to re-run.
alter table dm.ml_user_lable_dimension add if not exists partition(partition_type='brush')  location '/user/admin/deploy/user_lable_dimension/partition_type=brush';


-- Query the loaded rows of one partition for a given record date.
SELECT * from dm.ml_user_lable_dimension WHERE partition_type='brush' and record_date='2018-09-18';





# encoding: utf-8

import datetime
import os
import subprocess

def getYesterday():
    """
    :return: yesterday's date (relative to the local ``date.today()``)
             formatted as a ``YYYY-MM-DD`` string
    """
    # date.isoformat() yields the same YYYY-MM-DD text as strftime('%Y-%m-%d').
    return (datetime.date.today() - datetime.timedelta(days=1)).isoformat()


# Yesterday's date selects which result file is uploaded.
partition_date = getYesterday()

# Upload the local result file into the HDFS partition directory (-f is kept
# from the original command).
# The command is given as an argument list, so no shell parses it, and
# check_call raises subprocess.CalledProcessError on a non-zero exit status
# instead of silently ignoring a failed upload.
cmd = [
    "hdfs", "dfs", "-put", "-f",
    "/home/admin/user_lable_dimension/hedging/result/%s.txt" % partition_date,
    "/user/admin/deploy/user_lable_dimension/partition_type=hedging",
]
subprocess.check_call(cmd)

猜你喜欢

转载自blog.csdn.net/u013421629/article/details/82770140