版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u013421629/article/details/82770140
1、安装hdfs包
pip install hdfs
2、python 操作分布式文件系统hdfs
# -*- encoding=utf-8 -*-
from hdfs.client import Client
# HDFS client instance that the __main__ demo passes to the helper functions.
# "XXX.XXX.XX.XX" is a placeholder host and must be replaced before running.
# NOTE(review): port 50070 is presumably the NameNode WebHDFS port - confirm for your cluster.
client = Client("http://XXX.XXX.XX.XX:50070")
def mkdirs(client, hdfs_path: str) -> None:
    """Create a directory on HDFS.

    Delegates to ``client.makedirs``; the client's return value is discarded.

    :param client: HDFS client object exposing ``makedirs``.
    :param hdfs_path: Remote directory path to create.
    """
    client.makedirs(hdfs_path)
def delete_hdfs_file(client, hdfs_path: str) -> None:
    """Delete a path on HDFS.

    Delegates to ``client.delete`` with its default options; the client's
    return value is discarded.

    :param client: HDFS client object exposing ``delete``.
    :param hdfs_path: Remote path to delete.
    """
    client.delete(hdfs_path)
def put_to_hdfs(client, local_path: str, hdfs_path: str) -> None:
    """Upload a local file to HDFS.

    Note the argument order: this helper takes ``local_path`` first, while
    ``client.upload`` is called with the HDFS destination first.

    :param client: HDFS client object exposing ``upload``.
    :param local_path: Path of the local source.
    :param hdfs_path: Remote destination path.
    """
    # cleanup=True is passed through unchanged.
    # NOTE(review): presumably removes partially uploaded data on failure - confirm in the hdfs docs.
    client.upload(
        hdfs_path,
        local_path,
        cleanup=True,
    )
def get_from_hdfs(client, hdfs_path: str, local_path: str) -> None:
    """Download a path from HDFS to the local filesystem.

    Delegates to ``client.download`` with ``overwrite=False``; the client's
    return value is discarded.

    :param client: HDFS client object exposing ``download``.
    :param hdfs_path: Remote source path.
    :param local_path: Local destination path.
    """
    # NOTE(review): with overwrite=False an existing local target is presumably rejected - confirm.
    client.download(
        hdfs_path,
        local_path,
        overwrite=False,
    )
def append_to_hdfs(client, hdfs_path: str, data) -> None:
    """Append data to a file on HDFS.

    Calls ``client.write`` with ``append=True`` and ``overwrite=False``.

    :param client: HDFS client object exposing ``write``.
    :param hdfs_path: Remote file to append to.
    :param data: Payload handed to ``client.write`` unchanged.
    """
    client.write(
        hdfs_path,
        data,
        overwrite=False,
        append=True,
    )
def write_to_hdfs(client, hdfs_path: str, data) -> None:
    """Write data to a file on HDFS, replacing existing content.

    Calls ``client.write`` with ``overwrite=True`` and ``append=False``.

    :param client: HDFS client object exposing ``write``.
    :param hdfs_path: Remote file to write.
    :param data: Payload handed to ``client.write`` unchanged.
    """
    client.write(
        hdfs_path,
        data,
        overwrite=True,
        append=False,
    )
def move_or_rename(client, hdfs_src_path: str, hdfs_dst_path: str) -> None:
    """Move or rename a path on HDFS.

    Delegates to ``client.rename``; the client's return value is discarded.

    :param client: HDFS client object exposing ``rename``.
    :param hdfs_src_path: Existing remote path.
    :param hdfs_dst_path: New remote path.
    """
    client.rename(hdfs_src_path, hdfs_dst_path)
def list(client, hdfs_path: str):
    """Return the entries found under an HDFS directory.

    Returns whatever ``client.list`` yields when called with ``status=False``.

    NOTE: this function shadows the builtin ``list`` within this module; the
    name is kept because callers already use it.

    :param client: HDFS client object exposing ``list``.
    :param hdfs_path: Remote directory to list.
    :return: The value returned by ``client.list``.
    """
    entries = client.list(hdfs_path, status=False)
    return entries
if __name__ == '__main__':
    # Example invocation: print every entry of the table's HDFS directory,
    # one per line.
    entries = list(client, "/user/admin/deploy/user_lable_dimension/")
    for entry in entries:
        print(entry)
3、cmd-put 方式
-- Table creation statement (presumably HiveQL): drops and then recreates the
-- external table dm.ml_user_lable_dimension.
-- Rows are stored as tab-delimited text files under
-- /user/admin/deploy/user_lable_dimension and are partitioned by partition_type.
-- NOTE(review): the column comment '跟新时间' looks like a typo for '更新时间'
-- (update time); it is left unchanged here because it is stored table metadata.
drop table dm.ml_user_lable_dimension;
create external table dm.ml_user_lable_dimension(
app_id string comment '平台',
user_id string comment '用户id',
login_name string comment '登录名',
cert_no string comment '身份证',
type string comment '标签类型',
lable string comment '用户标签',
value string comment '标签值',
record_date string comment '跟新时间'
)comment '用户标签维度表'
partitioned by(partition_type string comment '分区标签类型')
row format delimited
fields terminated by '\t'
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
stored as textfile
location '/user/admin/deploy/user_lable_dimension';
# Create the HDFS directory for the partition_type=brush partition
# (this makes a directory, not a file).
hdfs dfs -mkdir /user/admin/deploy/user_lable_dimension/partition_type=brush
# 映射分区
-- Map the partition: register partition_type='brush' on the table and point it
-- at the matching HDFS directory.
alter table dm.ml_user_lable_dimension add partition(partition_type='brush') location '/user/admin/deploy/user_lable_dimension/partition_type=brush';
# 查询结果
-- Query the result: rows of the 'brush' partition recorded on 2018-09-18.
SELECT * from dm.ml_user_lable_dimension WHERE partition_type='brush' and record_date='2018-09-18'
# encoding: utf-8
import datetime
import os
def getYesterday():
    """Return yesterday's date as a ``YYYY-MM-DD`` string.

    "Yesterday" is ``datetime.date.today()`` minus one day.

    :return: Yesterday's date formatted with ``'%Y-%m-%d'``.
    """
    return (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
# Upload yesterday's local result file into the HDFS partition directory.
import subprocess

partition_date = getYesterday()

# Build the command as an argument list so no shell is involved and the
# paths are passed to `hdfs` verbatim.
cmd = [
    "hdfs", "dfs", "-put", "-f",
    "/home/admin/user_lable_dimension/hedging/result/%s.txt" % partition_date,
    "/user/admin/deploy/user_lable_dimension/partition_type=hedging",
]

# check=True raises subprocess.CalledProcessError on a non-zero exit status,
# so a failed upload is reported instead of being silently ignored (the
# previous os.popen(...).close() discarded the exit status).
res = subprocess.run(cmd, check=True)