hadoop的配置文件存放目录在 {HADOOP_HOME}/etc/hadoop 下, 与 hdfs相关的配置: core-site.xml、hdfs-site.xml
core-site.xml: core-site 配置详解
新增属性信息: fs.defaultFS
fs.defaultFS表示指定集群的文件系统类型是分布式文件系统(HDFS),datanode心跳发送到nameNode的地址
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://#{nameNode}:#{PORT}</value>
</property>
</configuration>
hdfs-site.xml:hdfs-site 配置详解
- dfs.replication 副本数,表示副本数是3
- dfs.namenode.name.dir 和 dfs.datanode.data.dir, namenode 和 datanode 的数据存放路径
- dfs.datanode.max.locked.memory 开启缓存,配置值根据自己机器情况配置
- dfs.permissions 是否开启权限校验
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/opt/software/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/opt/software/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.datanode.max.locked.memory</name>
<value>65536</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
HDFS -- API操作:
1、引入包hadoop-client
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.3</version>
</dependency>
</dependencies>
2、HDFS 客户端操作
- 获取客户端对象
- 执行相关操作命令: 文件上传、下载、修改文件名称/路径、文件删除........
- 关闭资源
package hadoop.hdfs;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class HdfsClient {

    /**
     * Creates a directory on HDFS via the FileSystem API.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if the RPC to the cluster fails
     * @throws InterruptedException if the user-impersonation login is interrupted
     */
    @Test
    public void testMkdirs() throws URISyntaxException, IOException, InterruptedException {
        // Address of the cluster's NameNode (fs.defaultFS)
        URI uri = new URI("hdfs://TestNode1:9000");
        // Client-side configuration; values here override *-site.xml defaults
        Configuration configuration = new Configuration();
        // User to act as on the cluster (relevant when dfs.permissions is enabled)
        String user = "zsm";
        // try-with-resources guarantees the FileSystem is closed even if mkdirs throws
        try (FileSystem fs = FileSystem.get(uri, configuration, user)) {
            fs.mkdirs(new Path("/zsm/hdfs/test/"));
        }
    }
}
2.1 文件上传: copyFromLocalFile
/**
 * Uploads a local file to HDFS with copyFromLocalFile.
 *
 * @throws URISyntaxException   if the NameNode URI is malformed
 * @throws IOException          if the upload fails
 * @throws InterruptedException if the user-impersonation login is interrupted
 */
@Test
public void testCopyFromLocalFile() throws URISyntaxException, IOException, InterruptedException {
    // 1. Build the client configuration; override replication to 2 for this upload
    Configuration configuration = new Configuration();
    configuration.set("dfs.replication", "2");
    // 2. Get the FileSystem handle; try-with-resources closes it even on failure
    try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"), configuration, "zsm")) {
        // 3. Upload: source is resolved relative to the working directory
        //    (create hdfs_test.txt in the project folder, or use an absolute path)
        fs.copyFromLocalFile(new Path("hdfs_test.txt"), new Path("/zsm/hdfs/test/"));
    }
}
2.2 文件下载:
copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem)
- boolean delSrc 指是否将原文件删除
- Path src 指要下载的文件路径
- Path dst 指将文件下载到的路径
- boolean useRawLocalFileSystem 为 true 时使用 RawLocalFileSystem,不在本地生成 .crc 校验文件(即关闭校验)
/**
 * Downloads a file from HDFS to the local filesystem with copyToLocalFile.
 *
 * @throws IOException          if the download fails
 * @throws URISyntaxException   if the NameNode URI is malformed
 * @throws InterruptedException if the user-impersonation login is interrupted
 */
@Test
public void testCopyToLocalFile() throws IOException, URISyntaxException, InterruptedException {
    // 1. Get the FileSystem handle; try-with-resources closes it even on failure
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"), configuration, "zsm")) {
        // 2. Download.
        //    delSrc=false               : keep the source file on HDFS
        //    src                        : HDFS path to download
        //    dst                        : local destination path
        //    useRawLocalFileSystem=true : write via RawLocalFileSystem, which skips
        //                                 generating a local .crc checksum file
        fs.copyToLocalFile(false, new Path("/zsm/hdfs/test/hdfs_test.txt"), new Path("zsm_test2.txt"), true);
    }
}
2.3 修改文件名和路径: rename
/**
 * Renames/moves a file on HDFS with rename (changes both directory and name here).
 *
 * @throws IOException          if the rename fails
 * @throws URISyntaxException   if the NameNode URI is malformed
 * @throws InterruptedException if the user-impersonation login is interrupted
 */
@Test
public void testRename() throws IOException, URISyntaxException, InterruptedException {
    // 1. Get the FileSystem handle; try-with-resources closes it even on failure
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"), configuration, "zsm")) {
        // 2. Rename and move the file uploaded in section 2.1
        //    (fixed typo: was "hdsf_test.txt", but 2.1 uploads "hdfs_test.txt")
        fs.rename(new Path("/zsm/hdfs/test/hdfs_test.txt"), new Path("/zsm/hdfs/test2/hdfs_test2.txt"));
    }
}
2.4 删除文件和目录: delete
/**
 * Deletes a path on HDFS with delete.
 *
 * @throws IOException          if the delete fails
 * @throws URISyntaxException   if the NameNode URI is malformed
 * @throws InterruptedException if the user-impersonation login is interrupted
 */
@Test
public void testDelete() throws IOException, URISyntaxException, InterruptedException {
    // 1. Get the FileSystem handle; try-with-resources closes it even on failure
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"), configuration, "zsm")) {
        // 2. Delete /zsm/hdfs; recursive=true is required for non-empty directories
        fs.delete(new Path("/zsm/hdfs"), true);
    }
}
2.5 HDFS文件详情查看
查看文件名称、权限、长度、块信息
/**
 * Recursively lists every file under /zsm and prints its metadata
 * (path, permission, owner, group, length, mtime, replication, block size, name)
 * plus its block locations.
 *
 * @throws IOException          if listing fails mid-iteration
 * @throws InterruptedException if the user-impersonation login is interrupted
 * @throws URISyntaxException   if the NameNode URI is malformed
 */
@Test
public void testListFiles() throws IOException, InterruptedException,
        URISyntaxException {
    // 1. Get the FileSystem handle; try-with-resources closes it even if
    //    the remote iterator throws mid-loop
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"), configuration, "zsm")) {
        // 2. List files recursively (recursive=true); directories are not returned
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/zsm"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();
            System.out.println("========" + fileStatus.getPath() + "=========");
            System.out.println(fileStatus.getPermission());
            System.out.println(fileStatus.getOwner());
            System.out.println(fileStatus.getGroup());
            System.out.println(fileStatus.getLen());
            System.out.println(fileStatus.getModificationTime());
            System.out.println(fileStatus.getReplication());
            System.out.println(fileStatus.getBlockSize());
            System.out.println(fileStatus.getPath().getName());
            // Block locations: which datanodes hold each block of this file
            BlockLocation[] blockLocations = fileStatus.getBlockLocations();
            System.out.println(Arrays.toString(blockLocations));
        }
    }
}
2.6 HDFS文件和文件夹判断
/**
 * Lists the direct children of /zsm (non-recursive) and prints whether each
 * entry is a file ("f:") or a directory ("d:").
 *
 * @throws IOException          if listing fails
 * @throws InterruptedException if the user-impersonation login is interrupted
 * @throws URISyntaxException   if the NameNode URI is malformed
 */
@Test
public void testListStatus() throws IOException, InterruptedException, URISyntaxException {
    // 1. Get the FileSystem handle; try-with-resources closes it even on failure
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"), configuration, "zsm")) {
        // 2. listStatus returns both files and directories (one level only)
        FileStatus[] listStatus = fs.listStatus(new Path("/zsm"));
        for (FileStatus fileStatus : listStatus) {
            if (fileStatus.isFile()) {
                System.out.println("f:" + fileStatus.getPath().getName());
            } else {
                System.out.println("d:" + fileStatus.getPath().getName());
            }
        }
    }
}