Hadoop --- HDFS configuration and operation

Hadoop configuration files are stored in $HADOOP_HOME/etc/hadoop. The HDFS-related configuration files are core-site.xml and hdfs-site.xml.

core-site.xml: core site configuration

Key property: fs.defaultFS

fs.defaultFS specifies the cluster's default file system as HDFS by giving the NameNode address; DataNodes also send their heartbeats to the NameNode at this address.

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://#{nameNode}:#{PORT}</value>
    </property>
</configuration>
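
Once fs.defaultFS is set, a client built from a plain Configuration resolves paths against that NameNode. A minimal sketch (the hdfs://TestNode1:9000 address matches the client examples below and is an assumption for your cluster):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;

public class DefaultFsCheck {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Assumed address; on a real client this is read from core-site.xml on the classpath
        conf.set("fs.defaultFS", "hdfs://TestNode1:9000");
        // With fs.defaultFS set, FileSystem.get(conf) returns a client bound to that NameNode
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.getUri()); // prints hdfs://TestNode1:9000
        fs.close();
    }
}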

hdfs-site.xml: HDFS-specific configuration

  • dfs.replication: the number of block replicas; 3 means each block is stored three times
  • dfs.namenode.name.dir and dfs.datanode.data.dir: the local storage paths for NameNode metadata and DataNode block data
  • dfs.datanode.max.locked.memory: enables DataNode block caching; the value (in bytes) should be sized to your machine's memory
  • dfs.permissions: whether to enable permission checking (false disables it)
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/opt/software/hadoop/hdfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/opt/software/hadoop/hdfs/data</value>
    </property>
    <property>
        <name>dfs.datanode.max.locked.memory</name>
        <value>65536</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
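
To check which values actually take effect, you can load the file into a Configuration and print the keys from the list above. A minimal sketch, assuming hdfs-site.xml lives under /opt/software/hadoop/etc/hadoop (adjust to your install):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class HdfsSiteCheck {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Assumed path to the cluster's hdfs-site.xml; point this at your own copy
        conf.addResource(new Path("/opt/software/hadoop/etc/hadoop/hdfs-site.xml"));
        System.out.println("dfs.replication = " + conf.get("dfs.replication"));
        System.out.println("dfs.namenode.name.dir = " + conf.get("dfs.namenode.name.dir"));
        System.out.println("dfs.datanode.data.dir = " + conf.get("dfs.datanode.data.dir"));
        System.out.println("dfs.datanode.max.locked.memory = " + conf.get("dfs.datanode.max.locked.memory"));
        System.out.println("dfs.permissions = " + conf.get("dfs.permissions"));
    }
}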

HDFS -- API operations: 

1. Add the hadoop-client dependency (its version should generally match your cluster's Hadoop version)

<dependencies>
 <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.1.3</version>
 </dependency> 
</dependencies>

2. HDFS client operations

  1. Get the client object
  2. Execute the operation: upload files, download files, rename/move files, delete files...
  3. Close the resource
package hadoop.hdfs;

import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;

import java.net.URI;
import java.net.URISyntaxException;

public class HdfsClient {
    @Test
    public void testMkdirs() throws URISyntaxException, IOException, InterruptedException {
        // Address of the cluster's NameNode
        URI uri = new URI("hdfs://TestNode1:9000");
        // Create a configuration object
        Configuration configuration = new Configuration();

        // The user to connect as
        String user = "zsm";
        // Get the client object
        FileSystem fs = FileSystem.get(uri, configuration, user);
        fs.mkdirs(new Path("/zsm/hdfs/test/"));
        fs.close();
    }

}
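
FileSystem implements java.io.Closeable, so steps 1 and 3 can be combined with try-with-resources, which closes the client even when an operation throws. A minimal variant of the same test (same assumed address and user):

    @Test
    public void testMkdirsTryWithResources() throws URISyntaxException, IOException, InterruptedException {
        URI uri = new URI("hdfs://TestNode1:9000");
        Configuration configuration = new Configuration();
        // The FileSystem is closed automatically when the block exits
        try (FileSystem fs = FileSystem.get(uri, configuration, "zsm")) {
            fs.mkdirs(new Path("/zsm/hdfs/test/"));
        }
    }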

2.1 File upload: copyFromLocalFile

    @Test
    public void testCopyFromLocalFile() throws URISyntaxException, IOException, InterruptedException {
        // 1. Get the file system
        Configuration configuration = new Configuration();
        configuration.set("dfs.replication","2");
        FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"),configuration,"zsm");

        // 2. Upload the file (create hdfs_test.txt in the project folder, or pass an absolute path)
        fs.copyFromLocalFile(new Path("hdfs_test.txt"), new Path("/zsm/hdfs/test/"));

        // 3. Close the resource
        fs.close();
    }
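
copyFromLocalFile also has an overload that controls whether the local source is deleted and whether an existing destination is overwritten; a sketch of step 2 using it:

        // Overload: copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)
        // delSrc = false keeps the local file; overwrite = true replaces an existing HDFS file
        fs.copyFromLocalFile(false, true, new Path("hdfs_test.txt"), new Path("/zsm/hdfs/test/"));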

2.2 File download: 

copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem)

  1. boolean delSrc: whether to delete the source file from HDFS
  2. Path src: the HDFS path of the file to download
  3. Path dst: the local path to download the file to
  4. boolean useRawLocalFileSystem: whether to write with RawLocalFileSystem; true skips the client-side checksum, so no local .crc file is created
    @Test
    public void testCopyToLocalFile() throws IOException, URISyntaxException, InterruptedException {
        // 1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"),configuration,"zsm");

        // 2. Perform the download
        // boolean delSrc: whether to delete the source file from HDFS
        // Path src: the HDFS path of the file to download
        // Path dst: the local path to download the file to
        // boolean useRawLocalFileSystem: true skips the client-side checksum (no .crc file)
        fs.copyToLocalFile(false, new Path("/zsm/hdfs/test/hdfs_test.txt"), new Path("zsm_test2.txt"), true);

        // 3. Close the resource
        fs.close();
    }
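
As an alternative to copyToLocalFile, a file can be streamed with open() and copied byte-by-byte. A minimal sketch (requires org.apache.hadoop.fs.FSDataInputStream, org.apache.hadoop.io.IOUtils, java.io.FileOutputStream, and java.io.OutputStream in addition to the imports above; the local file name zsm_test3.txt is an assumption):

    @Test
    public void testStreamDownload() throws IOException, URISyntaxException, InterruptedException {
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"), configuration, "zsm");

        // Open an input stream on the HDFS file and copy it to a local output stream
        try (FSDataInputStream in = fs.open(new Path("/zsm/hdfs/test/hdfs_test.txt"));
             OutputStream out = new FileOutputStream("zsm_test3.txt")) {
            // 4096-byte buffer; false = do not close the streams (try-with-resources does that)
            IOUtils.copyBytes(in, out, 4096, false);
        }
        fs.close();
    }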

2.3 Modify the file name and path: rename 

    @Test
    public void testRename() throws IOException, URISyntaxException, InterruptedException {
        // 1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"),configuration,"zsm");

        // 2. Rename the file (rename can also move it to another directory)
        fs.rename(new Path("/zsm/hdfs/test/hdfs_test.txt"),new Path("/zsm/hdfs/test2/hdfs_test2.txt"));

        // 3. Close the resource
        fs.close();
    }
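
Note that rename() reports most failures through its boolean return value rather than an exception, e.g. when the source does not exist; a sketch of step 2 with the result checked:

        // rename() returns false instead of throwing when the source is missing
        boolean renamed = fs.rename(new Path("/zsm/hdfs/test/hdfs_test.txt"),
                new Path("/zsm/hdfs/test2/hdfs_test2.txt"));
        if (!renamed) {
            System.out.println("rename failed: check the source path and target directory");
        }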

2.4 Delete files and directories: delete 

    @Test
    public void testDelete() throws IOException, URISyntaxException, InterruptedException {
        // 1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"),configuration,"zsm");

        // 2. Delete; true enables recursive deletion, required for non-empty directories
        fs.delete(new Path("/zsm/hdfs"),true);

        // 3. Close the resource
        fs.close();
    }

2.5 View HDFS file details 

View the file name, permissions, length, and block information.

    // Additional imports needed: org.apache.hadoop.fs.RemoteIterator,
    // org.apache.hadoop.fs.LocatedFileStatus, org.apache.hadoop.fs.BlockLocation, java.util.Arrays
    @Test
    public void testListFiles() throws IOException, InterruptedException,
            URISyntaxException {
        // 1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"),configuration,"zsm");

        // 2. Get file details; the second argument makes the listing recursive
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/zsm"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();
            System.out.println("========" + fileStatus.getPath() + "=========");
            System.out.println(fileStatus.getPermission());
            System.out.println(fileStatus.getOwner());
            System.out.println(fileStatus.getGroup());
            System.out.println(fileStatus.getLen());
            System.out.println(fileStatus.getModificationTime());
            System.out.println(fileStatus.getReplication());
            System.out.println(fileStatus.getBlockSize());
            System.out.println(fileStatus.getPath().getName());
            // Get block location information
            BlockLocation[] blockLocations = fileStatus.getBlockLocations();
            System.out.println(Arrays.toString(blockLocations));
        }

        // 3. Close the resource
        fs.close();
    }
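
Each BlockLocation also exposes which DataNodes hold the replicas of that block; a small sketch that could replace the last print inside the loop above:

            // For each block: its offset and length in the file, and the DataNodes hosting it
            for (BlockLocation block : blockLocations) {
                System.out.println("offset=" + block.getOffset()
                        + " length=" + block.getLength()
                        + " hosts=" + Arrays.toString(block.getHosts()));
            }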

2.6 Determine whether a path is a file or a directory 

    // Additional import needed: org.apache.hadoop.fs.FileStatus
    @Test
    public void testListStatus() throws IOException, InterruptedException, URISyntaxException{
        // 1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://node01:9000"), configuration, "zsm");

        // 2. Determine whether each entry is a file or a directory
        FileStatus[] listStatus = fs.listStatus(new Path("/zsm"));
        for (FileStatus fileStatus : listStatus) {
            // If the entry is a file
            if (fileStatus.isFile()) {
                System.out.println("f:"+fileStatus.getPath().getName());
            }else {
                System.out.println("d:"+fileStatus.getPath().getName());
            }
        }

        // 3. Close the resource
        fs.close();
    }
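
listStatus only looks at one directory level. To make the same file/directory check over a whole subtree, a recursive sketch (the walk helper is hypothetical, not part of the original code):

    // Hypothetical helper: recursively walk a directory tree, printing f:/d: at every level
    private void walk(FileSystem fs, Path dir) throws IOException {
        for (FileStatus status : fs.listStatus(dir)) {
            if (status.isFile()) {
                System.out.println("f:" + status.getPath().getName());
            } else {
                System.out.println("d:" + status.getPath().getName());
                walk(fs, status.getPath()); // recurse into the subdirectory
            }
        }
    }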

Origin blog.csdn.net/zhoushimiao1990/article/details/131332900