hadoop基本知识2

Hadoop管理员常用命令
hadoop job -list #列出正在运行的job
hadoop job -kill <job_id> #kill job
hadoop fsck /xxx  #检查HDFS块状态,是否损坏
hadoop fsck /xxx -delete #检查HDFS块状态,删除损坏块
hadoop dfsadmin -report #检查HDFS状态,包括DN信息
hadoop dfsadmin -safemode enter|leave #进入/退出安全模式(安全模式下HDFS只读)
./bin/start-balancer.sh #平衡集群文件

使用hadoop创建一个文件

package com.hadoop.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Creates the directory /user/hadoop/xxx on the cluster's default HDFS.
 */
public class HDFSmkdir {

 /**
  * Entry point; loads the default Hadoop configuration and creates the directory.
  *
  * @param args unused
  * @throws IOException if the FileSystem cannot be contacted
  */
 public static void main(String[] args) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  // mkdirs reports failure via its boolean return value rather than an
  // exception; the original ignored it, making failures silent.
  boolean created = fs.mkdirs(new Path("/user/hadoop/xxx"));
  System.out.println("mkdirs /user/hadoop/xxx -> " + created);
 }
}


创建完成后导出hadoop项目jar,导出为hdfs.jar

放在linux中hadoop的lib下面
执行方式:
hadoop jar hdfs.jar com.hadoop.test.HDFSmkdir #若jar的manifest中已指定Main-Class,可省略主类名

package com.hadoop.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Copies a file from the local filesystem onto HDFS.
 *
 * @author lenovo
 */
public class CopyToHdfs {

 /**
  * Entry point.
  *
  * @param args optional: args[0] = local source path, args[1] = HDFS
  *             destination path; defaults keep the original hard-coded paths
  *             so existing invocations behave the same
  * @throws IOException if the copy fails or HDFS is unreachable
  */
 public static void main(String[] args) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);

  // Generalized: the original hard-coded both paths; allow them to be
  // supplied on the command line, falling back to the old values.
  String src = args.length > 0 ? args[0] : "/user/abc.txt";
  String dst = args.length > 1 ? args[1] : "/user/hadoop/abc.txt";
  fs.copyFromLocalFile(new Path(src), new Path(dst));
 }
}


//查看HDFS结点信息

package com.hadoop.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

/**
 * Prints HDFS cluster node information: the host of every datanode and the
 * block locations of a sample file.
 *
 * @author lenovo
 */
public class FindFileOnHdfs {

 /**
  * Entry point: lists datanodes, then the block locations of a fixed file.
  *
  * @param args unused
  * @throws IOException if HDFS cannot be contacted
  */
 public static void main(String[] args) throws IOException {
  getHDFSNodes();
  getFileLocal();
 }

 /**
  * Prints the cluster location of every block of a fixed HDFS file.
  *
  * @throws IOException if the file status or block locations cannot be read
  */
 private static void getFileLocal() throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path path = new Path("/user/hadoop/xxxxx/xxx.txt");
  // File status is needed both for the block-location query and for the
  // file's true length.
  FileStatus filestatus = fs.getFileStatus(path);
  // BUG FIX: the original queried a hard-coded byte range [0, 20), so any
  // block beyond the first 20 bytes of the file was never reported. Use
  // the real file length to cover every block.
  BlockLocation[] blockLocations =
    fs.getFileBlockLocations(filestatus, 0, filestatus.getLen());

  for (int i = 0; i < blockLocations.length; i++) {
   String[] hosts = blockLocations[i].getHosts();
   // BUG FIX: the original indexed hosts[0] unconditionally, which throws
   // ArrayIndexOutOfBoundsException for a block with no live replica.
   String location = hosts.length > 0 ? hosts[0] : "<no live replica>";
   System.out.println("block_" + i + "_location:" + location);
  }
 }

 /**
  * Prints the host name of every datanode known to the NameNode.
  *
  * @throws IOException if the default FileSystem is not HDFS or is unreachable
  */
 private static void getHDFSNodes() throws IOException {

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  // Fail with a clear message instead of a raw ClassCastException when the
  // default FileSystem is not HDFS (e.g. the local filesystem).
  if (!(fs instanceof DistributedFileSystem)) {
   throw new IOException("Default FileSystem is not HDFS: " + fs.getClass().getName());
  }
  DistributedFileSystem hdfs = (DistributedFileSystem) fs;
  // Datanode statistics as reported by the NameNode.
  DatanodeInfo[] datanodeinfo = hdfs.getDataNodeStats();
  for (int i = 0; i < datanodeinfo.length; i++) {
   // FIX: original printed "datanode_0_namehost" with no separator
   // between the label and the host name; add the missing ':'.
   System.out.println("datanode_" + i + "_name:" + datanodeinfo[i].getHostName());
  }
 }

}

猜你喜欢

转载自501565246-qq-com.iteye.com/blog/2042724