基础命令

Hadoop - 彻底解决警告：WARN util.NativeCodeLoader

1 cd ${HADOOP_HOME}
2 vim etc/hadoop/log4j.properties
# 在文件最后面追加：
3 log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

#意思是，对于 org.apache.hadoop.util.NativeCodeLoader 类的日志，只打印 ERROR 及以上级别，那么 ERROR 级别之下的，比如 DEBUG、INFO、WARN 等，都会被忽略。

HDFS文件操作系统

#创建文件夹
hdfs dfs -mkdir /temp
#亚马逊创建了6000+硬盘
cd /opt/
ls
#把本地文件上传到 HDFS（个人理解：相当于复制到“网络硬盘”）
hdfs dfs -copyFromLocal jdk-8u111-linux-x64.tar.gz /temp
#把文件从 HDFS 下载回本地（这里下载到当前用户的主目录 ~）
hdfs dfs -copyToLocal /temp/jdk-8u111-linux-x64.tar.gz ~
#删除文件
rm -rf jdk-8u111-linux-x64.tar.gz 
#创建 /tmp 目录
hdfs dfs -mkdir /tmp
#复制jdk文件到tmp（网页到网页用 -cp）
hdfs dfs -cp /temp/jdk-8u111-linux-x64.tar.gz /tmp
#查看tmp上面的文件
hdfs dfs -ls /tmp
#删除文件和目录
hdfs dfs -rm -R /tmp
hdfs dfs -rm -R /temp

#创建目录 /temp
hdfs dfs -mkdir /temp

.上传文件
vim exp.txt
hello hadoop
hello world
hello spring
spring cloud

hdfs dfs -copyFromLocal exp.txt /temp

idea编写代码

1 新建maven项目导入pom


    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
    </dependency>

#授权后即可删除文件（删除文件因权限不足报错时使用）
#执行后目录权限会从 drwxr-xr-x 变成 drwxrwxrwx（属组仍为 supergroup）；对根目录开放 777 仅适用于学习/测试环境
hdfs dfs -chmod -R 777 /
#查看由本地文件写入 HDFS 后的内容（对应下文 Java 代码的方法2）
hdfs dfs -cat /tmp/eee

3.java版上传下载

public class App 
{
    public static void main( String[] args ) throws Exception {
//        /** 方法1 任务
//         * 对HDFS 上的文件 写到用户指定的位置
//         * 找到你的大数据硬盘
//         */
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.64.210:9000"),new Configuration());
//
//        //准备io流读写文件
//        InputStream is = fs.open(new Path("/temp/exp.txt"));
        //准备一个输出流
//        FileOutputStream fos = new FileOutputStream("d:/abc.txt");
//        //把读出的数据流写入输出流
//        IOUtils.copyBytes(is,fos,4096,true);


//        /** 方法2
//         *crud文件 对用户指定的位置文件 写到HDFS 上的文件

//         */
//        在HDFS盘上创建一个文件夹
//        fs.mkdirs(new Path("/tmp"));
        //在HDFS盘上删除一个文件夹
//        fs.delete(new Path("/temp"),true);

        fs.createNewFile(new Path("/tmp/eee"));
        FileInputStream is = new FileInputStream("d:/a.txt");
        FSDataOutputStream fos = fs.append(new Path("/tmp/eee"));
        IOUtils.copyBytes(is,fos,4096,true);
    }
}

分布式计算模型MapReduce

1 idea代码02

#再次导包
  <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.6.0</version>
    </dependency>

2 编写mapper

#WcMapper层
public class WcMapper extends Mapper<LongWritable, Text,Text, IntWritable> {
    IntWritable one=new IntWritable(1);
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //字符串分割
        String[] infos = value.toString().split(" ");
        //hadoop hello ok
        for (String word : infos) {
            context.write(new Text(word),one);// hadoop 1,hello 1,ok 1

        }
    }
}

#WcReduce 层
public class WcReduce extends Reducer<Text, IntWritable,Text,IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count=0;
        for (IntWritable val : values) {
            count+=val.get();
        }
        context.write(key,new IntWritable(count));
    }
}

#WcRun层
public class WcRun {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        //1 初始化
        Configuration cfg = new Configuration();
        Job job = Job.getInstance(cfg);
        //2 设置jar加载路径,一般设置都是driver类.class
        job.setJarByClass(WcRun.class);



        //3 设置map和reduce类
        job.setMapperClass(WcMapper.class);
        job.setReducerClass(WcReduce.class);

        //4 设置map输出
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        //5 设置Reduce输出及最终的输出
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        //6 设置输入和输出路径,其中args是main)方法传入的参数
        FileInputFormat.setInputPaths(job,new Path("file:///d:/a.txt"));
        FileOutputFormat.setOutputPath(job,new Path("file:///d:/kgc"));

        //提交任务
        job.waitForCompletion(true);


    }
}