Hadoop + HBase Study Notes

I. Hadoop
1. Create a hadoop user
sudo useradd -m username -s /bin/bash   # use /bin/bash as the login shell
sudo passwd username                    # set the user's password, e.g. 123
sudo adduser username sudo              # grant the user administrator (sudo) privileges
su - lln                                # switch the current user to lln
su root                                 # switch to the superuser

2. File operations
sudo tar -zxvf /exitpath/filename.tar.gz -C /topath   # extract an archive into the target directory
vim ~/.bashrc                                          # edit the configuration file
source ~/.bashrc                                       # make the configuration take effect
sudo apt-get install vim                               # install with apt-get
sudo chown lln: ./eclipse.desktop                      # change the file's owner
sudo chmod u+x eclipse.desktop                         # make the file executable for its owner

3. Start Hadoop
Format the NameNode: ./bin/hdfs namenode -format
Start Hadoop: ./sbin/start-all.sh   (or ./sbin/start-dfs.sh followed by ./sbin/start-yarn.sh)
Check the Java processes: jps
View the web UI: http://localhost:50070
Leave safe mode: hdfs dfsadmin -safemode leave

4. Hadoop shell commands
Running WordCount as an example:
Go to the examples directory: cd /usr/local/hadoop-2.6.5/share/hadoop/mapreduce
Create directories: hadoop fs -mkdir -p /data/wordcount
hadoop fs -mkdir -p /output/
Create the input file locally: vim /usr/inputWord
Upload it to HDFS: hadoop fs -put /usr/inputWord /data/wordcount
Check it: hadoop fs -ls /data/wordcount
hadoop fs -text /data/wordcount/inputWord
Run the job:
hadoop jar hadoop-mapreduce-examples-2.6.5.jar wordcount /data/wordcount /output/wordcountresult

Compiling and running a Java program from the shell
1. Find the Java path: which java
2. Compile (.java -> .class): javac Filename.java
3. Build the jar: jar cvf Filename.jar Filename*.class
4. Run it under Hadoop: /usr/local/hadoop-2.6.5/bin/hadoop jar Filename.jar Filename

Hadoop FileSystem Shell reference: http://hadoop.apache.org/docs/r2.6.5/hadoop-project-dist/hadoop-common/FileSystemShell.html

hdfs dfs -appendToFile <localsrc> ... <dst>
hdfs dfs -cat URI [URI ...]
hdfs dfs -copyFromLocal <localsrc> URI
hdfs dfs -copyToLocal [-ignorecrc] [-crc] URI <localdst>
hdfs dfs -count [-q] [-h] <paths>
hdfs dfs -cp [-f] [-p | -p[topax]] URI [URI ...] <dest>
hdfs dfs -get [-ignorecrc] [-crc] <src> <localdst>
hdfs dfs -ls [-R] <args>
hdfs dfs -mkdir [-p] <paths>
hdfs dfs -moveFromLocal <localsrc> <dst>
hdfs dfs -moveToLocal [-crc] <src> <dst>
hdfs dfs -mv URI [URI ...] <dest>
hdfs dfs -put <localsrc> ... <dst>
hdfs dfs -rm [-f] [-r|-R] [-skipTrash] URI [URI ...]
hdfs dfs -text <src>
hdfs dfs -touchz pathname
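
These shell commands are thin wrappers over the same FileSystem API used in the Java section below. As an aside, the -ls call can also be driven from Java through FsShell; a minimal sketch, assuming the pseudo-distributed NameNode at localhost:9000 and the /data/wordcount path from the WordCount example above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.util.ToolRunner;

public class ShellFromJava {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        // Equivalent to: hdfs dfs -ls /data/wordcount
        int exitCode = ToolRunner.run(new FsShell(conf), new String[] { "-ls", "/data/wordcount" });
        System.exit(exitCode);
    }
}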

5. Hadoop Java API programming

Hadoop API documentation:
http://hadoop.apache.org/docs/r2.6.5/api/index.html

Prompt on the console for append or overwrite

System.out.println("append or overwrite?");
Scanner sc = new Scanner(System.in);
String s = sc.next();
if (s.equals("append")) {
    fs.append(path).close();         // open the file for append (the returned stream is what you write to)
    System.out.println("file appended");
} else {
    fs.create(path, true).close();   // overwrite: create with overwrite = true
    System.out.println("file overwritten");
}

Check whether a file exists:
fs.exists(path)
Mark a file for deletion when the FileSystem is closed (returns false if the path does not exist):
fs.deleteOnExit(path)
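
For example, a minimal sketch (fs and path as created in the sequence below) that only deletes a file when it is actually present:

// Hedged sketch: delete the file only if it exists (false = non-recursive delete)
if (fs.exists(path)) {
    fs.delete(path, false);
} else {
    System.out.println("file does not exist: " + path);
}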

Typical programming sequence
private static final String HDFS = "hdfs://localhost:9000";
String remote = HDFS + "/user/upload";   // HDFS file path
String local = "/home/lln/下载/test";     // local file path

private Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration());
fs.close();
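
Putting these pieces together, a minimal sketch of a complete program (the class name HdfsDemo and the mkdirs step are illustrative additions, not from the original notes):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsDemo {
    private static final String HDFS = "hdfs://localhost:9000";

    public static void main(String[] args) throws Exception {
        String remote = HDFS + "/user/upload";   // HDFS target directory
        String local = "/home/lln/下载/test";     // local file, as above
        FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration());
        try {
            if (!fs.exists(new Path(remote))) {
                fs.mkdirs(new Path(remote));     // create the target directory if missing
            }
            fs.copyFromLocalFile(new Path(local), new Path(remote));   // upload
        } finally {
            fs.close();                          // always release the FileSystem handle
        }
    }
}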

// Upload a file

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(HDFS + "/test");
fs.append(path);                                          // the file already exists: append to its end
fs.create(path, true);                                    // the file already exists: overwrite it
fs.copyFromLocalFile(new Path(local), new Path(remote));  // the file does not exist: upload it
fs.close();
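
The three calls above are alternatives rather than a sequence; a hedged sketch of an upload method that chooses between them (the method name, the append flag, and the imports java.io.FileInputStream, org.apache.hadoop.fs.FSDataOutputStream and org.apache.hadoop.io.IOUtils are assumptions):

// Upload "local" to "remote"; if the remote file already exists, either append
// the local content to it or overwrite it, depending on the "append" flag.
public static void upload(FileSystem fs, String local, String remote, boolean append) throws IOException {
    Path remotePath = new Path(remote);
    if (!fs.exists(remotePath)) {
        fs.copyFromLocalFile(new Path(local), remotePath);              // plain upload
    } else if (append) {
        FileInputStream in = new FileInputStream(local);
        FSDataOutputStream out = fs.append(remotePath);                 // append the local content
        IOUtils.copyBytes(in, out, 4096, true);                         // copy and close both streams
    } else {
        fs.copyFromLocalFile(false, true, new Path(local), remotePath); // overwrite = true
    }
}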

// Download a file

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(local);
fs.rename(new Path(local), new Path("/home/lln/文档/test1"));   // if the target already exists, rename it to "/home/lln/文档/test1"
fs.copyToLocalFile(new Path(remote), new Path(local));          // download the file to the local path
fs.close();
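
Note that fs here is the HDFS FileSystem, so rename() cannot really rename a local target; a hedged sketch of the intended check-and-rename logic using java.io.File instead (the method name and the ".bak" fallback suffix are illustrative):

// Download "remote" to "local"; if the local file already exists, fall back
// to a new name instead of overwriting it.
public static void download(FileSystem fs, String remote, String local) throws IOException {
    java.io.File localFile = new java.io.File(local);
    if (localFile.exists()) {
        local = local + ".bak";    // illustrative fallback name
        System.out.println("local file exists, saving as " + local);
    }
    fs.copyToLocalFile(new Path(remote), new Path(local));
}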

// Show the permissions, size, modification time, path, etc. of a file in HDFS

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
FileStatus fileStatus = fs.getFileStatus(path);
System.out.println("path: " + fileStatus.getPath());
System.out.println("permission: " + fileStatus.getPermission());
System.out.println("size: " + fileStatus.getLen());   // file length in bytes
System.out.println("modification time: " + fileStatus.getModificationTime());
fs.close();

// Recursively list all files and directories

// listStatus() only returns one level, so recursion is needed for sub-directories
private static void listRecursively(FileSystem fs, Path path) throws IOException {
    for (FileStatus f : fs.listStatus(path)) {
        System.out.printf("name: %s | folder: %s | size: %d\n",
                f.getPath(), f.isDirectory(), f.getLen());
        if (f.isDirectory()) {
            listRecursively(fs, f.getPath());   // descend into the sub-directory
        }
    }
}

FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration());
listRecursively(fs, new Path(remote));
fs.close();

// Create and delete a file

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
fs.create(path).close();   // create the file (close the returned output stream)
fs.delete(path, false);    // delete the file (false = non-recursive)
fs.close();

// Create and delete a directory

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
fs.mkdirs(path);          // create the directory
fs.listStatus(path);      // list its contents
fs.delete(path, true);    // delete the directory (true = recursive)
fs.close();

// Append content from a local file to a file in HDFS

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
FileInputStream in = new FileInputStream(local);   // the local content to append
FSDataOutputStream out = fs.append(path);          // open the HDFS file for append
IOUtils.copyBytes(in, out, 4096, true);            // copy and close both streams
fs.close();

// Delete a file in HDFS

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
fs.delete(path, false);   // false = non-recursive
fs.close();

// Move a file from a source path to a destination path

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
Path path1 = new Path(local);
fs.moveFromLocalFile(path1, path);   // move the local file into HDFS (the local copy is removed)
fs.close();

// Print the content of a file (method 1)

FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration());
Path path = new Path(remote);
FSDataInputStream fsdis = null;
System.out.println("cat: " + path);
try {
    fsdis = fs.open(path);
    IOUtils.copyBytes(fsdis, System.out, 4096, false);   // stream the file to stdout
} finally {
    IOUtils.closeStream(fsdis);
    fs.close();
}

// Print the content of a file (method 2)

FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration());
Path path = new Path(remote);
FSDataInputStream fp = fs.open(path);
BufferedReader br = new BufferedReader(new InputStreamReader(fp));
String line = br.readLine();
while (line != null) {
    System.out.println(line);   // print line by line
    line = br.readLine();
}
br.close();
fs.close();

// Print the content of a file (method 3)

FileSystem fs = FileSystem.get(URI.create(remote), conf);
InputStream in = null;
try {
    in = fs.open(new Path(remote));
    IOUtils.copyBytes(in, System.out, 4096, false);
} finally {
    IOUtils.closeStream(in);
}

II. HBase
Start the shell:
./bin/hbase shell

Shell commands
create 'test', 'cf'
list 'test'
describe 'test'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'
get 'test', 'row1'
disable 'test'
drop 'test'

quit

Stop HBase:
./bin/stop-hbase.sh
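
All of the Java snippets below call init(), close(), and showCell() and use shared connection/admin fields that this post never shows. A minimal sketch of what they presumably look like with the HBase 1.x client API (the class name and the hbase.rootdir value are assumptions for a local pseudo-distributed setup):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseOperation {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    // Open one shared connection and Admin handle
    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");   // assumed local setup
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Close the Admin handle and the connection
    public static void close() {
        try {
            if (admin != null) admin.close();
            if (connection != null) connection.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Print every cell of a Result: row key, family, qualifier, timestamp, value
    public static void showCell(Result result) {
        for (Cell cell : result.rawCells()) {
            System.out.println("RowKey: " + Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println("ColumnFamily: " + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("Qualifier: " + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("Timestamp: " + cell.getTimestamp());
            System.out.println("Value: " + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
}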

// List information about all HBase tables, e.g. their names

public static void listTables() throws IOException {
        init();
        HTableDescriptor hTableDescriptors[] = admin.listTables();
        for(HTableDescriptor hTableDescriptor: hTableDescriptors){
            System.out.println("表名:"+hTableDescriptor.getNameAsString());
        }
        close();
    }

// Print all records of the specified table to the terminal

public static void scanData(String tableName) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Scan scan=new Scan();
        ResultScanner scanner=table.getScanner(scan);
        for (Result result : scanner) {
            showCell(result);
        }
        scanner.close();
        close();
    }

// Add and delete a specified column family or column in an existing table

public static void insertRow(String tableName,String rowkey,String colFamily,String col,String val) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Put put = new Put(rowkey.getBytes());
        put.addColumn(colFamily.getBytes(), col.getBytes(), val.getBytes());
        table.put(put);
        table.close();
        close();
    }
public static void deleteRow(String tableName,String rowkey,String colFamily,String col) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Delete delete = new Delete(rowkey.getBytes());
        // delete all data in the specified column family:
        //delete.addFamily(colFamily.getBytes());
        // delete the data of a specified column:
        delete.addColumn(colFamily.getBytes(), col.getBytes());

        table.delete(delete);
        table.close();
        close();
    } 

// Clear all records of the specified table

public static void clearRows(String tableName) throws IOException {
        init();
        TableName tn = TableName.valueOf(tableName);
        // Save the existing descriptor so the table can be recreated with its column families
        HTableDescriptor hTableDescriptor = admin.getTableDescriptor(tn);
        admin.disableTable(tn);
        admin.deleteTable(tn);
        admin.createTable(hTableDescriptor);
        close();
    }
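
On the HBase 1.x client API the same effect is also available as a single Admin call; a hedged alternative (admin and tn as in the method above):

// Alternative: truncate the disabled table in one call (false = do not preserve region splits)
admin.disableTable(tn);
admin.truncateTable(tn, false);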

// Count the number of rows in a table

public static void countRows(String tableName) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Scan scan=new Scan();
        ResultScanner scanner=table.getScanner(scan);
        int num=0;
        for(Result result=scanner.next();result!=null;result=scanner.next()) {
        	num++;
        }
        System.out.println("行数:"+num);
        scanner.close();
        close();
    }

// Create a table

public static void createTable(String myTableName,String[] colFamily) throws IOException {
        init();
        TableName tableName = TableName.valueOf(myTableName);
        if (admin.tableExists(tableName)) {
            System.out.println("table exists!");
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        }
        HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);
        for (String str : colFamily) {
            HColumnDescriptor hColumnDescriptor = new HColumnDescriptor(str);
            hTableDescriptor.addFamily(hColumnDescriptor);
        }
        admin.createTable(hTableDescriptor);
        System.out.println("create table success");
        close();
    }

// Add records

public static void addRecord(String tableName,String row,String[] fields,String[] values) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        for (int i = 0; i != fields.length; i++) {
            Put put = new Put(row.getBytes());
            String[] cols = fields[i].split(":");   // each field is "family:qualifier"
            put.addColumn(cols[0].getBytes(), cols[1].getBytes(), values[i].getBytes());   // values[i], not values[1]
            table.put(put);
        }
        
        table.close();
        close();
}
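
As a usage illustration (the Student table, the Score column family, and the values are hypothetical, not from the original post):

// Hypothetical call: write three "family:qualifier" columns for one row key
String[] fields = { "Score:Math", "Score:Computer", "Score:English" };
String[] values = { "99", "80", "100" };
addRecord("Student", "scofield", fields, values);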

// Browse the data of one column of table tableName

public static void scanColumn(String tableName, String column) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Scan scan = new Scan();
        // "column" may be a bare column family or "family:qualifier"
        String[] parts = column.split(":");
        if (parts.length == 1) {
            scan.addFamily(Bytes.toBytes(parts[0]));
        } else {
            scan.addColumn(Bytes.toBytes(parts[0]), Bytes.toBytes(parts[1]));
        }
        ResultScanner scanner = table.getScanner(scan);
        for (Result result = scanner.next(); result != null; result = scanner.next()) {
            showCell(result);
        }
        scanner.close();
        table.close();
        close();
    }

// Modify table data

public static void modifyData(String tableName, String row, String column, String val) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Put put = new Put(row.getBytes());
        // "column" is expected as "family:qualifier"; a bare family gets a null qualifier
        String[] parts = column.split(":");
        if (parts.length == 1) {
            put.addColumn(parts[0].getBytes(), null, val.getBytes());
        } else {
            put.addColumn(parts[0].getBytes(), parts[1].getBytes(), val.getBytes());
        }
        table.put(put);
        table.close();
        close();
}

// Delete a row (with no column family or column specified, the entire row is removed)

public static void deleteRow(String tableName,String rowkey,String colFamily,String col) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Delete delete = new Delete(rowkey.getBytes());
        // delete all data in the specified column family:
        //delete.addFamily(colFamily.getBytes());
        // delete the data of a specified column:
        //delete.addColumn(colFamily.getBytes(), col.getBytes());

        table.delete(delete);
        table.close();
        close();
    }

III. MapReduce Programming Examples
Merge.java (file merging and deduplication)

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Merge {
    public static class Map extends Mapper<Object, Text, Text, Text> {
        private static Text text = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Emit each line as the key with an empty value; duplicates collapse in the reducer
            text = value;
            context.write(text, new Text(""));
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Each distinct line is written exactly once
            context.write(key, new Text(""));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        String[] otherArgs = new String[] { "input", "output" }; /* set the input arguments directly */
        if (otherArgs.length != 2) {
            System.err.println("Usage: Merge and duplicate removal <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Merge and duplicate removal");
        job.setJarByClass(Merge.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
MergeSort.java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MergeSort {
    public static class Map extends
            Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            data.set(Integer.parseInt(line));
            context.write(data, new IntWritable(1));
        }
    }
 public static class Reduce extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static IntWritable linenum = new IntWritable(1);

        public void reduce(IntWritable key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            for (IntWritable val : values) {
                context.write(linenum, key);
                linenum = new IntWritable(linenum.get() + 1);
            }

        }

    }
 public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        String[] otherArgs = new String[] { "input2", "output2" }; /* 直接设置输入参数 */
        if (otherArgs.length != 2) {
            System.err.println("Usage: mergesort <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "mergesort");
        job.setJarByClass(MergeSort.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}
STjoin.java
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class STjoin {
    public static int time = 0;

    public static class Map extends Mapper<Object, Text, Text, Text> {
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String child_name = new String();
            String parent_name = new String();
            String relation_type = new String();
            String line = value.toString();
            int i = 0;
            while (line.charAt(i) != ' ') {
                i++;
            }
            String[] values = { line.substring(0, i), line.substring(i + 1) };
            if (values[0].compareTo("child") != 0) {
                child_name = values[0];
                parent_name = values[1];
                relation_type = "1";
                context.write(new Text(values[1]), new Text(relation_type + "+"
                        + child_name + "+" + parent_name));
                relation_type = "2";
                context.write(new Text(values[0]), new Text(relation_type + "+"
                        + child_name + "+" + parent_name));
            }
        }
    }
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            if (time == 0) {
                context.write(new Text("grand_child"), new Text("grand_parent"));
                time++;
            }
            int grand_child_num = 0;
            String grand_child[] = new String[10];
            int grand_parent_num = 0;
            String grand_parent[] = new String[10];
            Iterator ite = values.iterator();
            while (ite.hasNext()) {
                String record = ite.next().toString();
                int len = record.length();
                int i = 2;
                if (len == 0)
                    continue;
                char relation_type = record.charAt(0);
                String child_name = new String();
                String parent_name = new String();
                while (record.charAt(i) != '+') {
                    child_name = child_name + record.charAt(i);
                    i++;
                }
                i = i + 1;
                while (i < len) {
                    parent_name = parent_name + record.charAt(i);
                    i++;
                }
                if (relation_type == '1') {
                    grand_child[grand_child_num] = child_name;
                    grand_child_num++;
                } else {
                    grand_parent[grand_parent_num] = parent_name;
                    grand_parent_num++;
                }
            }

            if (grand_parent_num != 0 && grand_child_num != 0) {
                for (int m = 0; m < grand_child_num; m++) {
                    for (int n = 0; n < grand_parent_num; n++) {
                        context.write(new Text(grand_child[m]), new Text(
                                grand_parent[n]));
                    }
                }
            }
        }
    }
 public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        String[] otherArgs = new String[] { "input3", "output3" };
        if (otherArgs.length != 2) {
            System.err.println("Usage: Single Table Join <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Single table join ");
        job.setJarByClass(STjoin.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}


Reposted from blog.csdn.net/lln_lln/article/details/80718516