A summary of bulk loading data into HBase

I have spent the last stretch of time digging into bulk loading data into HBase. What looks like a simple problem turns out to be riddled with pitfalls...

Below I share the problems I ran into and how I solved them, in the hope that newcomers will not have to take as many detours as I did.

The usual way to insert into HBase is through an HTable object: the data is wrapped in a Put, which is constructed with the row key and then has the column family, column qualifier and value added to it. HTable.put then submits the Put to the RegionServer over an RPC request.

The ways of writing data can be grouped as follows:


   
   
  1. Single put
  2. Batch put
  3. MapReduce
  4. Bulk load

Before doing any bulk loading, you first have to connect to HBase correctly:


   
   
    static {
        conf = HBaseConfiguration.create();
        // ZooKeeper quorum of the cluster and the port ZooKeeper exposes to clients
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.zookeeper.quorum", "192.168.137.138,192.168.137.139");
        conf.set("hbase.master", "192.168.10.138:60000");
    }
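For reference, the same connection can also be obtained through the newer Connection API that the bulkload example further down already uses for its Admin object. This is only a minimal sketch, assuming an HBase 1.x client on the classpath and the same ZooKeeper settings as above; "insertTest" is simply the table created later in this article.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.Table;

    public class ConnectionDemo {
        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            conf.set("hbase.zookeeper.quorum", "192.168.137.138,192.168.137.139");
            // A Connection is heavyweight and thread-safe: create it once, share it,
            // and close it when the application is done. Table handles are cheap.
            try (Connection connection = ConnectionFactory.createConnection(conf);
                 Table table = connection.getTable(TableName.valueOf("insertTest"))) {
                System.out.println("Connected, table: " + table.getName());
            }
        }
    }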

Next, create the table:


   
   
    // conf and ha are fields of the enclosing class (see the static block above)
    public static void createTable(String tableName) {
        try {
            ha = new HBaseAdmin(conf);
            // Drop the table first if it already exists
            if (ha.tableExists(tableName)) {
                ha.disableTable(tableName);
                ha.deleteTable(tableName);
            }
            // Define the table
            HTableDescriptor hd = new HTableDescriptor(tableName);
            // Add the column families
            hd.addFamily(new HColumnDescriptor("family1".getBytes()));
            hd.addFamily(new HColumnDescriptor("family2".getBytes()));
            ha.createTable(hd);
        } catch (Exception e) {
            System.out.println(e);
        }
    }

With this groundwork in place, we can start inserting data properly.

1. Single put

   
   
    // Insert one value: row key, column family, column qualifier, value, target table name
    public static void insertData(String rowkey, String cf,
            String column, String content, String tableName)
            throws IOException {
        htable = new HTable(conf, tableName);
        Put put = new Put(rowkey.getBytes());
        put.add(cf.getBytes(), column.getBytes(), content.getBytes());
        htable.put(put);
    }

This is the slowest way to load data in volume. It is better suited to online workloads where single records are written as they arrive, such as message or processing logs, with the HTable object released after the write. Every put submitted this way is a separate RPC request.

2. Batch put

Here each Put object is added to a List, and the whole list is submitted in one call. Compared with single puts, this noticeably improves load throughput. It is typically used when the data volume is somewhat larger, reducing the number of requests by submitting in batches (a chunked variant is sketched right after the code below).


   
   
    public static void insertData(String rowkey, String cf,
            String column, String content, String tableName)
            throws IOException {
        htable = new HTable(conf, tableName);
        List<Put> list = new ArrayList<Put>();
        Put put = new Put(rowkey.getBytes());
        put.add(cf.getBytes(), column.getBytes(), content.getBytes());
        list.add(put);
        htable.put(list);
    }
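The method above still builds a list with a single Put per call. A minimal sketch of the chunked variant mentioned earlier, assuming the same old HTable API and the static conf field used throughout this article; the chunk size of 1,000 Puts is an arbitrary illustration value, not a tuned recommendation:

    // Sketch only: accumulate Puts and submit them in chunks, so each RPC carries many rows.
    public static void insertBatch(List<String> rowkeys, String cf, String column,
                                   String content, String tableName) throws IOException {
        HTable table = new HTable(conf, tableName);
        try {
            List<Put> puts = new ArrayList<Put>();
            for (String rowkey : rowkeys) {
                Put put = new Put(rowkey.getBytes());
                put.add(cf.getBytes(), column.getBytes(), content.getBytes());
                puts.add(put);
                if (puts.size() >= 1000) {   // submit 1000 Puts in a single request
                    table.put(puts);
                    puts.clear();
                }
            }
            if (!puts.isEmpty()) {           // submit whatever is left over
                table.put(puts);
            }
        } finally {
            table.close();
        }
    }

On top of chunking, the client write buffer (HTable.setWriteBufferSize and the auto-flush settings) can reduce round trips further, at the cost of errors only surfacing when the buffer is flushed.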

Calling the insertData method above from main with the appropriate arguments gives you put-based batch insertion:


   
   
    public static void main(String[] args) throws IOException {
        createTable("insertTest");
        try {
            for (int i = 0; i < 10; i++) {
                String rowkey = UUID.randomUUID().toString();
                // Column families cannot be added on the fly, so only columns are added dynamically
                for (int j = 0; j <= 10; j++) {
                    insertData(rowkey, "family1", "column",
                            new SimpleDateFormat("yyyy-MM-dd hh:mm:ss")
                                    .format(new Date()), "insertTest");
                    insertData(rowkey, "family2", "column",
                            new SimpleDateFormat("yyyy-MM-dd hh:mm:ss")
                                    .format(new Date()), "insertTest");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

Because the put-based approaches above are not suited to really large volumes, I tested them on a cluster I set up myself. The two approaches below were tested on the company cluster.

3. Batch insertion with MapReduce

HBase is a distributed, non-relational database that sits on top of a Hadoop cluster, and Hadoop is the classic tool for processing data at scale, so using MapReduce to bulk load HBase is a natural choice. Without further ado, here is the code:


   
   
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    import java.io.IOException;
    import java.text.SimpleDateFormat;
    import java.util.Date;

    public class Hbase_MapReduceTest {

        static class BatchMapper extends Mapper<LongWritable, Text, Text, Text> {
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmssSS");
            Text text = new Text();

            protected void map(LongWritable key, Text value, Context context) {
                try {
                    // Input lines are "::"-delimited; build the row key from the first field plus a timestamp
                    final String[] spliteds = value.toString().split("::");
                    Date date = new Date();
                    String dateFormat = simpleDateFormat.format(date);
                    final String rowKey = spliteds[0] + "_" + dateFormat;
                    text.set(rowKey);
                    context.write(text, value);
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }

        static class BatchReducer extends TableReducer<Text, Text, NullWritable> {
            protected void reduce(Text key, Iterable<Text> values, Context context) {
                for (Text tx : values) {
                    try {
                        final String[] arrays = tx.toString().split("::");
                        Put put = new Put(key.getBytes());
                        put.addColumn("info".getBytes(), "name".getBytes(),
                                arrays[1].getBytes());
                        context.write(NullWritable.get(), put);
                    } catch (IOException e) {
                        e.printStackTrace();
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                }
            }
        }

        public static void main(String[] args) throws Exception {
            final Configuration configuration = new Configuration();
            configuration.set("hbase.zookeeper.quorum", "master");
            configuration.set("hbase.zookeeper.property.clientPort", "4180");
            // Name of the target HBase table
            configuration.set(TableOutputFormat.OUTPUT_TABLE, "HBASE_INSERT");
            configuration.set("dfs.socket.timeout", "180000");
            final Job job = new Job(configuration, "HBaseBatchImport");
            // Number of reduce tasks
            job.setNumReduceTasks(3);
            job.setMapperClass(BatchMapper.class);
            job.setReducerClass(BatchReducer.class);
            // Set the map output types; the reduce output types are not set
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setInputFormatClass(TextInputFormat.class);
            // No output path is set; set the output format class instead
            job.setOutputFormatClass(TableOutputFormat.class);
            // Input path of the data, e.g. hdfs://master:9000/input
            FileInputFormat.setInputPaths(job, args[0]);
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }

The biggest difference from an ordinary MapReduce job lies in where the reduce output goes: instead of writing the result to HDFS, the reducer writes straight into the table, and the reduce output types do not need to be set. The two key lines are:


   
   
    // Name of the target HBase table
    configuration.set(TableOutputFormat.OUTPUT_TABLE, "HBASE_INSERT");
    // Output format class for the data
    job.setOutputFormatClass(TableOutputFormat.class);

This route ultimately goes through the TableOutputFormat class, so the underlying mechanism is still HTable's put method; the difference is that the puts are submitted from a distributed MapReduce job, which is much faster than a single-threaded client.
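As a side note, the same wiring can be done with the helper class TableMapReduceUtil, whose initTableReducerJob call sets the output format, the target table and the reducer in one go. This is only a minimal sketch, assuming it sits next to the class above (same package) so that BatchMapper and BatchReducer are visible:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    public class HbaseMapReduceWithUtil {
        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "master");
            conf.set("hbase.zookeeper.property.clientPort", "4180");
            Job job = new Job(conf, "HBaseBatchImport");
            job.setMapperClass(Hbase_MapReduceTest.BatchMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setInputFormatClass(TextInputFormat.class);
            // Wires up TableOutputFormat, the target table and the reducer in a single call
            TableMapReduceUtil.initTableReducerJob("HBASE_INSERT",
                    Hbase_MapReduceTest.BatchReducer.class, job);
            FileInputFormat.setInputPaths(job, args[0]);
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }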

This approach is not a cure-all, though. When puts are submitted too quickly they put a lot of pressure on HBase: GC pauses become frequent and region servers can drop out. That is especially likely when a table is first initialized, since there is usually a large amount of historical data to load. In that situation the bulkload approach described below is recommended, because it takes most of the load off HBase. Note also that when there is almost no processing logic, the Put can be generated directly in the map phase and handed to TableOutputFormat; when real processing is needed, the usual pattern is to do the business logic (joins, aggregation and so on) on the map side and generate the Put objects on the reduce side, as the code above does.

4. Bulk loading with bulkLoad

This is the most widely used approach, and the fastest officially sanctioned way to bulk load HBase. The HBase reference guide covers it roughly as follows:

Bulk loading


   
   
    1. Overview
       HBase includes several methods of loading data into tables. The most
       straightforward ones are to use the TableOutputFormat class from a MapReduce
       job, or to use the normal client API; however, these are not always the most
       efficient. The bulk load feature uses a MapReduce job to output table data in
       HBase's internal data format, and then loads the generated StoreFiles directly
       into a running cluster. Bulk loading uses less CPU and network than going
       through the HBase API.
    2. Bulk load limitations
       Because bulk loading bypasses the write path, the WAL is not written as part
       of the process. Replication works by reading the WAL files, so it will not see
       the bulk-loaded data, and the same applies to edits that use
       Put.setDurability(SKIP_WAL). One way to handle this is to ship the raw files
       or HFiles to the other cluster and do the other processing there as well.
    3. Bulk load architecture
       The HBase bulk load process consists of two main steps.
       1. Prepare the data via a MapReduce job
          The first step is to generate the HBase data files (StoreFiles) from a
          MapReduce job using HFileOutputFormat2. This output format writes data in
          HBase's internal storage format so that it can later be loaded into the
          cluster very efficiently. To work efficiently, HFileOutputFormat2 must be
          configured so that each output HFile fits within a single region. To do
          this, jobs whose output will be bulk loaded use Hadoop's
          TotalOrderPartitioner to partition the map output into disjoint ranges of
          the key space, corresponding to the key ranges of the regions in the
          table. HFileOutputFormat2 includes a convenience function,
          configureIncrementalLoad(), which sets up a TotalOrderPartitioner
          automatically based on the current region boundaries of the table.
       2. Complete the data load
          After the data has been prepared, either with the importtsv tool using the
          "importtsv.bulk.output" option or with some other MapReduce job using
          HFileOutputFormat, the completebulkload tool is used to import the data
          into the running cluster. This command-line tool iterates over the
          prepared data files and, for each one, determines the region it belongs
          to. It then contacts the appropriate RegionServer, which adopts the HFile,
          moves it into its storage directory and makes the data available to
          clients. If the region boundaries have changed during bulk load
          preparation, or between the preparation and completion steps, the
          completebulkload utility automatically splits the data files into pieces
          corresponding to the new boundaries. This process is not optimal, so users
          should take care to minimize the delay between preparing a bulk load and
          importing it into the cluster, especially if other clients are
          simultaneously loading data through other means.

          $ hadoop jar hbase-server-VERSION.jar completebulkload
            [-c /path/to/hbase/config/hbase-site.xml] /user/todd/myoutput mytable

          The -c config-file option can be used to specify a file containing the
          appropriate hbase parameters (e.g. hbase-site.xml) if not already supplied
          on the CLASSPATH (in addition, the CLASSPATH must contain the directory
          with the zookeeper configuration files if zookeeper is not managed by
          HBase). If the target table does not already exist in HBase, this tool
          will create it automatically.
    4. See also
       For more information about the referenced utilities, see ImportTsv and
       CompleteBulkLoad, as well as the blog post "How-to: Use HBase Bulk Loading,
       and Why" for an account of the state of bulk loading.
    5. Advanced usage
       Although the importtsv tool is useful in many cases, advanced users may want
       to generate data programmatically, or import data from other formats. To get
       started doing so, dig into ImportTsv.java and check the JavaDoc for
       HFileOutputFormat. The import step of the bulk load can also be done
       programmatically.

For my test I bulk loaded the output of a WordCount job, using 2 GB of text files as input. I will skip the WordCount code itself and go straight to the bulk load code:


   
   
    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
    import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;

    public class WorldCount_Hbase {

        public static class ConvertWordCountOutToHFileMapper
                extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                // The WordCount output looks like "a  100", "b  20", one word per line;
                // lines are read with the byte offset as key and the line content as value
                String wordCountStr = value.toString();
                String[] wordCountArray = wordCountStr.split("\t");
                String word = wordCountArray[0];
                int count = Integer.valueOf(wordCountArray[1]);
                // Use the word as the HBase row key
                byte[] rowKey = Bytes.toBytes(word);
                ImmutableBytesWritable rowKeyWritable = new ImmutableBytesWritable(rowKey);
                byte[] family = Bytes.toBytes("cf");
                byte[] qualifier = Bytes.toBytes("count");
                byte[] hbaseValue = Bytes.toBytes(count);
                // Put handles multiple columns under a family; for a single column
                // a KeyValue could be used instead:
                // KeyValue keyValue = new KeyValue(rowKey, family, qualifier, hbaseValue);
                Put put = new Put(rowKey);
                put.add(family, qualifier, hbaseValue);
                context.write(rowKeyWritable, put);
            }
        }

        public static void main(String[] args) throws Exception {
            Long starttime = System.currentTimeMillis();
            Configuration hadoopConfiguration = new Configuration();
            hadoopConfiguration.set("fs.defaultFS", "hdfs://192.168.1.31:9000");
            // hadoopConfiguration.set("mapreduce.map.memory.mb", "512");
            // hadoopConfiguration.set("mapreduce.reduce.memory.mb", "512");
            String[] dfsArgs = new GenericOptionsParser(hadoopConfiguration, args)
                    .getRemainingArgs();
            FileSystem fs = FileSystem.get(hadoopConfiguration);
            Path input = new Path("/tmp/xmr/hbase/test");
            Path output = new Path("/tmp/xmr/resultdata/test");
            // Path input = new Path(dfsArgs[0]);
            // Path output = new Path(dfsArgs[1]);
            Job convertWordCountJobOutputToHFileJob =
                    new Job(hadoopConfiguration, "wordCount_bulkload");
            convertWordCountJobOutputToHFileJob.setJarByClass(WorldCount_Hbase.class);
            convertWordCountJobOutputToHFileJob.setMapperClass(
                    ConvertWordCountOutToHFileMapper.class);
            convertWordCountJobOutputToHFileJob.setMapOutputKeyClass(
                    ImmutableBytesWritable.class);
            convertWordCountJobOutputToHFileJob.setMapOutputValueClass(Put.class);
            // Remove any previous output directory
            if (fs.exists(output)) {
                fs.delete(output, true);
            }
            FileInputFormat.addInputPath(convertWordCountJobOutputToHFileJob, input);
            FileOutputFormat.setOutputPath(convertWordCountJobOutputToHFileJob, output);
            // Configuration for HBase
            Configuration hbaseConfiguration = HBaseConfiguration.create();
            hbaseConfiguration.set("hbase.zookeeper.quorum", "master,node001,node002");
            hbaseConfiguration.set("hbase.zookeeper.property.clientPort", "4180");
            System.out.println(hbaseConfiguration.toString());
            // Create the target table if it does not exist yet
            Admin admin = ConnectionFactory.createConnection(hbaseConfiguration).getAdmin();
            if (!admin.isTableAvailable(TableName.valueOf("wordcount"))) {
                HTableDescriptor hbaseTable =
                        new HTableDescriptor(TableName.valueOf("wordcount"));
                hbaseTable.addFamily(new HColumnDescriptor("cf"));
                admin.createTable(hbaseTable);
            }
            HTable wordCountTable = new HTable(hbaseConfiguration, "wordcount");
            HFileOutputFormat.configureIncrementalLoad(
                    convertWordCountJobOutputToHFileJob, wordCountTable);
            int convertWordCountJobOutputToHFileJobResult =
                    convertWordCountJobOutputToHFileJob.waitForCompletion(true) ? 0 : 1;
            // Use BulkLoad to move the MR output into the table
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(hbaseConfiguration);
            // First argument: the job's output directory (the HFiles); second: the target table
            loader.doBulkLoad(output, wordCountTable);
            Long endtime = System.currentTimeMillis();
            System.out.println("Elapsed time: " + (endtime - starttime));
            System.exit(convertWordCountJobOutputToHFileJobResult);
        }
    }

Points to note here:

  1. The map output types must be ImmutableBytesWritable with Put, or ImmutableBytesWritable with KeyValue: use Put when there are several columns, and KeyValue when there is only one (a KeyValue-based mapper is sketched after the snippet below).
  2. You do not write a reduce phase yourself, so there is no reduce output path to configure either.
  3. The generated HFiles are loaded into the table with the BulkLoad API:

   
   
    // Use BulkLoad to move the MR output into the table
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(hbaseConfiguration);
    // First argument: the job's output directory (the HFiles); second: the target table
    loader.doBulkLoad(output, wordCountTable);
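To illustrate note 1, here is a minimal sketch of a KeyValue-based mapper for the single-column case. It assumes the same tab-separated WordCount output as above and would slot into the class listed earlier, with org.apache.hadoop.hbase.KeyValue added to the imports and setMapOutputValueClass(KeyValue.class) set in the driver:

    public static class WordCountToKeyValueMapper
            extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] parts = value.toString().split("\t");
            byte[] rowKey = Bytes.toBytes(parts[0]);
            int count = Integer.parseInt(parts[1]);
            // One KeyValue is exactly one cell: row key, column family, qualifier, value
            KeyValue kv = new KeyValue(rowKey, Bytes.toBytes("cf"),
                    Bytes.toBytes("count"), Bytes.toBytes(count));
            context.write(new ImmutableBytesWritable(rowKey), kv);
        }
    }

configureIncrementalLoad accepts either value type, so the rest of the driver is unchanged.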

The above is a basic introduction to the common bulk load approaches; each of them was tested with a few GB of data and worked fine.

You can find plenty of nearly identical code online. But HBase is a database meant for storing massive amounts of data; in real applications, once the data volume really grows, will everything still work?

Of course not. The real pitfalls of HBase bulk loading only start here, and they tormented me to the point of questioning my life choices!

I will skip the assorted odd little problems and share the biggest pitfalls I hit while bulk loading with BulkLoad.

1. The reduce-related problem

In practice you will find that, for moderately large inputs, the map phase runs at a satisfying speed, but the reduce phase stalls badly. My confusion: my code contains no reduce phase at all, so why is there a reduce phase dragging down my load?

So I tried setting the number of reducers on the job to 0, yet after rerunning, the annoying reduce phase was still there. Since setting it to 0 had no effect, I tried raising the reducer count instead to get more parallelism, but the job still ran with a single reducer... Only later did I learn that in this setup you do not write the reducer yourself: HFileOutputFormat.configureIncrementalLoad plugs in an HBase-provided sorting reducer and sets the number of reduce tasks to the number of regions in the target table, so whatever reducer count you set is overridden. With that, it finally made sense.

2. Severely poor throughput

First I tested with 100 MB of data, and it took a full 30 seconds to load! With a few GB of data the throughput was no better, which works out to fewer than 15,000 rows per second on average, slower even than loading into MySQL and completely unacceptable in real production. Wasn't this supposed to be the fastest way in? I began to have doubts. At bottom, the cause is again that single annoying reducer: no matter how powerful the cluster, it effectively runs as a single machine, which is clearly not right. So how do you fix it?

Pre-split the table sensibly when you create it! The number of pre-split regions determines the number of reduce tasks. Simply put, within a reasonable range, a good pre-split multiplies throughput by roughly the same factor by which it multiplies the number of reducers.

Choosing a good pre-split for an HBase table is actually a fairly involved problem in its own right and not the focus of this article. If you are interested, the blog post linked below gave me a great deal of inspiration.

Blog link

Here I will only show the shell statement for pre-splitting a table at creation time and what it produces:

    create 'XUE_BULKLOAD','info',{SPLITS => ['1','2','3','4','5','6','7','8','9']}

   
   

This creates the table 'XUE_BULKLOAD' with column family 'info', pre-split into 10 regions.

Testing after the pre-split, the number of reducers turned out to be the number of split points plus one, and throughput improved dramatically: roughly 100,000 to 200,000 rows per second, which just about meets the demands of real work.
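If you prefer to pre-split from code rather than from the shell, a minimal sketch using the same HBaseAdmin API and static conf field as earlier in this article looks roughly like this; the split keys mirror the SPLITS list of the shell command:

    public static void createPreSplitTable(String tableName) throws IOException {
        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            byte[][] splitKeys = new byte[9][];
            for (int i = 0; i < 9; i++) {
                splitKeys[i] = Bytes.toBytes(String.valueOf(i + 1)); // "1" .. "9"
            }
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
            desc.addFamily(new HColumnDescriptor("info"));
            admin.createTable(desc, splitKeys); // 9 split points -> 10 regions
        } finally {
            admin.close();
        }
    }

Calling createPreSplitTable("XUE_BULKLOAD") then yields the same ten regions as the shell statement above.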

3. Loads beyond a certain size fail outright

After all this tuning the throughput was acceptable, so I started scaling the test data up. It turned out that with a few tens of GB, the job would always error out after the MapReduce phase finished, and the table would contain not a single record. The error was:


   
   
    Trying to load more than 32 hfiles to one family of one region
    18/01/18 23:20:36 ERROR mapreduce.LoadIncrementalHFiles: Trying to load
    more than 32 hfiles to family info of region with start key
    Exception in thread "main" java.io.IOException: Trying to load more than
    32 hfiles to one family of one region
        at org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.doBulkLoad(LoadIncrementalHFiles.java:377)
        at hbase_Insert.Hbase_Insert.main(Hbase_Insert.java:241)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:136)

The error roughly means that the job tried to load more than 32 HFiles into one region of the table, which makes the load fail. How do you get around this? It comes down to two configuration parameters.

hbase.hregion.max.filesize

The maximum store size per column family of a region; under the ConstantSizeRegionSplitPolicy a region is split automatically once it grows past this value. On this cluster the default was 1 GB.

hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily

The maximum number of HFiles allowed per region and column family during a bulk load; the default is 32. Taken together, the defaults mean a single bulk load cannot exceed 1 GB x 32 = 32 GB per region and family; beyond that the load fails.

Both parameters can be set in code or in hbase-site.xml under the conf directory of the HBase installation. To settle it once and for all I chose to set them in hbase-site.xml, as follows (a code-level sketch follows the XML):


   
   
    <property>
        <name>hbase.hregion.max.filesize</name>
        <value>10737418240</value>
    </property>
    <property>
        <name>hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily</name>
        <value>3200</value>
    </property>
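For completeness, here is a rough sketch of the in-code alternative mentioned above. The bulk-load HFile limit is read on the client by LoadIncrementalHFiles, so setting it on the Configuration is enough; as far as I know the region size limit only takes effect on the server side or per table, so the sketch raises it on the table descriptor instead (reusing the admin and splitKeys from the pre-split sketch earlier). The values simply repeat those from the XML:

    // Raise the client-side bulk load limit that LoadIncrementalHFiles checks
    hbaseConfiguration.set("hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily", "3200");
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(hbaseConfiguration);

    // Raise the maximum region size for this particular table at creation time
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("XUE_BULKLOAD"));
    desc.addFamily(new HColumnDescriptor("info"));
    desc.setMaxFileSize(10737418240L); // 10 GB per region before a split is triggered
    admin.createTable(desc, splitKeys);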

With that, a single bulk load can now take up to 10 GB x 3200 = 32 TB per region and family, which satisfies the company's data volumes. After restarting the cluster with this configuration the error no longer appeared, and the run looked like this:


   
   
    18/01/20 12:17:31 INFO mapreduce.Job:  map 1% reduce 0%
    18/01/20 12:17:35 INFO mapreduce.Job:  map 2% reduce 0%
    18/01/20 12:17:42 INFO mapreduce.Job:  map 3% reduce 0%
    18/01/20 12:17:45 INFO mapreduce.Job:  map 4% reduce 0%
    18/01/20 12:17:51 INFO mapreduce.Job:  map 5% reduce 0%
    18/01/20 12:17:55 INFO mapreduce.Job:  map 6% reduce 0%
    18/01/20 12:17:59 INFO mapreduce.Job:  map 7% reduce 0%
    18/01/20 12:18:03 INFO mapreduce.Job:  map 8% reduce 0%
    18/01/20 12:18:06 INFO mapreduce.Job:  map 9% reduce 0%
    18/01/20 12:18:11 INFO mapreduce.Job:  map 10% reduce 0%
    18/01/20 12:18:16 INFO mapreduce.Job:  map 11% reduce 0%
    18/01/20 12:18:20 INFO mapreduce.Job:  map 12% reduce 0%
    18/01/20 12:18:27 INFO mapreduce.Job:  map 13% reduce 0%
    18/01/20 12:18:32 INFO mapreduce.Job:  map 14% reduce 0%
    18/01/20 12:18:37 INFO mapreduce.Job:  map 15% reduce 0%
    18/01/20 12:18:42 INFO mapreduce.Job:  map 16% reduce 0%
    18/01/20 12:18:47 INFO mapreduce.Job:  map 17% reduce 0%
    18/01/20 12:18:53 INFO mapreduce.Job:  map 18% reduce 0%
    18/01/20 12:18:58 INFO mapreduce.Job:  map 19% reduce 0%
    18/01/20 12:19:03 INFO mapreduce.Job:  map 20% reduce 0%
    18/01/20 12:19:08 INFO mapreduce.Job:  map 21% reduce 0%
    18/01/20 12:19:14 INFO mapreduce.Job:  map 22% reduce 0%
    18/01/20 12:19:18 INFO mapreduce.Job:  map 23% reduce 0%
    18/01/20 12:19:23 INFO mapreduce.Job:  map 24% reduce 0%
    18/01/20 12:19:29 INFO mapreduce.Job:  map 25% reduce 0%
    18/01/20 12:19:33 INFO mapreduce.Job:  map 26% reduce 0%
    18/01/20 12:19:38 INFO mapreduce.Job:  map 27% reduce 0%
    18/01/20 12:19:43 INFO mapreduce.Job:  map 28% reduce 0%
    18/01/20 12:19:48 INFO mapreduce.Job:  map 29% reduce 0%
    18/01/20 12:19:53 INFO mapreduce.Job:  map 30% reduce 0%
    18/01/20 12:19:58 INFO mapreduce.Job:  map 31% reduce 0%
    18/01/20 12:20:04 INFO mapreduce.Job:  map 32% reduce 0%
    18/01/20 12:20:08 INFO mapreduce.Job:  map 33% reduce 0%
    18/01/20 12:20:13 INFO mapreduce.Job:  map 34% reduce 0%
    18/01/20 12:20:17 INFO mapreduce.Job:  map 35% reduce 0%
    18/01/20 12:20:21 INFO mapreduce.Job:  map 36% reduce 0%
    18/01/20 12:20:25 INFO mapreduce.Job:  map 37% reduce 0%
    18/01/20 12:20:29 INFO mapreduce.Job:  map 38% reduce 0%
    18/01/20 12:20:33 INFO mapreduce.Job:  map 39% reduce 0%
    18/01/20 12:20:37 INFO mapreduce.Job:  map 40% reduce 0%
    18/01/20 12:20:41 INFO mapreduce.Job:  map 41% reduce 0%
    18/01/20 12:20:45 INFO mapreduce.Job:  map 42% reduce 0%
    18/01/20 12:20:50 INFO mapreduce.Job:  map 43% reduce 0%
    18/01/20 12:20:54 INFO mapreduce.Job:  map 44% reduce 0%
    18/01/20 12:20:58 INFO mapreduce.Job:  map 45% reduce 0%
    18/01/20 12:21:02 INFO mapreduce.Job:  map 46% reduce 0%
    18/01/20 12:21:06 INFO mapreduce.Job:  map 47% reduce 0%
    18/01/20 12:21:10 INFO mapreduce.Job:  map 48% reduce 0%
    18/01/20 12:21:14 INFO mapreduce.Job:  map 49% reduce 0%
    18/01/20 12:21:18 INFO mapreduce.Job:  map 50% reduce 0%
    18/01/20 12:21:22 INFO mapreduce.Job:  map 51% reduce 0%
    ........
    18/01/20 12:29:12 INFO mapreduce.Job:  map 100% reduce 81%
    18/01/20 12:29:24 INFO mapreduce.Job:  map 100% reduce 82%
    18/01/20 12:29:36 INFO mapreduce.Job:  map 100% reduce 83%
    18/01/20 12:29:48 INFO mapreduce.Job:  map 100% reduce 84%
    18/01/20 12:30:00 INFO mapreduce.Job:  map 100% reduce 85%
    18/01/20 12:30:12 INFO mapreduce.Job:  map 100% reduce 86%
    18/01/20 12:30:23 INFO mapreduce.Job:  map 100% reduce 87%
    18/01/20 12:30:33 INFO mapreduce.Job:  map 100% reduce 88%
    18/01/20 12:30:45 INFO mapreduce.Job:  map 100% reduce 89%
    18/01/20 12:30:59 INFO mapreduce.Job:  map 100% reduce 90%
    18/01/20 12:31:11 INFO mapreduce.Job:  map 100% reduce 91%
    18/01/20 12:31:21 INFO mapreduce.Job:  map 100% reduce 92%
    18/01/20 12:31:33 INFO mapreduce.Job:  map 100% reduce 93%
    18/01/20 12:31:45 INFO mapreduce.Job:  map 100% reduce 94%
    18/01/20 12:31:57 INFO mapreduce.Job:  map 100% reduce 95%
    18/01/20 12:32:10 INFO mapreduce.Job:  map 100% reduce 96%
    18/01/20 12:32:28 INFO mapreduce.Job:  map 100% reduce 97%
    18/01/20 12:32:57 INFO mapreduce.Job:  map 100% reduce 98%
    18/01/20 12:33:28 INFO mapreduce.Job:  map 100% reduce 99%
    18/01/20 12:34:43 INFO mapreduce.Job:  map 100% reduce 100%
    18/01/20 12:38:02 INFO mapreduce.Job: Job job_1516347580021_0001 completed successfully
    18/01/20 12:38:02 INFO mapreduce.Job: Counters: 52
        File System Counters
            FILE: Number of bytes read=87576726096
            FILE: Number of bytes written=142193600747
            FILE: Number of read operations=0
            FILE: Number of large read operations=0
            FILE: Number of write operations=0
            HDFS: Number of bytes read=83582905128
            HDFS: Number of bytes written=166475667426
            HDFS: Number of read operations=5468
            HDFS: Number of large read operations=0
            HDFS: Number of write operations=39
        Job Counters
            Failed map tasks=6
            Launched map tasks=1086
            Launched reduce tasks=10
            Other local map tasks=6
            Data-local map tasks=465
            Rack-local map tasks=615
            Total time spent by all maps in occupied slots (ms)=82454392
            Total time spent by all reduces in occupied slots (ms)=47463944
            Total time spent by all map tasks (ms)=10306799
            Total time spent by all reduce tasks (ms)=5932993
            Total vcore-seconds taken by all map tasks=10306799
            Total vcore-seconds taken by all reduce tasks=5932993
            Total megabyte-seconds taken by all map tasks=84433297408
            Total megabyte-seconds taken by all reduce tasks=48603078656
        Map-Reduce Framework
            Map input records=568152966
            Map output records=568152966
            Map output bytes=228099087448
            Map output materialized bytes=54476960272
            Input split bytes=186120
            Combine input records=0
            Combine output records=0
            Reduce input groups=292435364
            Reduce shuffle bytes=54476960272
            Reduce input records=568152966
            Reduce output records=2339482912
            Spilled Records=1513624168
            Shuffled Maps =10800
            Failed Shuffles=0
            Merged Map outputs=10800
            GC time elapsed (ms)=794607
            CPU time spent (ms)=21363440
            Physical memory (bytes) snapshot=3038556569600
            Virtual memory (bytes) snapshot=9401710268416
            Total committed heap usage (bytes)=3512994889728
        Shuffle Errors
            BAD_ID=0
            CONNECTION=0
            IO_ERROR=0
            WRONG_LENGTH=0
            WRONG_MAP=0
            WRONG_REDUCE=0
        File Input Format Counters
            Bytes Read=83582349648
        File Output Format Counters
            Bytes Written=166475667426
    18/01/20 12:38:02 INFO zookeeper.RecoverableZooKeeper: Process identifier=hconnection-0x71f30c76 connecting to ZooKeeper ensemble=node003:4180,node002:4180,node001:4180,master:4180,node009:4180,node008:4180,node007:4180,node010:4180,node006:4180,node005:4180,node004:4180
    18/01/20 12:38:02 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=node003:4180,node002:4180,node001:4180,master:4180,node009:4180,node008:4180,node007:4180,node010:4180,node006:4180,node005:4180,node004:4180 sessionTimeout=90000 watcher=hconnection-0x71f30c760x0, quorum=node003:4180,node002:4180,node001:4180,master:4180,node009:4180,node008:4180,node007:4180,node010:4180,node006:4180,node005:4180,node004:4180, baseZNode=/hbase
    18/01/20 12:38:02 INFO zookeeper.ClientCnxn: Opening socket connection to server node004/192.168.1.38:4180. Will not attempt to authenticate using SASL (unknown error)
    18/01/20 12:38:02 INFO zookeeper.ClientCnxn: Socket connection established to node004/192.168.1.38:4180, initiating session
    18/01/20 12:38:02 INFO zookeeper.ClientCnxn: Session establishment complete on server node004/192.168.1.38:4180, sessionid = 0x26001af8d8190002, negotiated timeout = 40000
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Skipping non-directory hdfs://192.168.1.31:9000/test_demo/result/test/_SUCCESS
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/28e47c53edaf4616a3dfc349d0f0e02a with size: 10931823633 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/5e6501ccb7554b82a2e93024d61dbe0e with size: 10931820982 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/6245730468534f85a428ef7fb7acd499 with size: 10931829083 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/a3b7225320b24e838559d5a5772bdd87 with size: 10931823391 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/a8306bc4ef3941f5bd131d47f0b1c2c3 with size: 10931822321 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/c163c568b1c24d88ac0ed7599b81ecba with size: 10931824861 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/d069683ce064411793640f2a0ec6ca98 with size: 10931822990 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/e5df664e18c54da7b84370b72506923b with size: 10931821709 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 WARN mapreduce.LoadIncrementalHFiles: Trying to bulk load hfile hdfs://192.168.1.31:9000/test_demo/result/test/info/fba38b4d0bd34f6782b844b288780e7b with size: 10931826385 bytes can be problematic as it may lead to oversplitting.
    18/01/20 12:38:02 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:02 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:02 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:02 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:02 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:02 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:02 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:02 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/11e458c2c1f04654ae1783ec4e6576e8 first=459096918168596876155 last=4999999888024945828
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/78067c90799149dbb4a423ef556a4272 first=559078464243536377945 last=5999999888024945828
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/0049f16fd57b482aa2e68ebe21a0cb72 first=15907887724999982915 last=19999999217611496331
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/28e47c53edaf4616a3dfc349d0f0e02a first=80100000359202982424 last=859088818898462383266
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/6245730468534f85a428ef7fb7acd499 first=401000000531957283573 last=459096917941294955954
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/18068da4a3f5469a804eee9f6921617a first=959083192452571451003 last=99999998239977206078
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/5e6501ccb7554b82a2e93024d61dbe0e first=30100000359202982424 last=359081166786305137185
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/0cff66c092004d488db32c3bf549a1d1 first=0100000359202982424 last=0999998239977206078
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/c163c568b1c24d88ac0ed7599b81ecba first=10100000359202982424 last=15907887393454423668
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/e5df664e18c54da7b84370b72506923b first=501000000531957283573 last=559078458337340744586
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/88df957d66e84b758583c47c9e6aec95 first=25908421410455709356 last=29999998239977206078
    18/01/20 12:38:03 INFO hfile.CacheConfig: CacheConfig:disabled
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/a8306bc4ef3941f5bd131d47f0b1c2c3 first=60100000359202982424 last=659079145929173333600
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/d28c5e918b784127a7faa8afee8b364d first=359081168652388606128 last=39999999217611496331
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/a3b7225320b24e838559d5a5772bdd87 first=701000000531957283573 last=759089489615157841144
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168.1.31:9000/test_demo/result/test/info/d069683ce064411793640f2a0ec6ca98 first=20100000359202982424 last=25908421377193754247
    18/01/20 12:38:03 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://192.168


Reposted from blog.csdn.net/qq_38025219/article/details/84957131