1. First, create the target table
# Create the table 'baidu' under the 'li' namespace, with column family 'info':  create 'li:baidu','info'
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import java.io.IOException; public class ReadFromFileIntoHbase { static class ReadFromFileMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put>{ ImmutableBytesWritable ibw = new ImmutableBytesWritable(); private byte[] FAMILY = Bytes.toBytes("info"); private byte[] SEARCH = Bytes.toBytes("serch"); private byte[] RANK = Bytes.toBytes("rank"); private byte[] CLICK = Bytes.toBytes("click"); private byte[] URL = Bytes.toBytes("url"); @Override protected void Map (LongWritable Key, the Text value, the Context context) throws IOException, InterruptedException { // the specified delimiter at the new table String [] = value.toString words () Split (. " \ T " ); // put time as the user id + RowKey String words RK = [ . 1 ] + " - " + words [ 0 ]; IBW. SET (Bytes.toBytes (RK)); // the original table has IF (words.length == . 
6 ) { Put put = new Put(Bytes.toBytes(rk)); put.addColumn(FAMILY,SEARCH,Bytes.toBytes(words[2])); put.addColumn(FAMILY,RANK,Bytes.toBytes(words[3])); put.addColumn(FAMILY,CLICK,Bytes.toBytes(words[4])); put.addColumn(FAMILY,URL,Bytes.toBytes(words[5])); context.write(ibw,put); }else { return; } } public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration config = HBaseConfiguration.create(); config.set("hbase.zookeeper.quorum","server3:2181"); config.set("zookeeper.znode.parent","/hbase-unsecure"); Job job = Job.getInstance(config, "ExampleRead"); job.setJarByClass(ReadFromFileIntoHbase.class); job.setMapperClass(ReadFromFileMapper.class); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(Put.class); // because we want the final data into a table, so we need to do TableMapReduceUtil.initTableReducerJob ( " Liang: sogo4 " , null , the Job); // set a the reduce job.setNumReduceTasks ( 0 ); FileInputFormat.addInputPath(job,new Path("D:\\sogou.500w.utf8")); boolean b = job.waitForCompletion(true); if (!b) { throw new IOException("error with job!"); } } } }
2. Query the loaded data
# View the first 10 rows:  scan 'li:baidu', {LIMIT => 10}