Read a file from HDFS, perform a hash-based group by, and write the results into HBase
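The program takes three command-line arguments: R=<HDFS file path>, groupby:R<k> (the index of the group-by key column), and 'res:<aggregate list>', where the aggregates may be count plus avg/max/sum over some column Rk. The sample invocation from the javadoc below (the /hw1/lineitem.tbl path is just the assignment's example data set) is:

$ java Hw1Grp2 R=/hw1/lineitem.tbl groupby:R0 'res:count,avg(R2),max(R3)'

Each distinct value of the key column becomes a row key in an HBase table named Result, and the aggregates are stored under column family res with qualifiers such as count, avg(R2) and max(R3).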

/**
 * First assignment for Big Data Systems and Big Data Analysis
 * @author LMC
 * @version V2.0
 * @date: 2018.04.23
 */
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
  
import org.apache.commons.lang.StringUtils;  
import org.apache.hadoop.conf.Configuration;  
import org.apache.hadoop.fs.FSDataInputStream;  
import org.apache.hadoop.fs.FileSystem;  
import org.apache.hadoop.fs.Path;  
import org.apache.hadoop.hbase.HBaseConfiguration;  
import org.apache.hadoop.hbase.HColumnDescriptor;  
import org.apache.hadoop.hbase.HTableDescriptor;  
import org.apache.hadoop.hbase.MasterNotRunningException;  
import org.apache.hadoop.hbase.TableName;  
import org.apache.hadoop.hbase.ZooKeeperConnectionException;  
import org.apache.hadoop.hbase.client.HBaseAdmin;  
import org.apache.hadoop.hbase.client.HTable;  
import org.apache.hadoop.hbase.client.Put; 

/**
 * @ClassName: Hw1Grp2
 * @Description: Reads a table file from HDFS, performs a hash-based group by,
 *               and writes the aggregated results into an HBase table.
 * @author: LMC
 * @date: 2018.04.23
 */
public class Hw1Grp2 {
    private HTable table; 
    public void setTable(HTable table) {  
        this.table = table;  
    } 
    /**
     * Usage: java Hw1Grp2 R=/hw1/lineitem.tbl groupby:R0 'res:count,avg(R2),max(R3)'
     */
    public static void main(String[] args) throws MasterNotRunningException, ZooKeeperConnectionException, IOException, URISyntaxException {
        // create table descriptor
        String tableName = "Result";
        HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));

        // create column descriptor
        String columnFamily = "res";
        HColumnDescriptor cf = new HColumnDescriptor(columnFamily);
        htd.addFamily(cf);
        // configure HBase
        Configuration configuration = HBaseConfiguration.create();
        HBaseAdmin hAdmin = new HBaseAdmin(configuration);
        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table already exists");
        } else {
            hAdmin.createTable(htd);
            System.out.println("table " + tableName + " created successfully");
        }
        hAdmin.close();
        HTable table = new HTable(configuration, tableName);
        // parse command-line arguments: R=<hdfs file>, groupby:R<key column>, res:<aggregate list>
        String file = StringUtils.substringAfter(args[0], "=");
        String keyNum = StringUtils.substringAfter(args[1], "R");
        String colsName = StringUtils.substringAfter(args[2], ":");
        int rowKey = Integer.parseInt(keyNum);
        // map each requested aggregate (count, avg, max, sum) to the column index it operates on
        String[] tempStr = colsName.split(",");
        Map<String, Integer> temp = new HashMap<String, Integer>();
        for (int i = 0; i < tempStr.length; i++) {
            if (!tempStr[i].equals("count")) {
                temp.put(StringUtils.substringBefore(tempStr[i], "("), Integer.parseInt(StringUtils.substringBetween(tempStr[i], "R", ")")));
            } else {
                temp.put(tempStr[i], rowKey);
            }
        }
        System.out.println("file:" + file);  
        for(String key : temp.keySet()) {  
            System.out.println(key + ":" + temp.get(key));  
        }  
        Hw1Grp2 h = new Hw1Grp2();
        h.setTable(table);
        h.mainProcess(file, rowKey, temp);
        System.out.println("finished successfully");
    }
    /**
     * Performs the hash-based group by over the input file and writes the results into HBase.
     * @param file   HDFS path of the input table file
     * @param rowKey index of the group-by key column
     * @param args   map from aggregate name (count/avg/max/sum) to its column index
     */
    public void mainProcess(String file, int rowKey, Map<String, Integer> args) throws IOException, URISyntaxException {  
        String colStr = null;  
        Configuration conf = new Configuration();  
        FileSystem fs = FileSystem.get(URI.create(file), conf);  
        Path path = new Path(file);  
        FSDataInputStream inStream = fs.open(path);  
        BufferedReader in = new BufferedReader(new InputStreamReader(inStream));  
        // hash maps holding the per-group aggregate values, keyed by the group-by column value
        Map<String, Integer> countMap = new HashMap<String, Integer>();
        Map<String, Integer> sumMap = new HashMap<String, Integer>();
        Map<String, Integer> maxMap = new HashMap<String, Integer>();
        Map<String, Float> avgMap = new HashMap<String, Float>();
        int maxColnum = -1, avgColnum = -1, sumColnum = -1, countColnum = -1;
        // record which aggregates were requested and the column index each applies to
        if (args.containsKey("count")) {
            countColnum = args.get("count");
        }
        if (args.containsKey("avg")) {
            avgColnum = args.get("avg");
        }
        if (args.containsKey("max")) {
            maxColnum = args.get("max");
        }
        if (args.containsKey("sum")) {
            sumColnum = args.get("sum");
        }
            
        // scan the input file line by line, updating the per-group aggregates
        String str;
        while ((str = in.readLine()) != null) {
            String[] colnum = str.split("\\|");
            if (countMap.containsKey(colnum[rowKey])) {
                countMap.put(colnum[rowKey], countMap.get(colnum[rowKey]) + 1);
            } else {
                countMap.put(colnum[rowKey], 1);
            }
            if (sumColnum != -1) {
                if (sumMap.containsKey(colnum[rowKey])) {
                    sumMap.put(colnum[rowKey], sumMap.get(colnum[rowKey]) + Integer.parseInt(colnum[sumColnum]));
                } else {
                    sumMap.put(colnum[rowKey], Integer.parseInt(colnum[sumColnum]));
                }
            }
            if (avgColnum != -1) {
                // avgMap accumulates the running sum; it is divided by the count later
                if (avgMap.containsKey(colnum[rowKey])) {
                    avgMap.put(colnum[rowKey], avgMap.get(colnum[rowKey]) + Float.parseFloat(colnum[avgColnum]));
                } else {
                    avgMap.put(colnum[rowKey], Float.parseFloat(colnum[avgColnum]));
                }
            }
            if (maxColnum != -1) {
                if (maxMap.containsKey(colnum[rowKey])) {
                    if (Integer.parseInt(colnum[maxColnum]) > maxMap.get(colnum[rowKey]))
                        maxMap.put(colnum[rowKey], Integer.parseInt(colnum[maxColnum]));
                } else {
                    maxMap.put(colnum[rowKey], Integer.parseInt(colnum[maxColnum]));
                }
            }
        }
        // write the aggregated results from the hash maps into the HBase table
        for (String key : countMap.keySet()) {
            if (countColnum != -1) {
                colStr = "count";
                Put put = new Put(key.getBytes());
                put.add("res".getBytes(), colStr.getBytes(), (countMap.get(key) + "").getBytes());
                table.put(put);
            }
            if (avgColnum != -1) {
                colStr = "avg(R" + avgColnum + ")";
                // turn the accumulated sum into an average, rounded to two decimal places
                avgMap.put(key, (float) Math.round(avgMap.get(key) / countMap.get(key) * 100) / 100);
                Put put = new Put(key.getBytes());
                put.add("res".getBytes(), colStr.getBytes(), (avgMap.get(key) + "").getBytes());
                table.put(put);
            }
            if (maxColnum != -1) {
                colStr = "max(R" + maxColnum + ")";
                Put put = new Put(key.getBytes());
                put.add("res".getBytes(), colStr.getBytes(), (maxMap.get(key) + "").getBytes());
                table.put(put);
            }
            if (sumColnum != -1) {
                colStr = "sum(R" + sumColnum + ")";
                Put put = new Put(key.getBytes());
                put.add("res".getBytes(), colStr.getBytes(), (sumMap.get(key) + "").getBytes());
                table.put(put);
            }
        }
        System.out.println("Main process success");
    }  
    /**
     * Returns true if every character of colStr is a decimal digit.
     */
    public static boolean isInteger(String colStr) {
        for (int i = colStr.length(); --i >= 0; ) {
            char chr = colStr.charAt(i);
            if (chr < '0' || chr > '9')
                return false;
        }
        return true;
    }
	
}  
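Before running the class, the input file has to be uploaded to HDFS at the path given in the R= argument; afterwards the output can be inspected in the HBase shell. A usage sketch (the local file name lineitem.tbl and the /hw1 directory are assumptions matching the sample invocation above):

$ hdfs dfs -mkdir -p /hw1
$ hdfs dfs -put lineitem.tbl /hw1/lineitem.tbl
$ hbase shell
hbase(main):001:0> scan 'Result'

For the sample arguments, the scan should show one row per distinct value of the key column R0, with columns res:count, res:avg(R2) and res:max(R3).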

The course README for the VM environment is reproduced below.

--------------------------------------------------------------------------------
                README
--------------------------------------------------------------------------------
PLEASE save your code and data to a portable drive!!!
WARNING: this VM will be cleaned without notice after you log out.  
         Your code and data on the VM will get lost!!!

1. start hdfs and hbase

$ start-dfs.sh
$ start-hbase.sh

2. stop hdfs and hbase

$ stop-hbase.sh
$ stop-dfs.sh

3. hdfs directory is ~/work/hdfs

4. To compile your java code MyCode.java (implementing class MyCode)
$ javac MyCode.java

then to run it
$ java MyCode <args>

5. compile and run HDFSTest.java

$  javac HDFSTest.java
$  java HDFSTest

6. compile and run HBaseTest.java

$  javac HBaseTest.java 
$  java HBaseTest


check if we have successfully created mytable and put the new row:
start the hbase shell and run the following commands in it

$ hbase shell

hbase(main):001:0> scan 'mytable'
ROW                                                  COLUMN+CELL                                                                                                                                             
 abc                                                 column=mycf:a, timestamp=1428459927307, value=789                                                                                                       
1 row(s) in 1.8950 seconds

hbase(main):002:0> disable 'mytable'
0 row(s) in 1.9050 seconds

hbase(main):003:0> drop 'mytable'
0 row(s) in 1.2320 seconds

hbase(main):004:0> exit

--------------------------------------------------------------------------------

Reposted from blog.csdn.net/selinaqqqq/article/details/80363138