Kettle series: Insert data into Kudu using Kudu API

This article explains in detail how to use the Kudu API in Kettle to write data into Kudu. From this article, you can learn:
1. How to write a simple Kettle User Defined Java Class.
2. How to read the fields of each Kettle record. Note that getInteger() returns a Long object, and that Timestamp fields are read with getDate().
3. How to call Kudu API.

This Kettle example is very simple: the Data Grid component defines some sample data (covering multiple data types), and the Java class writes this sample data to Kudu.

Kudu table schema:

-- Target table for the Kettle example.
-- The key and the hash partitioning use only `id`: the column list declares no
-- `histdate` column, so it cannot be part of the primary key or partition spec.
-- The flag column is named `boolean_value` to match the column name that the
-- Java class below passes to kuduRow.addInt().
CREATE TABLE kudu_testdb.perf_test_t1
(
    id string ENCODING PLAIN_ENCODING COMPRESSION SNAPPY,
    int_value int,
    bigint_value bigint,
    timestamp_value timestamp,
    boolean_value int,
    PRIMARY KEY (id)
)
PARTITION BY HASH (id) PARTITIONS 2
STORED AS KUDU
TBLPROPERTIES (
  'kudu.table_name' = 'testdb.perf_test_t1',
  'kudu.master_addresses' = '10.205.6.1:7051,10.205.6.2:7051,10.205.7.3:7051'
);

 

Focus on the Java class code:

import java.sql.Timestamp;
import java.util.UUID;
import static java.lang.Math.toIntExact;

import org.apache.kudu.client.Insert;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.PartialRow;
import org.apache.kudu.client.SessionConfiguration;

// Name of the Kudu table opened in init().
private static final String KUDU_TABLE = "testdb.perf_test_t1";
// Comma-separated Kudu master addresses handed to the client builder.
private static final String KUDU_SERVERS = "10.205.6.1:7051,10.205.6.2:7051,10.205.7.3:7051";
// Mutation buffer size for the session; processRow() flushes at half of it.
private static final int OPERATION_BATCH = 50;

// Kudu handles, created in init() and released in dispose().
KuduClient client;
KuduSession session;
KuduTable table;
// Not referenced by any code visible in this class body.
Integer recordCount;
// Flush mode chosen in init() and applied to the session in processRow().
SessionConfiguration.FlushMode mode;

// Row handled in the previous loop iteration of processRow().
private Object[] previousRow;
// Row currently being converted into a Kudu insert.
private Object[] currentRow;


/**
 * Reads every incoming Kettle row and inserts it into the Kudu table opened in init().
 *
 * All input rows are consumed inside a single call, so this method always returns
 * false. Rows are not forwarded to a next step (no putRow call).
 *
 * Field mapping (Kettle field -> Kudu column, same name on both sides):
 *   id              String         -> addString
 *   int_value       Integer (Long) -> addInt, via toIntExact
 *   bigint_value    Integer (Long) -> addLong
 *   timestamp_value Date/Timestamp -> addLong, microseconds, shifted by +8 hours
 *   boolean_value   Boolean        -> addInt, 1 for true and 0 for false
 * A null Kettle value leaves the corresponding Kudu column unset.
 *
 * NOTE(review): the flag column is addressed as "boolean_value"; confirm the Kudu
 * table schema uses the same column name.
 *
 * @param smi step metadata (unused here)
 * @param sdi step data (unused here)
 * @return always false, because all rows are processed in one call
 * @throws KettleException if reading a field, converting a value or writing to Kudu fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  if (first) {
    first = false;
  }

  currentRow = getRow();
  if (currentRow == null) {
    // No input rows at all.
    setOutputDone();
    return false;
  }

  try {
    session.setFlushMode(mode);
    session.setMutationBufferSpace(OPERATION_BATCH);

    // Number of operations applied since the last flush.
    int uncommit = 0;
    while (currentRow != null) {
      Insert insert = table.newInsert();
      PartialRow kuduRow = insert.getRow();

      // kettle string -> kudu string
      String stringTmp = get(Fields.In, "id").getString(currentRow);
      if (stringTmp != null) {
        kuduRow.addString("id", stringTmp);
      }

      // kettle int -> kudu int (getInteger() returns a Long; toIntExact rejects overflow)
      Long longTmp = get(Fields.In, "int_value").getInteger(currentRow);
      if (longTmp != null) {
        kuduRow.addInt("int_value", toIntExact(longTmp.longValue()));
      }

      // kettle bigint -> kudu bigint
      longTmp = get(Fields.In, "bigint_value").getInteger(currentRow);
      if (longTmp != null) {
        kuduRow.addLong("bigint_value", longTmp.longValue());
      }

      // kettle date/timestamp -> kudu timestamp
      java.util.Date dateTmp = get(Fields.In, "timestamp_value").getDate(currentRow);
      if (dateTmp != null) {
        // Shift the epoch milliseconds by 8 hours (East 8th zone), then scale to microseconds.
        long shiftedMillis = dateTmp.getTime() + 8L * 3600L * 1000L;
        kuduRow.addLong("timestamp_value", shiftedMillis * 1000L);
      }

      // kettle boolean -> kudu int
      Boolean booleanTmp = get(Fields.In, "boolean_value").getBoolean(currentRow);
      if (booleanTmp != null) {
        kuduRow.addInt("boolean_value", booleanTmp.booleanValue() ? 1 : 0);
      }

      // Apply first, then count: this way every buffered operation is counted and
      // the final flush below cannot miss the row applied right after a flush.
      session.apply(insert);
      uncommit = uncommit + 1;

      // Manual flush mode: flush once half of the mutation buffer is in use.
      if (uncommit >= OPERATION_BATCH / 2) {
        flushAndReport();
        uncommit = 0;
      }

      previousRow = currentRow;
      currentRow = getRow();
    }

    // Flush whatever is still buffered after the last row.
    if (uncommit > 0) {
      flushAndReport();
    }
  } catch (KettleException e) {
    throw e;
  } catch (Exception e) {
    // Wrap Kudu and conversion failures so only the declared exception type escapes.
    throw new KettleException("Failed to write rows to Kudu table " + KUDU_TABLE, e);
  }

  // All input has been consumed; signal completion like the empty-input path does.
  setOutputDone();
  return false;
}

/**
 * Flushes the session and logs every per-row error returned by Kudu instead of
 * discarding the flush result.
 */
private void flushAndReport() throws Exception {
  java.util.List responses = session.flush();
  for (int i = 0; i < responses.size(); i++) {
    org.apache.kudu.client.OperationResponse response =
        (org.apache.kudu.client.OperationResponse) responses.get(i);
    if (response.hasRowError()) {
      logError("Kudu rejected a row: " + response.getRowError());
    }
  }
}

/**
 * Initializes the step and opens the Kudu client, session and table.
 *
 * The parent initialization runs first, so no Kudu connection is opened when the
 * step itself cannot start. A failure to reach Kudu or open the table is logged
 * and reported by returning false rather than by throwing.
 *
 * @param stepMetaInterface step metadata, passed through to the parent
 * @param stepDataInterface step data, passed through to the parent
 * @return true when both the parent initialization and the Kudu setup succeeded
 */
public boolean init(StepMetaInterface stepMetaInterface, StepDataInterface stepDataInterface) {
  if (!parent.initImpl(stepMetaInterface, stepDataInterface)) {
    return false;
  }

  try {
    client = new KuduClient.KuduClientBuilder(KUDU_SERVERS).build();
    session = client.newSession();
    table = client.openTable(KUDU_TABLE);
    // processRow() flushes explicitly, so the session runs in manual flush mode.
    mode = SessionConfiguration.FlushMode.MANUAL_FLUSH;
    return true;
  } catch (Exception e) {
    logError("Unable to open Kudu table " + KUDU_TABLE + " on " + KUDU_SERVERS, e);
    return false;
  }
}

/**
 * Releases the Kudu session and client, then disposes the parent step.
 *
 * Either handle may be null when init() failed part-way, so both are checked.
 * Close failures are logged instead of rethrown, so the client is still closed
 * and the parent dispose always runs.
 *
 * @param smi step metadata, passed through to the parent
 * @param sdi step data, passed through to the parent
 */
public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
  try {
    try {
      if (session != null && !session.isClosed()) {
        session.close();
      }
    } catch (Exception e) {
      logError("Error while closing the Kudu session", e);
    }

    try {
      if (client != null) {
        client.close();
      }
    } catch (Exception e) {
      logError("Error while closing the Kudu client", e);
    }
  } finally {
    // Drop the handles so a repeated dispose() does not touch closed objects.
    session = null;
    table = null;
    client = null;
    parent.disposeImpl(smi, sdi);
  }
}
 

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325113567&siteId=291194637