This article explains in detail how to use the Kudu API from Kettle to write data into Kudu. From this article, you will learn:
1. How to write a simple Kettle User Defined Java Class.
2. How to read the fields of each Kettle record. Note that getInteger() returns a Long object, and that a Timestamp field is read with getDate().
3. How to call the Kudu API.
This Kettle example is very simple: the Data Grid component defines some sample data (covering several data types), and the Java class writes that sample data to Kudu.
Kudu table schema:
-- Target table for the Kettle example. The primary key and the hash partitioning
-- use only `id`: the key columns must be declared in the column list, and `id`
-- is the only key column the Java writer populates.
CREATE TABLE kudu_testdb.perf_test_t1 (
  id string ENCODING PLAIN_ENCODING COMPRESSION SNAPPY,
  int_value int,
  bigint_value bigint,
  timestamp_value timestamp,
  bool_value int,
  PRIMARY KEY (id)
)
PARTITION BY HASH (id) PARTITIONS 2
STORED AS KUDU
TBLPROPERTIES (
  'kudu.table_name' = 'testdb.perf_test_t1',
  'kudu.master_addresses' = '10.205.6.1:7051,10.205.6.2:7051,10.205.7.3:7051'
);
Focus on the Java class code:
import java.sql.Timestamp; import java.util.UUID; import static java.lang.Math.toIntExact; import org.apache.kudu.client.Insert; import org.apache.kudu.client.KuduClient; import org.apache.kudu.client.KuduException; import org.apache.kudu.client.KuduSession; import org.apache.kudu.client.KuduTable; import org.apache.kudu.client.PartialRow; import org.apache.kudu.client.SessionConfiguration; private final static String KUDU_TABLE="testdb.perf_test_t1"; private final static String KUDU_SERVERS="10.205.6.1:7051,10.205.6.2:7051,10.205.7.3:7051"; private final static int OPERATION_BATCH = 50; KuduClient client=null; KuduSession session=null; KuduTable table=null; Integer recordCount=null; SessionConfiguration.FlushMode mode; private Object[] previousRow; private Object[] currentRow; public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException { if (first) { first = false; } currentRow = getRow(); if (currentRow == null) { setOutputDone(); return false; } try { session.setFlushMode(mode); session.setMutationBufferSpace(OPERATION_BATCH); int uncommit = 0; while(currentRow != null) { Insert insert = table.newInsert(); PartialRow kuduRow = insert.getRow(); int intTmp; Long longTmp; String stringTmp; java.util.Date dateTmp; Boolean booleanTmp; // kettle string -> kudu string //kuduRow.addString("id",UUID.randomUUID().toString()); stringTmp = get(Fields.In, "id").getString(currentRow); if (stringTmp!=null) { kuduRow.addString("id",stringTmp); } // kettle int -> kudu int //import static java.lang.Math.toIntExact; longTmp=get(Fields.In, "int_value").getInteger(currentRow); if (longTmp!=null) { intTmp =toIntExact(get(Fields.In, "int_value").getInteger(currentRow)); kuduRow.addInt("int_value", intTmp); } // kettle bigint -> kudu bigint longTmp=get(Fields.In, "bigint_value").getInteger(currentRow); if (longTmp!=null) { kuduRow.addLong("bigint_value", longTmp); } // kettle date/timestamp -> kudu timestamp dateTmp= get(Fields.In, 
"timestamp_value").getDate(currentRow); if (dateTmp!=null) { longTmp =dateTmp.getTime()+8*3600*1000; // Go to East 8th zone time kuduRow.addLong("timestamp_value", longTmp*1000 ); } // kettle boolean -> kudu int booleanTmp= get(Fields.In, "boolean_value").getBoolean(currentRow); if (booleanTmp!=null) { intTmp=0; if (booleanTmp) {intTmp = 1 ;} kuduRow.addInt("boolean_value", intTmp); } // For manual submission, the buffer needs to be flushed when it is not full. Here, when half of the buffer is used, submit uncommit = uncommit + 1 ; if (uncommit > OPERATION_BATCH / 2 ) { session.flush(); uncommit = 0; } session.apply(insert); previousRow=currentRow; currentRow=getRow(); } // For manual commits, make sure to complete the final commit if (uncommit > 0 ) { session.flush(); } } catch (Exception e) { e.printStackTrace (); throw e; } // Send the row on to the next step. //putRow(data.outputRowMeta, currentRow); return false; } public boolean init(StepMetaInterface stepMetaInterface, StepDataInterface stepDataInterface) { try { client = new KuduClient.KuduClientBuilder(KUDU_SERVERS).build(); session = client.newSession(); table =client.openTable(KUDU_TABLE); mode = SessionConfiguration.FlushMode.MANUAL_FLUSH; } catch (Exception e) { e.printStackTrace (); throw e; } return parent.initImpl(stepMetaInterface, stepDataInterface); } public void dispose(StepMetaInterface smi, StepDataInterface sdi) { try { if (!session.isClosed()) { session.close(); } } catch (Exception e) { e.printStackTrace (); throw e; } parent.disposeImpl(smi, sdi); }