package dim
import java.util
import myUtils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{
RichSinkFunction, SinkFunction}import org.apache.flink.streaming.api.functions.source.{
RichSourceFunction, SourceFunction}import org.apache.hadoop.hbase.{
Cell, TableName}import org.apache.hadoop.hbase.client.{
Connection, Put, Result, ResultScanner, Scan, Table}import org.apache.hadoop.hbase.util.Bytes
import scala.collection.JavaConverters._
/**
 * Flink sink that persists [[DimArea]] records into the HBase table
 * "dim_lagou_area", one Put per element, all columns under family "f1"
 * keyed by areaId.
 */
class HBaseWriter extends RichSinkFunction[DimArea] {

  // HBase resources are per-task: created in open(), released in close().
  var connection: Connection = _
  var hTable: Table = _

  /** Opens the HBase connection and the target table once per task instance. */
  override def open(parameters: Configuration): Unit = {
    connection = new ConnHBase().connToHabse
    hTable = connection.getTable(TableName.valueOf("dim_lagou_area"))
  }

  /** Closes the table first, then the connection (reverse acquisition order). */
  override def close(): Unit = {
    if (hTable != null) {
      hTable.close()
    }
    if (connection != null) {
      connection.close()
    }
  }

  /** Writes one dimension row for each incoming element. */
  override def invoke(value: DimArea, context: SinkFunction.Context[_]): Unit = {
    insertDimArea(hTable, value)
  }

  /**
   * Inserts a single DimArea record, row key = areaId, columns under "f1".
   *
   * Bug fixes versus the previous version:
   *  - the `table` parameter is now actually used (it was ignored in favor
   *    of the `hTable` field);
   *  - `Bytes.toBytes` (UTF-8) replaces `String.getBytes()`, which depended
   *    on the JVM's platform default charset.
   *
   * @param table HBase table to write into
   * @param value dimension record to persist
   */
  def insertDimArea(table: Table, value: DimArea): Unit = {
    val family = Bytes.toBytes("f1")
    val put = new Put(Bytes.toBytes(value.areaId.toString))
    put.addColumn(family, Bytes.toBytes("aname"), Bytes.toBytes(value.aname.toString))
    put.addColumn(family, Bytes.toBytes("cid"), Bytes.toBytes(value.cid.toString))
    put.addColumn(family, Bytes.toBytes("city"), Bytes.toBytes(value.city.toString))
    put.addColumn(family, Bytes.toBytes("proId"), Bytes.toBytes(value.proId.toString))
    put.addColumn(family, Bytes.toBytes("province"), Bytes.toBytes(value.province.toString))
    table.put(put)
  }
}
Use the Flink Table API to process the data. Approach: 1. Initialize the data source and obtain the Flink StreamExecutionEnvironment. 2. Obtain and parse the stream data. 3. Create a temporary table from the stream data. 4. Write the transformation SQL. 5. Convert the Flink table back into a data stream. 6. Use a custom sink to write the result into the HBase table.
package dim
import org.apache.flink.streaming.api.{
CheckpointingMode, TimeCharacteristic}import org.apache.flink.streaming.api.scala.{
DataStream, StreamExecutionEnvironment}import org.apache.flink.api.scala._
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row
/**
 * Builds the area dimension: reads raw area rows from HBase, self-joins
 * area -> city -> province via Flink SQL, and writes the flattened
 * [[DimArea]] rows back to HBase through [[HBaseWriter]].
 */
object AreaDetailInfo {

  def main(args: Array[String]): Unit = {
    // 1. Streaming environment with 5s exactly-once checkpointing.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(5000)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    // 2. Read (rowKey, concatenated-values) pairs from HBase.
    val data: DataStream[(String, String)] = env.addSource(new HBaseReader)

    // 3. Parse each pair into an AreaDetail. Assumes x._2 is a "-"-joined
    //    record whose field 5 is the area name and field 6 the parent id —
    //    TODO confirm against HBaseReader's output layout.
    val dataStream: DataStream[AreaDetail] = data.map { x =>
      val id: Int = x._1.toInt
      val fields: Array[String] = x._2.split("-")
      AreaDetail(id, fields(5).trim, fields(6).trim.toInt)
    }

    // 4. Register the stream as a view and resolve area -> city -> province
    //    through a double self-join on the parent-id chain.
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
    tableEnv.createTemporaryView("lagou_area", dataStream)
    val sql: String =
      """
        |select a.id as areaId,a.name as aname,a.pid as cid,b.name as city,c.id as proid,c.name as province
        |from lagou_area as a
        |inner join lagou_area as b on a.pid = b.id
        |inner join lagou_area as c on b.pid = c.id
        |""".stripMargin
    val areaTable: Table = tableEnv.sqlQuery(sql)

    // 5. A retract stream emits (flag, row) pairs where flag=false marks a
    //    retraction. Bug fix: keep only accumulate messages (flag == true)
    //    so retracted join results are not re-inserted into HBase.
    val resultStream: DataStream[DimArea] =
      tableEnv
        .toRetractStream[DimArea](areaTable)
        .filter(_._1)
        .map(_._2)

    resultStream.print()
    // 6. Persist the joined dimension rows through the custom HBase sink.
    resultStream.addSink(new HBaseWriter)
    env.execute("dimareabuild")
  }
}