Hive数据写入ES,处理经纬度范围查询

使用ES的初衷是快速检索。它单次查询默认最多返回一万条数据(from + size 受 index.max_result_window 限制,默认值为 10000),并不适合大数据量的处理计算。如果需要导出数据,或者对取出的数据再做二次处理,就需要换用其他组件。

首先需要将Hive中的数据写入ES。如果涉及经纬度,需要按 "lat,lng"(纬度在前、经度在后)的格式拼接成字符串,写入 location 字段。

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
import org.elasticsearch.spark.rdd.EsSpark

/**
  * Created by yinyi on 2019/9/3.
  */

object BigWide_V2 {

  import scala.util.control.NonFatal

  /**
    * Reads the wide label table from Hive, converts every row into a
    * [[BigWide_V2]] record and writes the records to Elasticsearch.
    *
    * @param args `args(0)` is the Elasticsearch resource handed to `EsSpark.saveToEs`
    * @throws IllegalArgumentException if no target resource is supplied
    */
  def main(args: Array[String]): Unit = {
    // Fail fast, before a Spark session is created, when the target is missing.
    require(args.nonEmpty, "usage: BigWide_V2 <elasticsearch resource>")
    val esResource = args(0)

    val spark = SparkSession
      .builder()
      .appName("TestES")
      .config("spark.sql.warehouse.dir", Utils.HIVEWAREHOUSELOCATION)
      // Elasticsearch nodes (comma separated). The original also set a hard-coded
      // node list first, but this later value always overrode it.
      .config("es.nodes", Utils.HOST_ES)
      .config("es.index.auto.create", "true") // create the index automatically
      // NOTE(review): this key has no "es." prefix, so the connector may ignore it — confirm.
      .config("index.refresh_interval", "-1")
      //      .config("es.write.operation","upsert") // update the document when the id already exists
      .config("es.mapping.date.rich", "false")
      .config("spark.sql.shuffle.partitions", "6000")
      .enableHiveSupport()
      .getOrCreate()

    try {
      // Only rows with a well-formed mobile number and in-range coordinates are
      // exported; `location` is built as "lat,lng".
      val los = spark.sql("select name,mobile ,province,city,county,concat_ws(',',nvl(lat,'0.0'),nvl(lng,'0.0')) as location,child_english_label," +
        " chusan_gaosan_label,vippl_label,xinyongka_label,cos_med_label,age_label,sex_label,edu_label," +
        " marry_status_label,income_level_label,consume_level_label,month_consume_pinci_label " +
        " from  precisionmarketing.result_190905_sugang_jingqing_big_wide_label "+
        " where mobile                   rlike '^1[3456789]\\\\d{9}$'   " +
        " and lng >-180    and lng<180 and lat>-90 and lat<90         " +
        "").rdd

      val lostcontact: RDD[BigWide_V2] = los.map { row =>
        BigWide_V2(
          row.getAs[String]("name"),
          row.getAs[String]("mobile").toString.toLong,
          row.getAs[String]("province"),
          row.getAs[String]("city"),
          row.getAs[String]("county"),
          row.getAs[String]("location"),
          getLabel(row, "child_english_label"),
          getLabel(row, "chusan_gaosan_label"),
          getLabel(row, "vippl_label"),
          getLabel(row, "xinyongka_label"),
          getLabel(row, "cos_med_label"),
          getLabel(row, "age_label"),
          getLabel(row, "sex_label"),
          getLabel(row, "edu_label"),
          getLabel(row, "marry_status_label"),
          getLabel(row, "income_level_label"),
          getLabel(row, "consume_level_label"),
          getLabel(row, "month_consume_pinci_label")
        )
      }

      EsSpark.saveToEs(lostcontact, esResource)
    } finally {
      // Release the session even when the query or the write fails.
      spark.close()
    }
  }

  /**
    * Splits a comma-separated string column into its parts.
    *
    * @param row   the row to read from
    * @param label the column name
    * @return the parts, or `null` when the column is null or cannot be read
    */
  def getLabel(row: Row, label: String): Array[String] =
    splitColumn(row, label, ",")

  /**
    * Splits a tab-separated string column into its parts.
    *
    * @param row   the row to read from
    * @param label the column name
    * @return the parts, or `null` when the column is null or cannot be read
    */
  def getLabel1(row: Row, label: String): Array[String] =
    splitColumn(row, label, "\t")

  /** Shared implementation of the label getters; `separator` is used as a regex by `split`. */
  private def splitColumn(row: Row, label: String, separator: String): Array[String] =
    try {
      // A null column is the expected "no labels" case; handle it without an exception.
      Option(row.getAs[String](label)).map(_.split(separator)).orNull
    } catch {
      // Best effort as before: an unreadable column yields null instead of failing the job.
      case NonFatal(_) => null
    }

}
/**
  * One document written to Elasticsearch by the `BigWide_V2` job.
  *
  * @param name     value of the `name` column
  * @param mobile   mobile number parsed as a number
  * @param province value of the `province` column
  * @param city     value of the `city` column
  * @param county   value of the `county` column
  * @param location coordinates as a single "lat,lng" string
  *
  * All `*_label` fields hold the parts of the corresponding label column and
  * may be `null` when the column is absent.
  */
case class BigWide_V2(
  name: String,
  mobile: Long,
  province: String,
  city: String,
  county: String,
  location: String,
  child_english_label: Array[String],
  chusan_gaosan_label: Array[String],
  vippl_label: Array[String],
  xinyongka_label: Array[String],
  cos_med_label: Array[String],
  age_label: Array[String],
  sex_label: Array[String],
  edu_label: Array[String],
  marry_status_label: Array[String],
  income_level_label: Array[String],
  consume_level_label: Array[String],
  month_consume_pinci_label: Array[String]
)

1. 将不同的 QueryBuilder 封装在一起查询

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.elasticsearch.action.admin.indices.delete.DeleteIndexAction;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregationBuilder;
import org.elasticsearch.search.collapse.CollapseBuilder;
/**
	 * Returns the number of uploaded documents plus the number of documents
	 * matching the given filter.
	 *
	 * A filter query is only issued when {@code list} holds exactly one entry;
	 * for an empty list or a list with more than one entry only the uploaded
	 * count is returned. When a query is issued, the single entry of
	 * {@code list} is replaced by the result of {@code removeMap} if
	 * {@code mapScope} accepts it, so the caller's list is modified.
	 *
	 * @param input_indices  indices holding the uploaded mobile numbers
	 * @param input_types    types of the uploaded documents
	 * @param delete_indices not used by this method
	 * @param delete_types   not used by this method
	 * @param indices        indices to run the filter query against
	 * @param types          types to run the filter query against
	 * @param list           filter criteria
	 * @return uploaded count plus matched count
	 */
	public static long queryTotalLngLat(String[] input_indices,String[] input_types,String[] delete_indices,String[] delete_types,
			String[] indices,String[] types,List<Map<String,String>> list){
		// Count of the uploaded mobile numbers; always part of the result.
		final long uploadedCount = MarketingDao.queryByTotal(input_indices, input_types);
		long matchedCount = 0;

		if (list.size() == 1) {
			final Map<String, String> criteria = list.get(0);
			if (mapScope(criteria)) {
				// Geo-scoped request: combine the attribute filter with the distance filter.
				final BoolQueryBuilder combined = QueryBuilders.boolQuery();
				// The geo query is built from the list before the entry is swapped below.
				final QueryBuilder geoQuery = Utils.queryRangeScopLngLat(list);

				final Map<String, String> stripped = removeMap(criteria);
				list.remove(0);
				list.add(stripped);
				final BoolQueryBuilder attributeQuery = Utils.getBoolQuery(list);

				if (attributeQuery != null) {
					combined.must(attributeQuery);
				}
				if (geoQuery != null) {
					combined.must(geoQuery);
				}
				matchedCount = MarketingDao.queryByTotal(indices, types, combined);
			} else {
				// No geo scope: the attribute filter alone decides the matches.
				final BoolQueryBuilder attributeQuery = Utils.getBoolQuery(list);
				matchedCount = MarketingDao.queryByTotal(indices, types, attributeQuery);
			}
		}
		return matchedCount + uploadedCount;
	}

2.获取范围内的数据

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;


import org.elasticsearch.common.geo.GeoDistance;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.query.AbstractQueryBuilder;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.GeoDistanceQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.*;




/**
	 * Builds a geo-distance query on the {@code location} field.
	 *
	 * Each map is expected to carry the keys {@code lng}, {@code lat} and
	 * {@code distance} (kilometres) as decimal strings. Entries that are null,
	 * lack one of the keys, or hold a non-numeric value are skipped as a whole;
	 * when several entries are valid the last one wins.
	 *
	 * @param list filter criteria
	 * @return an arc geo-distance query around the parsed point
	 */
	public static QueryBuilder queryRangeScopLngLat(List<Map<String,String>> list){
		double lng=0;
		double lat=0;
		double distance=0;
		for (Map<String, String> map : list) {
			if (map == null) {
				continue;
			}
			String lngText = map.get("lng");
			String latText = map.get("lat");
			String distanceText = map.get("distance");
			// A missing key would make the parse throw NullPointerException,
			// which the NumberFormatException handler below does not cover.
			if (lngText == null || latText == null || distanceText == null) {
				continue;
			}
			try {
				// Parse all three values first so that a bad entry cannot
				// overwrite only part of a previously accepted one.
				double parsedLng = Double.parseDouble(lngText);
				double parsedLat = Double.parseDouble(latText);
				double parsedDistance = Double.parseDouble(distanceText);
				lng = parsedLng;
				lat = parsedLat;
				distance = parsedDistance;
			} catch (NumberFormatException e) {
				continue;
			}
		}
		// NOTE(review): when no entry is valid the query is still built with
		// point (0,0) and distance 0, as before; Elasticsearch presumably
		// rejects a non-positive distance — confirm.
		return QueryBuilders.geoDistanceQuery("location")
				.point(lat, lng)
				.distance(distance, DistanceUnit.KILOMETERS)
				.geoDistance(GeoDistance.ARC);
	}

 3.获取具体值

/**
	 * Runs the given query and collects the {@code mobile} field of every hit.
	 *
	 * @param indices   indices to search
	 * @param types     document types to search
	 * @param boolQuery the query to execute
	 * @return the {@code mobile} values of the hits as strings, hits without
	 *         that field are left out; never null
	 */
	public static List<String> queryByTotalLngLat(String[] indices,String[] types,QueryBuilder boolQuery){
		// NOTE(review): a size of 100000 is above the default result window of
		// 10000, so the index setting presumably has been raised — confirm.
		SearchHits matches = client
				.prepareSearch(indices)
				.setTypes(types)
				.setQuery(boolQuery)
				.setSize(100000)
				.get()
				.getHits();

		List<String> mobiles = new ArrayList<String>();
		if (matches == null) {
			return mobiles;
		}
		for (SearchHit hit : matches) {
			Object mobile = hit.getSource().get("mobile");
			if (mobile == null) {
				continue;
			}
			mobiles.add(mobile.toString());
		}
		return mobiles;
	}
发布了33 篇原创文章 · 获赞 7 · 访问量 9585

猜你喜欢

转载自blog.csdn.net/Baron_ND/article/details/103025793