package com.duowan.hive.udf.generic;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF that sorts a map and returns it as {@code map<string,string>}.
 *
 * <p>Usage: {@code sort_map(map, 'key', 'desc')}.
 * <ul>
 *   <li>Argument 1: the map to sort. Keys and values are converted to strings
 *       with {@code toString()}.</li>
 *   <li>Argument 2: {@code 'key'} (case-insensitive) sorts by key; any other
 *       value, including NULL, sorts by value.</li>
 *   <li>Argument 3: {@code 'asc'} (case-insensitive) sorts ascending; any other
 *       value, including NULL, sorts descending.</li>
 * </ul>
 * Ordering is the lexicographic {@link String#compareTo(String)} order of the
 * string forms; NULLs sort before non-NULLs in ascending order.
 *
 * @author Irwin
 */
public class GenericUDFSortMap extends GenericUDF {

    /** Selector (2nd argument) that requests sorting by key. */
    private static final String SORT_BY_KEY = "key";

    /** Selector (3rd argument) that requests ascending order. */
    private static final String ORDER_ASC = "asc";

    private MapObjectInspector mapOI;
    private PrimitiveObjectInspector sortFieldOI;
    private PrimitiveObjectInspector orderOI;

    /** Result holder; cleared and refilled on every {@link #evaluate} call. */
    private final Map<Text, Text> sortMap = new LinkedHashMap<Text, Text>();

    /**
     * Validates the argument inspectors and declares the return type.
     *
     * @param arguments inspectors for (map, sort field, order)
     * @return a standard map inspector with writable-string keys and values
     * @throws UDFArgumentLengthException if the argument count is not 3
     * @throws UDFArgumentTypeException if argument 1 is not a map or argument
     *         2 or 3 is not a primitive
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 3) {
            throw new UDFArgumentLengthException(
                    "The function SORT_MAP only accepts 3 argument. \nsimple: sort_map(map,'key','desc')");
        }
        if (!(arguments[0] instanceof MapObjectInspector)) {
            throw new UDFArgumentTypeException(0, "\"" + Category.MAP.toString().toLowerCase()
                    + "\" is expected at function SORT_MAP, " + "but \""
                    + arguments[0].getTypeName() + "\" is found");
        }
        // The selectors are read through their inspectors in evaluate(), so they
        // must be primitives; report the offending argument index precisely.
        for (int i = 1; i <= 2; i++) {
            if (!(arguments[i] instanceof PrimitiveObjectInspector)) {
                throw new UDFArgumentTypeException(i, "A string is expected as argument " + (i + 1)
                        + " of function SORT_MAP, but \"" + arguments[i].getTypeName()
                        + "\" is found");
            }
        }

        mapOI = (MapObjectInspector) arguments[0];
        sortFieldOI = (PrimitiveObjectInspector) arguments[1];
        orderOI = (PrimitiveObjectInspector) arguments[2];

        return ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }

    /**
     * Sorts the input map according to the sort-field and order arguments.
     *
     * @param arguments deferred (map, sort field, order) values
     * @return the sorted map (the same instance is reused across calls), or
     *         {@code null} when the input map is NULL
     * @throws HiveException if a deferred argument cannot be evaluated
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        sortMap.clear();

        Object mapObj = arguments[0].get();
        Map<?, ?> rawMap = (mapObj == null) ? null : mapOI.getMap(mapObj);
        if (rawMap == null) {
            return null;
        }

        String sortField = readString(sortFieldOI, arguments[1].get());
        String order = readString(orderOI, arguments[2].get());

        // LinkedHashMap keeps the source iteration order, so entries that compare
        // equal stay in a deterministic order after the (stable) sort.
        Map<Text, Text> inputMap = new LinkedHashMap<Text, Text>();
        for (Map.Entry<?, ?> entry : rawMap.entrySet()) {
            inputMap.put(toText(entry.getKey()), toText(entry.getValue()));
        }

        if (SORT_BY_KEY.equalsIgnoreCase(sortField)) {
            sortMap.putAll(sortMapByKey(inputMap, order));
        } else {
            sortMap.putAll(sortMapByValue(inputMap, order));
        }
        return sortMap;
    }

    /**
     * Renders the call for EXPLAIN output as {@code sort_map(arg1, arg2, arg3)}.
     *
     * @param children display strings of the arguments
     * @return the display string of this function call
     */
    @Override
    public String getDisplayString(String[] children) {
        StringBuilder display = new StringBuilder("sort_map(");
        for (int i = 0; i < children.length; i++) {
            if (i > 0) {
                display.append(", ");
            }
            display.append(children[i]);
        }
        return display.append(')').toString();
    }

    /**
     * Returns a new insertion-ordered map with the entries of {@code map}
     * sorted by value.
     *
     * @param map   the map to sort; not modified
     * @param order {@code "asc"} (case-insensitive) for ascending; anything
     *              else, including {@code null}, for descending
     * @return a new {@link LinkedHashMap} in sorted order
     */
    public static Map<Text, Text> sortMapByValue(Map<Text, Text> map, final String order) {
        final Comparator<Text> byText = textComparator(order);
        List<Entry<Text, Text>> entries = new ArrayList<Entry<Text, Text>>(map.entrySet());
        Collections.sort(entries, new Comparator<Entry<Text, Text>>() {
            @Override
            public int compare(Entry<Text, Text> o1, Entry<Text, Text> o2) {
                // Compare the values themselves; Entry.toString() is "key=value"
                // and would make this a sort by key.
                return byText.compare(o1.getValue(), o2.getValue());
            }
        });

        Map<Text, Text> sorted = new LinkedHashMap<Text, Text>();
        for (Entry<Text, Text> entry : entries) {
            sorted.put(entry.getKey(), entry.getValue());
        }
        return sorted;
    }

    /**
     * Returns a new insertion-ordered map with the entries of {@code map}
     * sorted by key.
     *
     * @param map   the map to sort; not modified
     * @param order {@code "asc"} (case-insensitive) for ascending; anything
     *              else, including {@code null}, for descending
     * @return a new {@link LinkedHashMap} in sorted order
     */
    public static Map<Text, Text> sortMapByKey(Map<Text, Text> map, final String order) {
        List<Text> keys = new ArrayList<Text>(map.keySet());
        Collections.sort(keys, textComparator(order));

        Map<Text, Text> sorted = new LinkedHashMap<Text, Text>();
        for (Text key : keys) {
            sorted.put(key, map.get(key));
        }
        return sorted;
    }

    /**
     * Misspelled historical name of {@link #sortMapByKey(Map, String)}, kept so
     * existing callers continue to compile.
     *
     * @param map   the map to sort; not modified
     * @param order {@code "asc"} (case-insensitive) for ascending; anything
     *              else for descending
     * @return a new {@link LinkedHashMap} in sorted order
     */
    public static Map<Text, Text> sortMayByKey(Map<Text, Text> map, final String order) {
        return sortMapByKey(map, order);
    }

    /** Builds a null-safe comparator on the string form of {@link Text}, in the requested direction. */
    private static Comparator<Text> textComparator(String order) {
        final boolean ascending = ORDER_ASC.equalsIgnoreCase(order);
        return new Comparator<Text>() {
            @Override
            public int compare(Text o1, Text o2) {
                // Swap operands instead of negating the result to reverse the order.
                return ascending ? compareNullsFirst(o1, o2) : compareNullsFirst(o2, o1);
            }
        };
    }

    /** Compares two possibly-null {@link Text} values by their string form, nulls first. */
    private static int compareNullsFirst(Text left, Text right) {
        if (left == null) {
            return (right == null) ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.toString().compareTo(right.toString());
    }

    /** Reads a primitive argument as a string through its inspector; NULL stays {@code null}. */
    private static String readString(PrimitiveObjectInspector inspector, Object value) {
        if (value == null) {
            return null;
        }
        Object javaValue = inspector.getPrimitiveJavaObject(value);
        return (javaValue == null) ? null : javaValue.toString();
    }

    /** Converts a map key or value to {@link Text} via {@code toString()}; NULL stays {@code null}. */
    private static Text toText(Object value) {
        if (value == null) {
            return null;
        }
        // NOTE(review): assumes toString() on the map's element objects yields their
        // textual value (true for Text/String) - confirm for other element types.
        String text = value.toString();
        return (text == null) ? null : new Text(text);
    }
}
Hive 中的 Map 排序:第二个参数指定按 'key' 或 'value' 排序,第三个参数指定升序('asc')或降序('desc'),返回排序后的 map<string,string>。
例子:
SELECT sort_map(map,'key','desc') FROM TABLE;