文章目录
广播变量允许您将一个数据集提供给某个operator的所有并行实例,该数据集在operator中以集合(Collection)的形式被访问
注意:由于广播变量的内容保存在每个节点的内存中,因此它不应该太大;广播变量常用于字典映射(查找表)一类的场景
package batch;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * Demonstrates broadcast variables in the Flink DataSet API.
 *
 * <p>A small data set of single-entry maps (key -> name) is registered as a
 * broadcast set on a map operator. The operator merges those maps into one
 * lookup table when it is opened and uses it to turn every
 * {@code (key, value)} input record into the string {@code "name,value"}.
 */
public class BroadcastDemo {

    /** Name under which the broadcast set is registered and later retrieved. */
    private static final String BROADCAST_NAME = "broadCastName";

    /**
     * Builds the job and prints the mapped records.
     *
     * @param args command-line arguments; not used
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Data set to broadcast: every (key, name) pair wrapped in its own map.
        DataSet<HashMap<String, String>> toBroadCast =
                env.fromCollection(lookupEntries()).map(new EntryToMap());

        // Records to process; the broadcast set is attached to the map operator by name.
        MapOperator<Tuple2<String, Integer>, String> result =
                env.fromCollection(records())
                        .map(new NameResolver())
                        .withBroadcastSet(toBroadCast, BROADCAST_NAME);

        result.print();
    }

    /** Returns the (key, name) pairs that make up the broadcast data. */
    private static List<Tuple2<String, String>> lookupEntries() {
        List<Tuple2<String, String>> entries = new ArrayList<>();
        entries.add(new Tuple2<>("101", "jack"));
        entries.add(new Tuple2<>("102", "tom"));
        entries.add(new Tuple2<>("103", "john"));
        return entries;
    }

    /** Returns the (key, value) records that the job processes. */
    private static List<Tuple2<String, Integer>> records() {
        List<Tuple2<String, Integer>> rows = new ArrayList<>();
        rows.add(new Tuple2<>("101", 2000000));
        rows.add(new Tuple2<>("102", 190000));
        rows.add(new Tuple2<>("103", 1000000));
        return rows;
    }

    /** Wraps one (key, name) pair in a map that holds exactly that entry. */
    private static final class EntryToMap
            implements MapFunction<Tuple2<String, String>, HashMap<String, String>> {

        @Override
        public HashMap<String, String> map(Tuple2<String, String> entry) throws Exception {
            HashMap<String, String> single = new HashMap<>();
            single.put(entry.f0, entry.f1);
            return single;
        }
    }

    /** Replaces the key of each record with the name found in the broadcast data. */
    private static final class NameResolver
            extends RichMapFunction<Tuple2<String, Integer>, String> {

        /** Lookup table merged from all broadcast maps; filled in {@link #open}. */
        private final HashMap<String, String> namesByKey = new HashMap<>();

        /**
         * Fetches the broadcast variable by its registered name and merges
         * every broadcast map into the single lookup table.
         */
        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            List<HashMap<String, String>> broadcastMaps =
                    getRuntimeContext().getBroadcastVariable(BROADCAST_NAME);
            for (HashMap<String, String> part : broadcastMaps) {
                namesByKey.putAll(part);
            }
        }

        /**
         * Formats a record as {@code name,value}. A key with no entry in the
         * lookup table yields the text {@code null} in place of the name.
         */
        @Override
        public String map(Tuple2<String, Integer> t2) throws Exception {
            return namesByKey.get(t2.f0) + "," + t2.f1;
        }
    }
}