(二)flink的DataSet:广播变量的使用

文章目录


广播变量允许您将一个数据集提供给某个operator的所有并行实例,该数据集在operator中以集合(Collection)的形式被访问
注意:由于广播变量的内容保存在每个节点的内存中,因此它不应该太大,常用于字典映射之类的场景

package batch;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Demonstrates DataSet broadcast variables: a small id-to-name lookup data set is
 * attached to a map operator, which uses it to turn {@code (id, amount)} records
 * into {@code "name,amount"} strings.
 *
 * <p>The broadcast content is kept in memory, so it should stay small; a
 * dictionary-style mapping like the one here is the typical use case.
 */
public class BroadcastDemo {

    /**
     * Name under which the lookup data set is registered on the operator and
     * later fetched from the runtime context. Both sides must use the same value,
     * so it is defined exactly once.
     */
    private static final String BROADCAST_NAME = "broadCastName";

    /**
     * Builds the two in-memory data sets, wires the broadcast set into the map
     * operator and prints the mapped records.
     *
     * @param args unused
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Prepare the data to broadcast: (id, name) pairs.
        List<Tuple2<String, String>> broadCastData = new ArrayList<>();
        broadCastData.add(new Tuple2<>("101", "jack"));
        broadCastData.add(new Tuple2<>("102", "tom"));
        broadCastData.add(new Tuple2<>("103", "john"));
        DataSet<Tuple2<String, String>> tuple2broadCastData = env.fromCollection(broadCastData);

        // Each pair becomes its own single-entry map; the consumer merges them.
        DataSet<HashMap<String, String>> toBroadCast = tuple2broadCastData.map(new SingleEntryMapper());

        // Prepare the records to process: (id, amount) pairs.
        List<Tuple2<String, Integer>> operatorData = new ArrayList<>();
        operatorData.add(new Tuple2<>("101", 2000000));
        operatorData.add(new Tuple2<>("102", 190000));
        operatorData.add(new Tuple2<>("103", 1000000));
        DataSet<Tuple2<String, Integer>> tuple2DataSource = env.fromCollection(operatorData);

        MapOperator<Tuple2<String, Integer>, String> result = tuple2DataSource
                .map(new NameLookupMapper())
                // Attach the lookup data set to the operator under the shared name.
                .withBroadcastSet(toBroadCast, BROADCAST_NAME);

        result.print();
    }

    /** Wraps one {@code (id, name)} tuple into a map holding just that entry. */
    private static final class SingleEntryMapper
            implements MapFunction<Tuple2<String, String>, HashMap<String, String>> {

        private static final long serialVersionUID = 1L;

        @Override
        public HashMap<String, String> map(Tuple2<String, String> value) throws Exception {
            HashMap<String, String> entry = new HashMap<>();
            entry.put(value.f0, value.f1);
            return entry;
        }
    }

    /**
     * Replaces the id of each {@code (id, amount)} record with the name found in
     * the broadcast lookup data and emits {@code "name,amount"}.
     */
    private static final class NameLookupMapper
            extends RichMapFunction<Tuple2<String, Integer>, String> {

        private static final long serialVersionUID = 1L;

        /** Merged id-to-name lookup; built in {@link #open} and therefore not serialized. */
        private transient HashMap<String, String> namesById;

        /**
         * Fetches the broadcast variable by name and merges its single-entry maps
         * into one lookup table, so that {@link #map} needs only one lookup per record.
         */
        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            List<HashMap<String, String>> entries =
                    getRuntimeContext().getBroadcastVariable(BROADCAST_NAME);
            namesById = new HashMap<>();
            for (HashMap<String, String> entry : entries) {
                namesById.putAll(entry);
            }
        }

        /**
         * @param t2 record whose {@code f0} is the id and {@code f1} the amount
         * @return {@code "name,amount"}; an id missing from the lookup data yields
         *         the literal text {@code "null"} in place of the name
         */
        @Override
        public String map(Tuple2<String, Integer> t2) throws Exception {
            String name = namesById.get(t2.f0);
            return name + "," + t2.f1;
        }
    }
}

发布了483 篇原创文章 · 获赞 62 · 访问量 14万+

猜你喜欢

转载自blog.csdn.net/wwwzydcom/article/details/103832608