erlang抽象码与basho的protobuf(三)代码生成原理之语义分析

上文介绍了protobuffs的词法与语法分析过程,发现其收集了目标proto文件及其import文件的语法树,此处继续观察语义分析过程。

语法树的格式如下:

[Message1 = {message,MessageName,[FieldRecord1,FieldRecord2,...,FieldRecordn]},Message2,...,Messagen],

FieldRecord = {FieldID,required/optional/repeated/repeated_packed,FieldType,FieldName,DefaultValue}。

protobuffs_compile.erl

%% Entry point of source generation for one .proto file.
%%
%% Parses ProtoFile and its imports, collects all message/enum definitions,
%% resolves the field types and passes the result to output_source/4.
%%
%% ProtoFile - path of the .proto file; must be a list (string), as enforced
%%             by the guard.
%% Options   - proplist; the value of `imports_dir` (default []) is appended
%%             to the built-in import search paths "./" and "src/".
%%
%% Returns whatever output_source/4 returns. Crashes with badmatch when
%% parse/1 does not return {ok, _}.
generate_source(ProtoFile,Options) when is_list (ProtoFile) ->
    %% Base name of the generated output: "<proto file basename>_pb".
    Basename = filename:basename(ProtoFile, ".proto") ++ "_pb",
    {ok,FirstParsed} = parse(ProtoFile),
    ImportPaths = ["./", "src/" | proplists:get_value(imports_dir, Options, [])],
    %% Syntax tree of the target file together with those of its imports.
    Parsed = parse_imports(FirstParsed, ImportPaths),
    %% Semantic analysis, step 1: gather message, enum and extend definitions.
    Collected = collect_full_messages(Parsed),
    %% Semantic analysis, step 2: bind every field type to a known type.
    Messages = resolve_types(Collected#collected.msg,Collected#collected.enum),
    output_source (Basename, Messages, Collected#collected.enum, Options).

语义分析需要遍历语法树中所有的message、enum、extend定义:收集每个message的域定义和所有的enum定义,并根据extend定义对相应的message进行扩展。

%% Collects the definitions found in the parsed syntax tree Data, starting
%% from an empty #collected{} accumulator.
collect_full_messages(Data) ->
    EmptyAcc = #collected{},
    collect_full_messages(Data, EmptyAcc).

%% Message clause. Splits the body of a message into its field definitions,
%% nested enums, extension ranges and nested messages. Fields and extension
%% ranges are stored in the accumulator under the message's type path; nested
%% enums and messages are re-queued behind Tail, with the parent path attached,
%% so that later iterations process them like top-level definitions.
collect_full_messages([{message, Name, Fields} | Tail], Collected) ->
    %% A name whose head is itself a list is used as a type path as is;
    %% any other name is wrapped into a one-element path.
    ListName =
        case erlang:is_list(hd(Name)) of
            true -> Name;
            false -> [Name]
        end,
    %% Each list below ends up in reverse declaration order.
    FieldsOut = lists:reverse([Field || {_, _, _, _, _} = Field <- Fields]),
    Enums = lists:reverse(
        [{enum, [EnumName | ListName], EnumBody} || {enum, EnumName, EnumBody} <- Fields]
    ),
    Extensions = lists:reverse([{From, To} || {extensions, From, To} <- Fields]),
    SubMessages = lists:reverse(
        [{message, [SubName | ListName], SubBody} || {message, SubName, SubBody} <- Fields]
    ),
    NewCollected = Collected#collected{
        msg = [{ListName, FieldsOut} | Collected#collected.msg],
        extensions = [{ListName, Extensions} | Collected#collected.extensions]
    },
    collect_full_messages(Tail ++ SubMessages ++ Enums, NewCollected);

...

仍旧分析主角message的处理过程:

可以看到对message的处理主要包括取出一个message中的所有域信息、枚举定义、扩展定义、嵌套message,并将枚举定义和嵌套message合并到上级语法树中继续处理,而将域信息和扩展定义收集到一个collected结构中。

%% Enum clause. Turns every {EnumAtom, IntValue} entry of the enum body into
%% an {enum, TypeName, IntValue, EnumAtom} record, where TypeName is the
%% enum's type path rendered by type_path_to_type/1, and puts these records
%% in front of the enums collected so far. Entries of any other shape are
%% ignored.
collect_full_messages([{enum, Name, Fields} | Tail], Collected) ->
    %% A name whose head is itself a list is used as a type path as is;
    %% any other name is wrapped into a one-element path.
    ListName =
        case erlang:is_list(hd(Name)) of
            true -> Name;
            false -> [Name]
        end,
    %% The records end up in reverse declaration order.
    FieldsOut = lists:reverse(
        [{enum, type_path_to_type(ListName), IntValue, EnumAtom}
         || {EnumAtom, IntValue} <- Fields]
    ),
    NewCollected = Collected#collected{enum = FieldsOut ++ Collected#collected.enum},
    collect_full_messages(Tail, NewCollected);

%% Renders a type path as a single type name: the path segments are taken in
%% reverse order and joined with "_".
type_path_to_type(TypePath) ->
    Segments = lists:reverse(TypePath),
    string:join(Segments, "_").

...

对enum的处理过程:

enum的处理比较简单,仅仅是提取所有的enum定义,并为其生成一条记录{enum,EnumName,EnumValue,EnumAtom},将枚举定义继续收集到collected结构中。

%% Extend clause. Appends the fields of an `extend` block to the already
%% collected message with the same type path, after checking every field id
%% against the extension ranges collected for that message.
%%
%% Crashes with badmatch when no message / extensions entry exists for the
%% name (lists:keyfind/3 returns false). Logs an error report and throws
%% out_of_range when a field id is outside every declared range or inside
%% the reserved range 19000..19999.
collect_full_messages([{extend, Name, ExtendedFields} | Tail], Collected) ->

    %% A name whose head is itself a list is used as a type path as is;
    %% any other name is wrapped into a one-element path.
    ListName = case erlang:is_list (hd(Name)) of

  true -> Name;

  false -> [Name]

      end,

    CollectedMsg = Collected#collected.msg,

    %% Assertive matches: the extended message must already be present in
    %% both the collected messages and the collected extension ranges.
    {ListName,FieldsOut} = lists:keyfind(ListName,1,CollectedMsg),

    {ListName,Extensions} = lists:keyfind(ListName,1,Collected#collected.extensions),

    %% True for ids outside the reserved range 19000..19999.
    FunNotInReservedRange = fun(Id) -> not(19000 =< Id andalso Id =< 19999) end,

    %% True when Id lies within From..To; the atom `max` as upper bound
    %% stands for 16#1fffffff.
    FunInRange = fun(Id,From,max) -> From =< Id andalso Id =< 16#1fffffff;

   (Id,From,To) -> From =< Id andalso Id =< To

end,

    %% Existing fields first, followed by the accepted extension fields.
    %% The fold prepends, so the extension fields are in reverse order of
    %% ExtendedFields.
    ExtendedFieldsOut = lists:append(FieldsOut,

    lists:foldl(

      fun ({Id, _, _, FieldName, _} = Input, TmpAcc) ->

      %% Accept the field if its id is not reserved and falls into at
      %% least one of the declared extension ranges.
      case lists:any(fun({From,To}) -> FunNotInReservedRange(Id) 

   andalso FunInRange(Id,From,To)

     end,Extensions) of 

  true ->

      [Input | TmpAcc];

  _ ->

      %% Report the offending field, then abort the whole collection.
      error_logger:error_report(["Extended field not in valid range",

 {message, Name},

 {field_id,Id},

 {field_name,FieldName},

 {defined_ranges,Extensions},

 {reserved_range,{19000,19999}},

 {max,16#1fffffff}]),

      throw(out_of_range)

      end;

  %% Entries that are not 5-tuple field definitions are skipped.
  (_, TmpAcc) -> TmpAcc

      end, [], ExtendedFields)

    ),

    %% Replace the stored message definition with the extended one.
    NewCollected = Collected#collected{msg=lists:keyreplace(ListName,1,CollectedMsg,{ListName,ExtendedFieldsOut})},

    collect_full_messages(Tail, NewCollected);

对extend的处理过程:

extend本质上体现的是一种依赖关系:此处既要检查extend中各个域的编号是否落在目标message的extensions范围之内(并且不在19000~19999的保留区间内),同时也检查了extend所依赖的message是否存在;检查通过后,将extend的域追加到原始message定义中,并替换collected中该message的定义。

%% Generates the source for one .proto file: parses the file and its imports,
%% collects the message/enum definitions, resolves the field types and passes
%% everything to output_source/4.
%%
%% ProtoFile - path of the .proto file; must be a list (string).
%% Options   - proplist; `imports_dir` (default []) extends the import search
%%             paths "./" and "src/".
generate_source(ProtoFile, Options) when is_list(ProtoFile) ->
    %% Base name of the generated output: "<proto file basename>_pb".
    Basename = filename:basename(ProtoFile, ".proto") ++ "_pb",
    {ok, FirstParsed} = parse(ProtoFile),
    ExtraImportDirs = proplists:get_value(imports_dir, Options, []),
    Parsed = parse_imports(FirstParsed, ["./", "src/" | ExtraImportDirs]),
    Collected = collect_full_messages(Parsed),
    Enums = Collected#collected.enum,
    Messages = resolve_types(Collected#collected.msg, Enums),
    output_source(Basename, Messages, Enums, Options).

收集完了所有的message定义和enum定义,接着将进行所有message的类型解析了。

%% Resolves the field types of all collected messages. The full message list
%% serves both as the work list and as the set of known type paths; the
%% result accumulator starts empty.
resolve_types(Data, Enums) ->
    AllPaths = Data,
    resolve_types(Data, AllPaths, Enums, []).

%% Resolves the type of every field of every {TypePath, Fields} entry.
%%
%% Scalar-typed fields are kept unchanged. For any other field the type name
%% is looked up among the known message paths (AllPaths) and, failing that,
%% among Enums; the field's type is then replaced by the resolved path
%% rendered through type_path_to_type/1. Throws ["Unknown Type ", Type] when
%% neither lookup succeeds.
%%
%% Returns a list of {TypeName, Fields} built by prepending to Acc, so the
%% messages come out in reverse order of the input list.
resolve_types ([{TypePath, Fields} | Tail], AllPaths, Enums, Acc) ->

    FieldsOut = lists:foldl(

 fun (Input, TmpAcc) ->

 case Input of

     {Index, Rules, Type, Identifier, Other} ->

 case is_scalar_type (Type) of

     true -> [Input | TmpAcc];

     false ->

 %% Candidate type paths for the referenced type name.
 PossiblePaths =

     case string:tokens (Type,".") of

 %% Type is already bound here, so this clause matches only
 %% when tokenising left the name unchanged (no "." in it);
 %% the candidates then depend on the enclosing TypePath.
 [Type] ->

     all_possible_type_paths (Type, TypePath);

 FullPath ->

% handle types of the form Foo.Bar which are absolute,

% so we just convert to a type path and check it.

     [lists:reverse (FullPath)]

     end,

 %% As used here, find_type/2 yields false or the matching path;
 %% on false the name is tried as an enum type instead.
 RealPath =

     case find_type (PossiblePaths, AllPaths) of

 false ->

     case is_enum_type(Type, PossiblePaths, Enums) of

 {true,EnumType} ->

     EnumType;

 false ->

     throw (["Unknown Type ", Type])

     end;

 ResultType ->

     ResultType

     end,

 [{Index, Rules, type_path_to_type (RealPath), Identifier, Other} | TmpAcc]

 end;

     %% Entries that are not 5-tuple field definitions are dropped.
     _ -> TmpAcc

 end

 end, [], Fields),

    %% The fold prepends, so reversing restores the order of Fields.
    resolve_types (Tail, AllPaths, Enums, [{type_path_to_type (TypePath), lists:reverse (FieldsOut) } | Acc]);

resolve_types ([], _, _, Acc) ->

    Acc.

由于protocol buffers实质是一种对类型的描述,此处类型解析将是protobuffs的核心语义分析过程,对message的域进行类型检查和类型绑定,主要根据以下原则进行:

对于标量类型,也即protocol buffers的内建类型,无需过多检查;对于枚举类型和自定义类型,需要检查所依赖的类型,将其绑定到具体的枚举类型或自定义类型上。

至此,protobuffs的语义分析过程就结束了,这个过程比较简单,仅仅是类型收集、类型检查和类型绑定,此处已经收集到了类型的符号表:

[

Message1 = {MessageName,

                      [

                         Field1 = {FieldId,

                                         FieldRule(required/optional/repeated/repeated_packed...),

                                         FieldType(ScalarType, EnumType,OtherType),

                                         FieldName,Other(DefaultValue)},

                         Field2,...,Fieldn]},

Message2,...Messagen,

Enum1 = {enum, EnumName, EnumValue, EnumAtom},

Enum2,...,Enumn

]。

未完待续...

猜你喜欢

转载自wqtn22.iteye.com/blog/1581101
今日推荐