diff --git a/apps/iot/src/data_format/line_format.erl b/apps/iot/src/data_format/line_format.erl
index 7f949a9..d780ad5 100644
--- a/apps/iot/src/data_format/line_format.erl
+++ b/apps/iot/src/data_format/line_format.erl
@@ -16,67 +16,53 @@
 -spec parse(Metric :: binary()) -> map().
 parse(Metric) when is_binary(Metric) ->
     case lexer(Metric) of
-        [Measurement, Fields, Timestamp] ->
-            Map = case binary:split(Measurement, <<",">>) of
-                [Measurement] ->
-                    #{<<"measurement">> => Measurement, <<"tags">> => #{}};
-                [Measurement0, Tags0] ->
-                    Tags = maps:from_list(parser_keys(Tags0)),
-                    #{<<"measurement">> => Measurement0, <<"tags">> => Tags}
-            end,
-            FieldsMap = maps:from_list(parser_keys(Fields)),
-            {ok, Map#{<<"fields">> => FieldsMap, <<"timestamp">> => binary_to_integer(Timestamp)}};
+        [[Measurement|Tags], Fields, [Timestamp|_]] ->
+            {ok, #{
+                <<"measurement">> => Measurement,
+                <<"tags">> => parser_keys(Tags),
+                <<"fields">> => parser_keys(Fields),
+                <<"timestamp">> => Timestamp
+            }};
         _ ->
             error
     end.
 
--spec lexer(Input :: binary()) -> [Token :: binary()].
-lexer(Input) when is_binary(Input) ->
-    lexer(Input, false, [], []).
-lexer(<<>>, _Quotes, Current, Acc) ->
+%% 词法分析:将输入文本转换为标记序列
+lexer(Input) ->
+    lexer(Input, false, [], [], []).
+lexer(<<>>, _Quotes, Current, Acc, Groups) ->
     Last = list_to_binary(lists:reverse(Current)),
-    lists:reverse([Last|Acc]);
-lexer(<<$\s, Rest/binary>>, Quotes, Current, Acc) ->
+    NAcc = lists:reverse([Last|Acc]),
+    lists:reverse([NAcc|Groups]);
+%% 遇到逗号
+lexer(<<$,, Rest/binary>>, Quotes, Current, Acc, Groups) ->
     case Quotes of
         true ->
-            lexer(Rest, Quotes, [" "|Current], Acc);
+            lexer(Rest, Quotes, [","|Current], Acc, Groups);
         false ->
             Part = list_to_binary(lists:reverse(Current)),
-            lexer(Rest, Quotes, [], [Part|Acc])
+            lexer(Rest, Quotes, [], [Part|Acc], Groups)
     end;
-lexer(<<$\", Rest/binary>>, Quotes, Current, Acc) ->
-    lexer(Rest, not Quotes, [$\"|Current], Acc);
-lexer(<<Char, Rest/binary>>, Quotes, Current, Acc) ->
-    lexer(Rest, Quotes, [Char|Current], Acc).
+lexer(<<$\s, Rest/binary>>, Quotes, Current, Acc, Groups) ->
+    case Quotes of
+        true ->
+            lexer(Rest, Quotes, [$\s|Current], Acc, Groups);
+        false ->
+            Part = list_to_binary(lists:reverse(Current)),
+            NAcc = lists:reverse([Part|Acc]),
+            lexer(Rest, Quotes, [], [], [NAcc|Groups])
+    end;
+lexer(<<$\", Rest/binary>>, Quotes, Current, Acc, Groups) ->
+    lexer(Rest, not Quotes, [$\"|Current], Acc, Groups);
+lexer(<<Char, Rest/binary>>, Quotes, Current, Acc, Groups) ->
+    lexer(Rest, Quotes, [Char|Current], Acc, Groups).
 
--spec parser_keys(Input :: binary()) -> [{Key :: binary(), Val :: binary()}].
-parser_keys(Input) when is_binary(Input) ->
-    Parts = split_keys(Input),
-    lists:flatmap(fun(Item) ->
+parser_keys(Tokens) ->
+    maps:from_list(lists:flatmap(fun(Item) ->
         case binary:split(Item, <<"=">>) of
             [Key, Val] ->
                 [{Key, Val}];
             _ ->
                 []
         end
-    end, Parts).
-
-%% 将 "key=val,key1=val1"的格式转换成 ["key=val", "key1=val1"]
--spec split_keys(Input :: binary()) -> [binary()].
-split_keys(Input) when is_binary(Input) ->
-    split_keys(Input, false, [], []).
-split_keys(<<>>, _Quotes, Current, Acc) ->
-    Last = list_to_binary(lists:reverse(Current)),
-    lists:reverse([Last|Acc]);
-split_keys(<<$,, Rest/binary>>, Quotes, Current, Acc) ->
-    case Quotes of
-        true ->
-            split_keys(Rest, Quotes, [$,|Current], Acc);
-        false ->
-            Part = list_to_binary(lists:reverse(Current)),
-            split_keys(Rest, Quotes, [], [Part|Acc])
-    end;
-split_keys(<<$\", Rest/binary>>, Quotes, Current, Acc) ->
-    split_keys(Rest, not Quotes, [$\"|Current], Acc);
-split_keys(<<Char, Rest/binary>>, Quotes, Current, Acc) ->
-    split_keys(Rest, Quotes, [Char|Current], Acc).
\ No newline at end of file
+    end, Tokens)).
\ No newline at end of file