diff --git a/apps/efka/src/efka_agent.erl b/apps/efka/src/efka_agent.erl index 42afd69..b96c621 100644 --- a/apps/efka/src/efka_agent.erl +++ b/apps/efka/src/efka_agent.erl @@ -2,25 +2,24 @@ %%% @author anlicheng %%% @copyright (C) 2025, %%% @doc -%%% 需要支持 云服务离线时候的数据暂存 +%%% %%% @end -%%% Created : 06. 5月 2025 00:01 +%%% Created : 21. 5月 2025 18:38 %%%------------------------------------------------------------------- -module(efka_agent). -author("anlicheng"). -include("message_pb.hrl"). --include("efka_tables.hrl"). -include("efka.hrl"). +-include("efka_tables.hrl"). --behaviour(gen_server). +-behaviour(gen_statem). %% API -export([start_link/0]). --export([metric_data/3, event/3, ping/13]). --export([request_service_config/2, await_reply/2]). +-export([metric_data/3, event/3, ping/13, request_service_config/2, await_reply/2]). -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). +%% gen_statem callbacks +-export([init/1, handle_event/4, terminate/3, code_change/4, callback_mode/0]). -define(SERVER, ?MODULE). @@ -28,7 +27,6 @@ -define(STATE_DENIED, denied). -define(STATE_CONNECTING, connecting). -define(STATE_AUTH, auth). - %% 不能推送消息到服务,但是可以接受服务器的部分指令 -define(STATE_RESTRICTED, restricted). %% 激活状态下 @@ -36,9 +34,10 @@ -record(state, { transport_pid :: undefined | pid(), - status = ?STATE_DENIED, - %% 映射关系 #{Ref => PacketId} - inflight = #{} + %% 服务器端推送的消息的未确认列表, 映射关系 #{Ref => PacketId} + push_inflight = #{}, + %% 发送的请求的未确认列表, 映射关系 #{Ref => ReceiverPid} + request_inflight = #{} }). %%%=================================================================== @@ -48,101 +47,102 @@ %% 发送数据 -spec metric_data(ServiceId :: binary(), DeviceUUID::binary(), LineProtocolData :: binary()) -> no_return(). metric_data(ServiceId, DeviceUUID, LineProtocolData) when is_binary(ServiceId), is_binary(DeviceUUID), is_binary(LineProtocolData) -> - gen_server:cast(?SERVER, {metric_data, ServiceId, DeviceUUID, LineProtocolData}). + gen_statem:cast(?SERVER, {metric_data, ServiceId, DeviceUUID, LineProtocolData}). -spec event(ServiceId :: binary(), EventType :: integer(), Params :: binary()) -> no_return(). event(ServiceId, EventType, Params) when is_binary(ServiceId), is_integer(EventType), is_binary(Params) -> - gen_server:cast(?SERVER, {event, ServiceId, EventType, Params}). + gen_statem:cast(?SERVER, {event, ServiceId, EventType, Params}). ping(AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ips, CpuCore, CpuLoad, CpuTemperature, Disk, Memory, Interfaces) -> - gen_server:cast(?SERVER, {ping, AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ips, CpuCore, CpuLoad, CpuTemperature, Disk, Memory, Interfaces}). + gen_statem:cast(?SERVER, {ping, AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ips, CpuCore, CpuLoad, CpuTemperature, Disk, Memory, Interfaces}). %% 请求微服务的配置 -spec request_service_config(ReceiverPid :: pid(), ServiceId :: binary()) -> {ok, Ref :: reference()} | {error, Reason :: term()}. request_service_config(ReceiverPid, ServiceId) when is_binary(ServiceId) -> - gen_server:call(?SERVER, {request_service_config, ReceiverPid, ServiceId}). + gen_statem:call(?SERVER, {request_service_config, ReceiverPid, ServiceId}). %% 等待消息的回复 -spec await_reply(Ref :: reference(), Timeout :: timeout()) -> {ok, Reply :: binary()} | {error, timeout}. await_reply(Ref, Timeout) when is_reference(Ref), is_integer(Timeout) -> receive - {transport_reply, Ref, ReplyBin} -> + {request_reply, Ref, ReplyBin} -> {ok, ReplyBin} after Timeout -> {error, timeout} end. -%% @doc Spawns the server and registers the local name (unique) --spec(start_link() -> - {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). +%% @doc Creates a gen_statem process which calls Module:init/1 to +%% initialize. To ensure a synchronized start-up procedure, this +%% function does not return until Module:init/1 has returned. start_link() -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + gen_statem:start_link({local, ?SERVER}, ?MODULE, [], []). %%%=================================================================== -%%% gen_server callbacks +%%% gen_statem callbacks %%%=================================================================== %% @private -%% @doc Initializes the server --spec(init(Args :: term()) -> - {ok, State :: #state{}} | {ok, State :: #state{}, timeout() | hibernate} | - {stop, Reason :: term()} | ignore). +%% @doc Whenever a gen_statem is started using gen_statem:start/[3,4] or +%% gen_statem:start_link/[3,4], this function is called by the new +%% process to initialize. init([]) -> erlang:process_flag(trap_exit, true), erlang:start_timer(0, self(), create_transport), - {ok, #state{status = ?STATE_DENIED}}. + {ok, ?STATE_DENIED, #state{}}. %% @private -%% @doc Handling call messages --spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, - State :: #state{}) -> - {reply, Reply :: term(), NewState :: #state{}} | - {reply, Reply :: term(), NewState :: #state{}, timeout() | hibernate} | - {noreply, NewState :: #state{}} | - {noreply, NewState :: #state{}, timeout() | hibernate} | - {stop, Reason :: term(), Reply :: term(), NewState :: #state{}} | - {stop, Reason :: term(), NewState :: #state{}}). -handle_call({request_service_config, ReceiverPid, ServiceId}, _From, State = #state{transport_pid = TransportPid}) -> - case is_pid(TransportPid) andalso is_process_alive(TransportPid) of - true -> - Ref = efka_transport:request(TransportPid, ReceiverPid, ?METHOD_REQUEST_SERVICE_CONFIG, ServiceId), - {reply, {ok, Ref}, State}; - false -> - {reply, {error, <<"transport is not alive">>}, State} - end. +%% @doc This function is called by a gen_statem when it needs to find out +%% the callback mode of the callback module. +callback_mode() -> + handle_event_function. %% @private -%% @doc Handling cast messages --spec(handle_cast(Request :: term(), State :: #state{}) -> - {noreply, NewState :: #state{}} | - {noreply, NewState :: #state{}, timeout() | hibernate} | - {stop, Reason :: term(), NewState :: #state{}}). +%% @doc If callback_mode is handle_event_function, then whenever a +%% gen_statem receives an event from call/2, cast/2, or as a normal +%% process message, this function is called. +handle_event({call, From}, {request_service_config, ReceiverPid, ServiceId}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid, request_inflight = RequestInflight}) -> + Ref = efka_transport:request(TransportPid, ?METHOD_REQUEST_SERVICE_CONFIG, ServiceId), + {keep_state, State#state{request_inflight = maps:put(Ref, ReceiverPid, RequestInflight)}, [{reply, From, {ok, Ref}}]}; -%% 发送数据 -handle_cast({metric_data, ServiceId, DeviceUUID, LineProtocolData}, State) -> +handle_event({call, From}, {request_service_config, _ReceiverPid, _ServiceId}, _, State) -> + {keep_state, State, [{reply, From, {error, <<"transport is not alive">>}}]}; + +handle_event(cast, {metric_data, ServiceId, DeviceUUID, LineProtocolData}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> Packet = message_pb:encode_msg(#data{ service_id = ServiceId, device_uuid = DeviceUUID, metric = LineProtocolData }), - safe_send(?METHOD_DATA, Packet, State), + efka_transport:send(TransportPid, ?METHOD_DATA, Packet), + {keep_state, State}; +handle_event(cast, {metric_data, ServiceId, DeviceUUID, LineProtocolData}, _, State) -> + Packet = message_pb:encode_msg(#data{ + service_id = ServiceId, + device_uuid = DeviceUUID, + metric = LineProtocolData + }), + ok = cache_model:insert(?METHOD_DATA, Packet), + {keep_state, State}; - {noreply, State}; - -%% Event事件 -handle_cast({event, ServiceId, EventType, Params}, State) -> +handle_event(cast, {event, ServiceId, EventType, Params}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> EventPacket = message_pb:encode_msg(#event{ service_id = ServiceId, event_type = EventType, params = Params }), - safe_send(?METHOD_EVENT, EventPacket, State), + efka_transport:send(TransportPid, ?METHOD_EVENT, EventPacket), + {keep_state, State}; +handle_event(cast, {event, ServiceId, EventType, Params}, ?STATE_ACTIVATED, State) -> + EventPacket = message_pb:encode_msg(#event{ + service_id = ServiceId, + event_type = EventType, + params = Params + }), + ok = cache_model:insert(?METHOD_EVENT, EventPacket), + {keep_state, State}; - {noreply, State}; - -%% 处理ping消息 -handle_cast({ping, AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ips, CpuCore, CpuLoad, CpuTemperature, Disk, Memory, Interfaces}, - State = #state{status = Status, transport_pid = TransportPid}) -> +handle_event(cast, {ping, AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ips, CpuCore, CpuLoad, CpuTemperature, Disk, Memory, Interfaces}, ?STATE_ACTIVATED, + State = #state{transport_pid = TransportPid}) -> Ping = message_pb:encode_msg(#ping{ adcode = AdCode, @@ -159,48 +159,31 @@ handle_cast({ping, AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ip memory = Memory, interfaces = Interfaces }), - case Status =:= ?STATE_ACTIVATED andalso is_pid(TransportPid) of - true -> - efka_transport:send(TransportPid, ?METHOD_PING, Ping); - false -> - ok - end, + efka_transport:send(TransportPid, ?METHOD_PING, Ping), + {keep_state, State}; - {noreply, State}; - -handle_cast(_Request, State = #state{}) -> - {noreply, State}. - -%% @private -%% @doc Handling all non call/cast messages --spec(handle_info(Info :: timeout() | term(), State :: #state{}) -> - {noreply, NewState :: #state{}} | - {noreply, NewState :: #state{}, timeout() | hibernate} | - {stop, Reason :: term(), NewState :: #state{}}). -handle_info({timeout, _, create_transport}, State = #state{status = ?STATE_DENIED}) -> +handle_event(info, {timeout, _, create_transport}, ?STATE_ACTIVATED, State) -> {ok, Props} = application:get_env(efka, tls_server), Host = proplists:get_value(host, Props), Port = proplists:get_value(port, Props), {ok, TransportPid} = efka_transport:start_link(self(), Host, Port), efka_transport:connect(TransportPid), - {noreply, State#state{status = ?STATE_CONNECTING, transport_pid = TransportPid}}; + {next_state, ?STATE_CONNECTING, State#state{transport_pid = TransportPid}}; -%% 收到连接回复 -handle_info({connect_reply, Reply}, State = #state{status = ?STATE_CONNECTING, transport_pid = TransportPid}) when is_pid(TransportPid) -> +handle_event(info, {connect_reply, Reply}, ?STATE_CONNECTING, State = #state{transport_pid = TransportPid}) -> case Reply of ok -> AuthBin = auth_request(), efka_transport:auth_request(TransportPid, AuthBin), - {noreply, State#state{status = ?STATE_AUTH}}; + {next_state, ?STATE_AUTH, State}; {error, Reason} -> lager:debug("[efka_agent] connect failed, error: ~p, pid: ~p", [Reason, TransportPid]), efka_transport:stop(TransportPid), - {noreply, State#state{status = ?STATE_DENIED}} + {next_state, ?STATE_DENIED, State} end; -%% 收到auth回复 -handle_info({auth_reply, Reply}, State = #state{status = ?STATE_AUTH, transport_pid = TransportPid}) when is_pid(TransportPid) -> +handle_event(info, {auth_reply, Reply}, ?STATE_AUTH, State = #state{transport_pid = TransportPid}) -> case Reply of {ok, ReplyBin} -> #auth_reply{code = Code, message = Message} = message_pb:decode_msg(ReplyBin, auth_reply), @@ -213,35 +196,34 @@ handle_info({auth_reply, Reply}, State = #state{status = ?STATE_AUTH, transport_ efka_transport:send(TransportPid, Method, Packet), cache_model:delete(Id) end, CacheItems), - - {noreply, State#state{status = ?STATE_ACTIVATED}}; + {next_state, ?STATE_ACTIVATED, State}; 1 -> %% 主机在后台的授权未通过;此时agent不能推送数据给云端服务器,但是云端服务器可以推送命令给agent %% socket的连接状态需要维持 lager:debug("[efka_agent] auth denied, message: ~p", [Message]), - {noreply, State#state{status = ?STATE_RESTRICTED}}; + {next_state, ?STATE_RESTRICTED, State}; 2 -> % 其他类型的错误,需要间隔时间重试 lager:debug("[efka_agent] auth failed, message: ~p", [Message]), efka_transport:stop(TransportPid), - {noreply, State#state{transport_pid = undefined, status = ?STATE_DENIED}}; + {next_state, ?STATE_DENIED, State#state{transport_pid = undefined}}; _ -> % 其他类型的错误,需要间隔时间重试 lager:debug("[efka_agent] auth failed, invalid message"), efka_transport:stop(TransportPid), - {noreply, State#state{transport_pid = undefined, status = ?STATE_DENIED}} + {next_state, ?STATE_DENIED, State#state{transport_pid = undefined}} end; {error, Reason} -> lager:debug("[efka_agent] auth_request failed, error: ~p", [Reason]), efka_transport:stop(TransportPid), - {noreply, State#state{transport_pid = undefined, status = ?STATE_DENIED}} + {next_state, ?STATE_DENIED, State#state{transport_pid = undefined}} end; %% 云端服务器推送了消息 %% 激活消息 %% 微服务部署 -handle_info({server_push, PacketId, <>}, State = #state{transport_pid = TransportPid}) -> +handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> #deploy{task_id = TaskId, service_id = ServiceId, tar_url = TarUrl} = message_pb:decode_msg(DeployBin, deploy), %% 短暂的等待,efka_inetd收到消息后就立即返回了 @@ -253,10 +235,10 @@ handle_info({server_push, PacketId, <>}, State end, efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - {noreply, State}; + {keep_state, State}; %% 启动微服务 -handle_info({server_push, PacketId, <>}, State = #state{transport_pid = TransportPid}) -> +handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> %% 短暂的等待,efka_inetd收到消息后就立即返回了 Reply = case efka_inetd:start_service(ServiceId) of ok -> @@ -266,10 +248,10 @@ handle_info({server_push, PacketId, <>} end, efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - {noreply, State}; + {keep_state, State}; %% 停止微服务 -handle_info({server_push, PacketId, <>}, State = #state{transport_pid = TransportPid}) -> +handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> %% 短暂的等待,efka_inetd收到消息后就立即返回了 Reply = case efka_inetd:stop_service(ServiceId) of ok -> @@ -279,17 +261,17 @@ handle_info({server_push, PacketId, <>}, end, efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - {noreply, State}; + {keep_state, State}; %% config.json配置信息 -handle_info({server_push, PacketId, <>}, State = #state{transport_pid = TransportPid, inflight = Inflight}) -> +handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid, push_inflight = PushInflight}) -> #push_service_config{service_id = ServiceId, config_json = ConfigJson, timeout = Timeout} = message_pb:decode_msg(ConfigBin, push_service_config), case efka_service:get_pid(ServiceId) of undefined -> Reply = #async_call_reply{code = 0, message = <<"service not run">>}, efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - {noreply, State}; + {keep_state, State}; ServicePid when is_pid(ServicePid) -> Ref = make_ref(), %% 将配置文件推送到对应的微服务 @@ -297,29 +279,30 @@ handle_info({server_push, PacketId, <> %% 处理超时逻辑 erlang:start_timer(Timeout, self(), {request_timeout, Ref}), - {noreply, State#state{inflight = maps:put(Ref, PacketId, Inflight)}} + {keep_state, State#state{push_inflight = maps:put(Ref, PacketId, PushInflight)}} end; %% 收到需要回复的指令 -handle_info({server_push, PacketId, <>}, State = #state{status = ?STATE_ACTIVATED, inflight = Inflight}) -> +handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{push_inflight = PushInflight, transport_pid = TransportPid}) -> #invoke{service_id = ServiceId, payload = Payload, timeout = Timeout} = message_pb:decode_msg(InvokeBin, invoke), %% 消息发送到订阅系统 case efka_service:get_pid(ServiceId) of undefined -> Reply = #async_call_reply{code = 0, message = <<"micro_service not run">>}, - safe_async_call_reply(PacketId, message_pb:encode_msg(Reply), State), - {noreply, State}; + efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), + + {keep_state, State}; ServicePid when is_pid(ServicePid) -> Ref = make_ref(), efka_service:invoke(ServicePid, Ref, Payload), %% 处理超时逻辑 erlang:start_timer(Timeout, self(), {request_timeout, Ref}), - {noreply, State#state{inflight = maps:put(Ref, PacketId, Inflight)}} + {keep_state, State#state{push_inflight = maps:put(Ref, PacketId, PushInflight)}} end; %% 处理task_log -handle_info({server_push, PacketId, <>}, State = #state{status = ?STATE_ACTIVATED}) -> +handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> #fetch_task_log{task_id = TaskId} = message_pb:decode_msg(TaskLogBin, fetch_task_log), lager:debug("[efka_agent] get task_log request: ~p", [TaskId]), {ok, Logs} = efka_inetd_task_log:get_logs(TaskId), @@ -330,106 +313,94 @@ handle_info({server_push, PacketId, <>}, St false -> #async_call_reply{code = 1, result = <<"[]">>} end, - safe_async_call_reply(PacketId, message_pb:encode_msg(Reply), State), - {noreply, State}; + efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), + + {keep_state, State}; %% 处理命令 -handle_info({server_command, ?COMMAND_AUTH, <>}, State = #state{transport_pid = TransportPid, status = Status}) -> - case {Auth, Status} of +handle_event(info, {server_command, ?COMMAND_AUTH, <>}, StateName, State = #state{transport_pid = TransportPid}) -> + case {Auth, StateName} of {1, ?STATE_ACTIVATED} -> - {noreply, State}; + {keep_state, State}; {1, ?STATE_DENIED} -> %% 重新激活, 需要重新校验 AuthRequestBin = auth_request(), efka_transport:auth_request(TransportPid, AuthRequestBin), - {noreply, State#state{status = ?STATE_AUTH}}; + {next_state, ?STATE_AUTH, State}; {0, _} -> %% 这个时候的主机应该是受限制的状态,不允许发送消息;但是能够接受服务器推送的消息 - {noreply, State#state{status = ?STATE_RESTRICTED}} + {next_state, ?STATE_RESTRICTED, State} end; %% 收到需要回复的指令 -handle_info({server_pub, Topic, Content}, State = #state{status = ?STATE_ACTIVATED}) -> +handle_event(info, {server_pub, Topic, Content}, ?STATE_ACTIVATED, State) -> lager:debug("[efka_agent] get pub topic: ~p, content: ~p", [Topic, Content]), %% 消息发送到订阅系统 efka_subscription:publish(Topic, Content), - - {noreply, State}; + {keep_state, State}; %% 收到来自efka_service的回复 -handle_info({service_reply, Ref, EmsReply}, State = #state{inflight = Inflight}) -> - case maps:take(Ref, Inflight) of +handle_event(info, {service_reply, Ref, EmsReply}, ?STATE_ACTIVATED, State = #state{push_inflight = PushInflight, transport_pid = TransportPid}) -> + case maps:take(Ref, PushInflight) of error -> - {noreply, State}; - {PacketId, NInflight} -> + {keep_state, State}; + {PacketId, NPushInflight} -> Reply = case EmsReply of {ok, Result} -> #async_call_reply{code = 1, result = Result}; {error, Reason} -> #async_call_reply{code = 0, message = Reason} end, - safe_async_call_reply(PacketId, message_pb:encode_msg(Reply), State), + efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - {noreply, State#state{inflight = NInflight}} + {keep_state, State#state{push_inflight = NPushInflight}} end; -%% 请求超时逻辑处理 -handle_info({timeout, _, {request_timeout, Ref}}, State = #state{inflight = Inflight}) -> - case maps:take(Ref, Inflight) of +%% 收到来自服务器端的回复 +handle_event(info, {server_reply, Ref, ReplyBin}, ?STATE_ACTIVATED, State = #state{request_inflight = RequestInflight}) -> + case maps:take(Ref, RequestInflight) of error -> - {noreply, State}; - {PacketId, NInflight} -> - Reply = #async_call_reply{code = 0, message = <<"reqeust timeout">>, result = <<>>}, - safe_async_call_reply(PacketId, message_pb:encode_msg(Reply), State), + {keep_state, State}; + {ReceiverPid, NRequestInflight} -> + is_process_alive(ReceiverPid) andalso erlang:send(ReceiverPid, {request_reply, Ref, ReplyBin}), + {keep_state, State#state{push_inflight = NRequestInflight}} + end; - {noreply, State#state{inflight = NInflight}} +%% todo 请求超时逻辑处理 +handle_event(info, {timeout, _, {request_timeout, Ref}}, ?STATE_ACTIVATED, State = #state{push_inflight = PushInflight, transport_pid = TransportPid}) -> + case maps:take(Ref, PushInflight) of + error -> + {keep_state, State}; + {PacketId, NPushInflight} -> + Reply = #async_call_reply{code = 0, message = <<"reqeust timeout">>, result = <<>>}, + efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), + + {keep_state, State#state{push_inflight = NPushInflight}} end; %% transport进程退出 -handle_info({'EXIT', TransportPid, Reason}, State = #state{transport_pid = TransportPid}) -> +handle_event(info, {'EXIT', TransportPid, Reason}, _, State = #state{transport_pid = TransportPid}) -> lager:debug("[efka_agent] transport pid: ~p, exit with reason: ~p", [TransportPid, Reason]), erlang:start_timer(5000, self(), create_transport), - {noreply, State#state{transport_pid = undefined, status = ?STATE_DENIED}}; - -handle_info(_Info, State = #state{}) -> - {noreply, State}. + {next_state, ?STATE_DENIED, State#state{transport_pid = undefined}}. %% @private -%% @doc This function is called by a gen_server when it is about to +%% @doc This function is called by a gen_statem when it is about to %% terminate. It should be the opposite of Module:init/1 and do any -%% necessary cleaning up. When it returns, the gen_server terminates -%% with Reason. The return value is ignored. --spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), - State :: #state{}) -> term()). -terminate(_Reason, _State = #state{}) -> +%% necessary cleaning up. When it returns, the gen_statem terminates with +%% Reason. The return value is ignored. +terminate(_Reason, _StateName, _State = #state{}) -> ok. %% @private %% @doc Convert process state when code is changed --spec(code_change(OldVsn :: term() | {down, term()}, State :: #state{}, - Extra :: term()) -> - {ok, NewState :: #state{}} | {error, Reason :: term()}). -code_change(_OldVsn, State = #state{}, _Extra) -> - {ok, State}. +code_change(_OldVsn, StateName, State = #state{}, _Extra) -> + {ok, StateName, State}. %%%=================================================================== %%% Internal functions %%%=================================================================== -%% 安全回复 --spec safe_async_call_reply(PacketId :: integer(), Reply :: binary(), State :: #state{}) -> no_return(). -safe_async_call_reply(PacketId, Reply, #state{status = ?STATE_ACTIVATED, transport_pid = TransportPid}) when is_integer(PacketId), is_binary(Reply), is_pid(TransportPid) -> - is_process_alive(TransportPid) andalso efka_transport:async_call_reply(TransportPid, PacketId, Reply); -safe_async_call_reply(_PacketId, _Reply, #state{}) -> - ok. - -%% 当连接正常的时候发送,否则暂存数据 --spec safe_send(Method :: integer(), Packet :: binary(), State :: #state{}) -> no_return(). -safe_send(Method, Packet, #state{status = ?STATE_ACTIVATED, transport_pid = TransportPid}) when is_pid(TransportPid) -> - efka_transport:send(TransportPid, Method, Packet); -safe_send(Method, Packet, #state{}) -> - ok = cache_model:insert(Method, Packet). - -spec auth_request() -> binary(). auth_request() -> {ok, AuthInfo} = application:get_env(efka, auth), diff --git a/apps/efka/src/efka_agent2.erl b/apps/efka/src/efka_agent2.erl deleted file mode 100644 index 55e931b..0000000 --- a/apps/efka/src/efka_agent2.erl +++ /dev/null @@ -1,418 +0,0 @@ -%%%------------------------------------------------------------------- -%%% @author anlicheng -%%% @copyright (C) 2025, -%%% @doc -%%% -%%% @end -%%% Created : 21. 5月 2025 18:38 -%%%------------------------------------------------------------------- --module(efka_agent2). --author("anlicheng"). --include("message_pb.hrl"). --include("efka.hrl"). --include("efka_tables.hrl"). - --behaviour(gen_statem). - -%% API --export([start_link/0]). --export([metric_data/3, event/3, ping/13, request_service_config/2, await_reply/2]). - -%% gen_statem callbacks --export([init/1, handle_event/4, terminate/3, code_change/4, callback_mode/0]). - --define(SERVER, ?MODULE). - -%% 标记当前agent的状态,只有在 activated 状态下才可以正常的发送数据 --define(STATE_DENIED, denied). --define(STATE_CONNECTING, connecting). --define(STATE_AUTH, auth). -%% 不能推送消息到服务,但是可以接受服务器的部分指令 --define(STATE_RESTRICTED, restricted). -%% 激活状态下 --define(STATE_ACTIVATED, activated). - --record(state, { - transport_pid :: undefined | pid(), - %% 服务器端推送的消息的未确认列表, 映射关系 #{Ref => PacketId} - push_inflight = #{}, - %% 发送的请求的未确认列表, 映射关系 #{Ref => ReceiverPid} - request_inflight = #{} -}). - -%%%=================================================================== -%%% API -%%%=================================================================== - -%% 发送数据 --spec metric_data(ServiceId :: binary(), DeviceUUID::binary(), LineProtocolData :: binary()) -> no_return(). -metric_data(ServiceId, DeviceUUID, LineProtocolData) when is_binary(ServiceId), is_binary(DeviceUUID), is_binary(LineProtocolData) -> - gen_statem:cast(?SERVER, {metric_data, ServiceId, DeviceUUID, LineProtocolData}). - --spec event(ServiceId :: binary(), EventType :: integer(), Params :: binary()) -> no_return(). -event(ServiceId, EventType, Params) when is_binary(ServiceId), is_integer(EventType), is_binary(Params) -> - gen_statem:cast(?SERVER, {event, ServiceId, EventType, Params}). - -ping(AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ips, CpuCore, CpuLoad, CpuTemperature, Disk, Memory, Interfaces) -> - gen_statem:cast(?SERVER, {ping, AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ips, CpuCore, CpuLoad, CpuTemperature, Disk, Memory, Interfaces}). - -%% 请求微服务的配置 --spec request_service_config(ReceiverPid :: pid(), ServiceId :: binary()) -> {ok, Ref :: reference()} | {error, Reason :: term()}. -request_service_config(ReceiverPid, ServiceId) when is_binary(ServiceId) -> - gen_statem:call(?SERVER, {request_service_config, ReceiverPid, ServiceId}). - -%% 等待消息的回复 --spec await_reply(Ref :: reference(), Timeout :: timeout()) -> {ok, Reply :: binary()} | {error, timeout}. -await_reply(Ref, Timeout) when is_reference(Ref), is_integer(Timeout) -> - receive - {request_reply, Ref, ReplyBin} -> - {ok, ReplyBin} - after Timeout -> - {error, timeout} - end. - -%% @doc Creates a gen_statem process which calls Module:init/1 to -%% initialize. To ensure a synchronized start-up procedure, this -%% function does not return until Module:init/1 has returned. -start_link() -> - gen_statem:start_link({local, ?SERVER}, ?MODULE, [], []). - -%%%=================================================================== -%%% gen_statem callbacks -%%%=================================================================== - -%% @private -%% @doc Whenever a gen_statem is started using gen_statem:start/[3,4] or -%% gen_statem:start_link/[3,4], this function is called by the new -%% process to initialize. -init([]) -> - erlang:process_flag(trap_exit, true), - erlang:start_timer(0, self(), create_transport), - {ok, ?STATE_DENIED, #state{}}. - -%% @private -%% @doc This function is called by a gen_statem when it needs to find out -%% the callback mode of the callback module. -callback_mode() -> - handle_event_function. - -%% @private -%% @doc If callback_mode is handle_event_function, then whenever a -%% gen_statem receives an event from call/2, cast/2, or as a normal -%% process message, this function is called. -handle_event({call, From}, {request_service_config, ReceiverPid, ServiceId}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid, request_inflight = RequestInflight}) -> - Ref = efka_transport:request(TransportPid, ?METHOD_REQUEST_SERVICE_CONFIG, ServiceId), - {keep_state, State#state{request_inflight = maps:put(Ref, ReceiverPid, RequestInflight)}, [{reply, From, {ok, Ref}}]}; - -handle_event({call, From}, {request_service_config, _ReceiverPid, _ServiceId}, _, State) -> - {keep_state, State, [{reply, From, {error, <<"transport is not alive">>}}]}; - -handle_event(cast, {metric_data, ServiceId, DeviceUUID, LineProtocolData}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> - Packet = message_pb:encode_msg(#data{ - service_id = ServiceId, - device_uuid = DeviceUUID, - metric = LineProtocolData - }), - efka_transport:send(TransportPid, ?METHOD_DATA, Packet), - {keep_state, State}; -handle_event(cast, {metric_data, ServiceId, DeviceUUID, LineProtocolData}, _, State) -> - Packet = message_pb:encode_msg(#data{ - service_id = ServiceId, - device_uuid = DeviceUUID, - metric = LineProtocolData - }), - ok = cache_model:insert(?METHOD_DATA, Packet), - {keep_state, State}; - -handle_event(cast, {event, ServiceId, EventType, Params}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> - EventPacket = message_pb:encode_msg(#event{ - service_id = ServiceId, - event_type = EventType, - params = Params - }), - efka_transport:send(TransportPid, ?METHOD_EVENT, EventPacket), - {keep_state, State}; -handle_event(cast, {event, ServiceId, EventType, Params}, ?STATE_ACTIVATED, State) -> - EventPacket = message_pb:encode_msg(#event{ - service_id = ServiceId, - event_type = EventType, - params = Params - }), - ok = cache_model:insert(?METHOD_EVENT, EventPacket), - {keep_state, State}; - -handle_event(cast, {ping, AdCode, BootTime, Province, City, EfkaVersion, KernelArch, Ips, CpuCore, CpuLoad, CpuTemperature, Disk, Memory, Interfaces}, ?STATE_ACTIVATED, - State = #state{transport_pid = TransportPid}) -> - - Ping = message_pb:encode_msg(#ping{ - adcode = AdCode, - boot_time = BootTime, - province = Province, - city = City, - efka_version = EfkaVersion, - kernel_arch = KernelArch, - ips = Ips, - cpu_core = CpuCore, - cpu_load = CpuLoad, - cpu_temperature = CpuTemperature, - disk = Disk, - memory = Memory, - interfaces = Interfaces - }), - efka_transport:send(TransportPid, ?METHOD_PING, Ping), - {keep_state, State}; - -handle_event(info, {timeout, _, create_transport}, ?STATE_ACTIVATED, State) -> - {ok, Props} = application:get_env(efka, tls_server), - Host = proplists:get_value(host, Props), - Port = proplists:get_value(port, Props), - {ok, TransportPid} = efka_transport:start_link(self(), Host, Port), - efka_transport:connect(TransportPid), - - {next_state, ?STATE_CONNECTING, State#state{transport_pid = TransportPid}}; - -handle_event(info, {connect_reply, Reply}, ?STATE_CONNECTING, State = #state{transport_pid = TransportPid}) -> - case Reply of - ok -> - AuthBin = auth_request(), - efka_transport:auth_request(TransportPid, AuthBin), - {next_state, ?STATE_AUTH, State}; - {error, Reason} -> - lager:debug("[efka_agent] connect failed, error: ~p, pid: ~p", [Reason, TransportPid]), - efka_transport:stop(TransportPid), - {next_state, ?STATE_DENIED, State} - end; - -handle_event(info, {auth_reply, Reply}, ?STATE_AUTH, State = #state{transport_pid = TransportPid}) -> - case Reply of - {ok, ReplyBin} -> - #auth_reply{code = Code, message = Message} = message_pb:decode_msg(ReplyBin, auth_reply), - case Code of - 0 -> - lager:debug("[efka_agent] auth success, message: ~p", [Message]), - %% 上传缓冲区里面的所有数据 - CacheItems = cache_model:get_all_cache(), - lists:foreach(fun(#cache{id = Id, method = Method, data = Packet}) -> - efka_transport:send(TransportPid, Method, Packet), - cache_model:delete(Id) - end, CacheItems), - {next_state, ?STATE_ACTIVATED, State}; - 1 -> - %% 主机在后台的授权未通过;此时agent不能推送数据给云端服务器,但是云端服务器可以推送命令给agent - %% socket的连接状态需要维持 - lager:debug("[efka_agent] auth denied, message: ~p", [Message]), - {next_state, ?STATE_RESTRICTED, State}; - 2 -> - % 其他类型的错误,需要间隔时间重试 - lager:debug("[efka_agent] auth failed, message: ~p", [Message]), - efka_transport:stop(TransportPid), - {next_state, ?STATE_DENIED, State#state{transport_pid = undefined}}; - _ -> - % 其他类型的错误,需要间隔时间重试 - lager:debug("[efka_agent] auth failed, invalid message"), - efka_transport:stop(TransportPid), - {next_state, ?STATE_DENIED, State#state{transport_pid = undefined}} - end; - {error, Reason} -> - lager:debug("[efka_agent] auth_request failed, error: ~p", [Reason]), - efka_transport:stop(TransportPid), - {next_state, ?STATE_DENIED, State#state{transport_pid = undefined}} - end; - -%% 云端服务器推送了消息 -%% 激活消息 - -%% 微服务部署 -handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> - #deploy{task_id = TaskId, service_id = ServiceId, tar_url = TarUrl} = message_pb:decode_msg(DeployBin, deploy), - - %% 短暂的等待,efka_inetd收到消息后就立即返回了 - Reply = case efka_inetd:deploy(TaskId, ServiceId, TarUrl) of - ok -> - #async_call_reply{code = 1, result = <<"ok">>}; - {error, Reason} when is_binary(Reason) -> - #async_call_reply{code = 0, message = Reason} - end, - efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - - {keep_state, State}; - -%% 启动微服务 -handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> - %% 短暂的等待,efka_inetd收到消息后就立即返回了 - Reply = case efka_inetd:start_service(ServiceId) of - ok -> - #async_call_reply{code = 1, result = <<"ok">>}; - {error, Reason} when is_binary(Reason) -> - #async_call_reply{code = 0, message = Reason} - end, - efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - - {keep_state, State}; - -%% 停止微服务 -handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> - %% 短暂的等待,efka_inetd收到消息后就立即返回了 - Reply = case efka_inetd:stop_service(ServiceId) of - ok -> - #async_call_reply{code = 1, result = <<"ok">>}; - {error, Reason} when is_binary(Reason) -> - #async_call_reply{code = 0, message = Reason} - end, - efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - - {keep_state, State}; - -%% config.json配置信息 -handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid, push_inflight = PushInflight}) -> - #push_service_config{service_id = ServiceId, config_json = ConfigJson, timeout = Timeout} = message_pb:decode_msg(ConfigBin, push_service_config), - - case efka_service:get_pid(ServiceId) of - undefined -> - Reply = #async_call_reply{code = 0, message = <<"service not run">>}, - efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - {keep_state, State}; - ServicePid when is_pid(ServicePid) -> - Ref = make_ref(), - %% 将配置文件推送到对应的微服务 - efka_service:push_config(ServicePid, Ref, ConfigJson), - %% 处理超时逻辑 - erlang:start_timer(Timeout, self(), {request_timeout, Ref}), - - {keep_state, State#state{push_inflight = maps:put(Ref, PacketId, PushInflight)}} - end; - -%% 收到需要回复的指令 -handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{push_inflight = PushInflight, transport_pid = TransportPid}) -> - #invoke{service_id = ServiceId, payload = Payload, timeout = Timeout} = message_pb:decode_msg(InvokeBin, invoke), - %% 消息发送到订阅系统 - case efka_service:get_pid(ServiceId) of - undefined -> - Reply = #async_call_reply{code = 0, message = <<"micro_service not run">>}, - efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - - {keep_state, State}; - ServicePid when is_pid(ServicePid) -> - Ref = make_ref(), - efka_service:invoke(ServicePid, Ref, Payload), - %% 处理超时逻辑 - erlang:start_timer(Timeout, self(), {request_timeout, Ref}), - - {keep_state, State#state{push_inflight = maps:put(Ref, PacketId, PushInflight)}} - end; - -%% 处理task_log -handle_event(info, {server_push, PacketId, <>}, ?STATE_ACTIVATED, State = #state{transport_pid = TransportPid}) -> - #fetch_task_log{task_id = TaskId} = message_pb:decode_msg(TaskLogBin, fetch_task_log), - lager:debug("[efka_agent] get task_log request: ~p", [TaskId]), - {ok, Logs} = efka_inetd_task_log:get_logs(TaskId), - Reply = case length(Logs) > 0 of - true -> - Result = iolist_to_binary(jiffy:encode(Logs, [force_utf8])), - #async_call_reply{code = 1, result = Result}; - false -> - #async_call_reply{code = 1, result = <<"[]">>} - end, - efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - - {keep_state, State}; - -%% 处理命令 -handle_event(info, {server_command, ?COMMAND_AUTH, <>}, StateName, State = #state{transport_pid = TransportPid}) -> - case {Auth, StateName} of - {1, ?STATE_ACTIVATED} -> - {keep_state, State}; - {1, ?STATE_DENIED} -> - %% 重新激活, 需要重新校验 - AuthRequestBin = auth_request(), - efka_transport:auth_request(TransportPid, AuthRequestBin), - {next_state, ?STATE_AUTH, State}; - {0, _} -> - %% 这个时候的主机应该是受限制的状态,不允许发送消息;但是能够接受服务器推送的消息 - {next_state, ?STATE_RESTRICTED, State} - end; - -%% 收到需要回复的指令 -handle_event(info, {server_pub, Topic, Content}, ?STATE_ACTIVATED, State) -> - lager:debug("[efka_agent] get pub topic: ~p, content: ~p", [Topic, Content]), - %% 消息发送到订阅系统 - efka_subscription:publish(Topic, Content), - {keep_state, State}; - -%% 收到来自efka_service的回复 -handle_event(info, {service_reply, Ref, EmsReply}, ?STATE_ACTIVATED, State = #state{push_inflight = PushInflight, transport_pid = TransportPid}) -> - case maps:take(Ref, PushInflight) of - error -> - {keep_state, State}; - {PacketId, NPushInflight} -> - Reply = case EmsReply of - {ok, Result} -> - #async_call_reply{code = 1, result = Result}; - {error, Reason} -> - #async_call_reply{code = 0, message = Reason} - end, - efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - - {keep_state, State#state{push_inflight = NPushInflight}} - end; - -%% 收到来自服务器端的回复 -handle_event(info, {server_reply, Ref, ReplyBin}, ?STATE_ACTIVATED, State = #state{request_inflight = RequestInflight}) -> - case maps:take(Ref, RequestInflight) of - error -> - {keep_state, State}; - {ReceiverPid, NRequestInflight} -> - is_process_alive(ReceiverPid) andalso erlang:send(ReceiverPid, {request_reply, Ref, ReplyBin}), - {keep_state, State#state{push_inflight = NRequestInflight}} - end; - -%% todo 请求超时逻辑处理 -handle_event(info, {timeout, _, {request_timeout, Ref}}, ?STATE_ACTIVATED, State = #state{push_inflight = PushInflight, transport_pid = TransportPid}) -> - case maps:take(Ref, PushInflight) of - error -> - {keep_state, State}; - {PacketId, NPushInflight} -> - Reply = #async_call_reply{code = 0, message = <<"reqeust timeout">>, result = <<>>}, - efka_transport:async_call_reply(TransportPid, PacketId, message_pb:encode_msg(Reply)), - - {keep_state, State#state{push_inflight = NPushInflight}} - end; - -%% transport进程退出 -handle_event(info, {'EXIT', TransportPid, Reason}, _, State = #state{transport_pid = TransportPid}) -> - lager:debug("[efka_agent] transport pid: ~p, exit with reason: ~p", [TransportPid, Reason]), - erlang:start_timer(5000, self(), create_transport), - {next_state, ?STATE_DENIED, State#state{transport_pid = undefined}}. - -%% @private -%% @doc This function is called by a gen_statem when it is about to -%% terminate. It should be the opposite of Module:init/1 and do any -%% necessary cleaning up. When it returns, the gen_statem terminates with -%% Reason. The return value is ignored. -terminate(_Reason, _StateName, _State = #state{}) -> - ok. - -%% @private -%% @doc Convert process state when code is changed -code_change(_OldVsn, StateName, State = #state{}, _Extra) -> - {ok, StateName, State}. - -%%%=================================================================== -%%% Internal functions -%%%=================================================================== - --spec auth_request() -> binary(). -auth_request() -> - {ok, AuthInfo} = application:get_env(efka, auth), - UUID = proplists:get_value(uuid, AuthInfo), - Username = proplists:get_value(username, AuthInfo), - Salt = proplists:get_value(salt, AuthInfo), - Token = proplists:get_value(token, AuthInfo), - - message_pb:encode_msg(#auth_request{ - uuid = unicode:characters_to_binary(UUID), - username = unicode:characters_to_binary(Username), - salt = unicode:characters_to_binary(Salt), - token = unicode:characters_to_binary(Token), - timestamp = efka_util:timestamp() - }). \ No newline at end of file