From 6c96c18332221b536da9474e498fc525421b1f67 Mon Sep 17 00:00:00 2001 From: anlicheng <244108715@qq.com> Date: Tue, 20 May 2025 10:53:55 +0800 Subject: [PATCH] =?UTF-8?q?=E7=AE=80=E5=8C=96service=E7=9A=84=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/efka/src/efka_inetd.erl | 40 +++++++- apps/efka/src/efka_manifest.erl | 3 +- apps/efka/src/efka_service.erl | 136 +++++++------------------ apps/efka/src/efka_service_sup.erl | 9 +- apps/efka/src/mnesia/service_model.erl | 17 +++- 5 files changed, 101 insertions(+), 104 deletions(-) diff --git a/apps/efka/src/efka_inetd.erl b/apps/efka/src/efka_inetd.erl index e8f0240..ba08c9c 100644 --- a/apps/efka/src/efka_inetd.erl +++ b/apps/efka/src/efka_inetd.erl @@ -17,7 +17,7 @@ %% API -export([start_link/0]). --export([deploy/3]). +-export([deploy/3, start_service/1, stop_service/1]). %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -38,6 +38,14 @@ deploy(TaskId, ServerId, TarUrl) when is_integer(TaskId), is_binary(ServerId), is_binary(TarUrl) -> gen_server:call(?SERVER, {deploy, TaskId, ServerId, TarUrl}). +-spec start_service(ServiceId :: binary()) -> ok | {error, Reason :: term()}. +start_service(ServiceId) when is_binary(ServiceId) -> + gen_server:call(?SERVER, {start_service, ServiceId}). + +-spec stop_service(ServiceId :: binary()) -> ok | {error, Reason :: term()}. +stop_service(ServiceId) when is_binary(ServiceId) -> + gen_server:call(?SERVER, {stop_service, ServiceId}). + %% @doc Spawns the server and registers the local name (unique) -spec(start_link() -> {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). 
@@ -73,6 +81,8 @@ handle_call({deploy, TaskId, ServiceId, TarUrl}, _From, State = #state{root_dir
     {ok, ServiceRootDir} = ensure_dirs(RootDir, ServiceId),
 
     ServicePid = efka_service:get_pid(ServiceId),
-    case is_pid(ServicePid) andalso efka_service:is_running(ServicePid) of
+    lager:debug("service pid is: ~p", [ServicePid]),
+
+    case is_pid(ServicePid) of
         true ->
             {reply, {error, <<"the service is running, stop first">>}, State};
@@ -90,6 +100,34 @@ handle_call({deploy, TaskId, ServiceId, TarUrl}, _From, State = #state{root_dir
             end
     end;
 
+%% Start a service; refused when a process for this service id already exists.
+handle_call({start_service, ServiceId}, _From, State) ->
+    case efka_service:get_pid(ServiceId) of
+        undefined ->
+            case efka_service_sup:start_service(ServiceId) of
+                {ok, _} ->
+                    %% Persist status = 1 so the service is started again the next time efka boots.
+                    ok = service_model:change_status(ServiceId, 1),
+                    {reply, ok, State};
+                {error, Reason} ->
+                    {reply, {error, Reason}, State}
+            end;
+        ServicePid when is_pid(ServicePid) ->
+            {reply, {error, <<"service is running">>}, State}
+    end;
+
+%% Stop a service; an explicit stop also updates the persisted status field.
+handle_call({stop_service, ServiceId}, _From, State = #state{}) ->
+    case efka_service:get_pid(ServiceId) of
+        undefined ->
+            {reply, {error, <<"service not running">>}, State};
+        ServicePid when is_pid(ServicePid) ->
+            efka_service_sup:delete_service(ServiceId),
+            %% Persist status = 0 so an explicitly stopped service is not auto-started on the next efka boot.
+            ok = service_model:change_status(ServiceId, 0),
+            {reply, ok, State}
+    end;
+
 handle_call(_Request, _From, State = #state{}) ->
     {reply, ok, State}.
 
diff --git a/apps/efka/src/efka_manifest.erl b/apps/efka/src/efka_manifest.erl
index d7869ec..f4db6ce 100644
--- a/apps/efka/src/efka_manifest.erl
+++ b/apps/efka/src/efka_manifest.erl
@@ -41,7 +41,7 @@ new(ServiceRootDir) when is_list(ServiceRootDir) ->
     end.
 
 -spec startup(Manifest :: #manifest{}) -> {ok, Port :: port()} | {error, Reason :: binary()}.
-startup(#manifest{work_dir = WorkDir, exec = ExecCmd0, args = Args0}) -> +startup(#manifest{id = Id, work_dir = WorkDir, exec = ExecCmd0, args = Args0}) -> PortSettings = [ {cd, WorkDir}, {args, [binary_to_list(A) || A <- Args0]}, @@ -49,6 +49,7 @@ startup(#manifest{work_dir = WorkDir, exec = ExecCmd0, args = Args0}) -> ], ExecCmd = binary_to_list(ExecCmd0), RealExecCmd = filename:absname_join(WorkDir, ExecCmd), + lager:debug("[efka_manifest] service_id: ~p, real command is: ~p", [Id, RealExecCmd]), case catch erlang:open_port({spawn_executable, RealExecCmd}, PortSettings) of Port when is_port(Port) -> {ok, Port}; diff --git a/apps/efka/src/efka_service.erl b/apps/efka/src/efka_service.erl index 55591bd..c38712a 100644 --- a/apps/efka/src/efka_service.erl +++ b/apps/efka/src/efka_service.erl @@ -13,14 +13,10 @@ -behaviour(gen_server). -%% 当前微服务的状态 --define(STATUS_STOPPED, stopped). --define(STATUS_RUNNING, running). - %% API -export([start_link/2]). -export([get_name/1, get_pid/1, start_service/1, stop_service/1, attach_channel/2]). --export([push_config/3, request_config/1, invoke/3, is_running/1]). +-export([push_config/3, request_config/1, invoke/3]). -export([metric_data/3, send_event/3]). %% gen_server callbacks @@ -36,9 +32,7 @@ os_pid :: undefined | integer(), %% 配置信息 manifest :: undefined | efka_manifest:manifest(), - inflight = #{}, - %% 当前服务的运行状态 - running_status = ?STATUS_STOPPED + inflight = #{} }). %%%=================================================================== @@ -59,10 +53,6 @@ push_config(Pid, Ref, ConfigJson) when is_pid(Pid), is_binary(ConfigJson) -> invoke(Pid, Ref, Payload) when is_pid(Pid), is_reference(Ref), is_binary(Payload) -> gen_server:cast(Pid, {invoke, Ref, self(), Payload}). --spec is_running(Pid :: pid()) -> boolean(). -is_running(Pid) when is_pid(Pid) -> - gen_server:call(Pid, is_running). - request_config(Pid) when is_pid(Pid) -> gen_server:call(Pid, request_config). 
@@ -104,31 +94,25 @@ init([ServiceId]) -> error -> lager:notice("[efka_service] service_id: ~p, not found", [ServiceId]), ignore; - {ok, Service = #service{root_dir = RootDir}} -> + {ok, #service{root_dir = RootDir}} -> + %% 第一次启动,要求必须成功;只有第一次启动成功,后续的重启逻辑才有意义 case efka_manifest:new(RootDir) of {ok, Manifest} -> - init0(Service, Manifest); + case efka_manifest:startup(Manifest) of + {ok, Port} -> + {os_pid, OSPid} = erlang:port_info(Port, os_pid), + lager:debug("[efka_service] service: ~p, port: ~p, boot_service success os_pid: ~p", [ServiceId, Port, OSPid]), + {ok, #state{service_id = ServiceId, manifest = Manifest, port = Port, os_pid = OSPid}}; + {error, Reason} -> + lager:debug("[efka_service] service: ~p, boot_service get error: ~p", [ServiceId, Reason]), + {stop, Reason} + end; {error, Reason} -> lager:notice("[efka_service] service: ~p, read manifest.json get error: ~p", [ServiceId, Reason]), ignore end end. -init0(#service{service_id = ServiceId, status = 1}, Manifest) -> - %% 数据的状态和运行状态是2回事 - case efka_manifest:startup(Manifest) of - {ok, Port} -> - {os_pid, OSPid} = erlang:port_info(Port, os_pid), - lager:debug("[efka_service] service: ~p, port: ~p, boot_service success os_pid: ~p", [ServiceId, Port, OSPid]), - {ok, #state{service_id = ServiceId, manifest = Manifest, running_status = ?STATUS_RUNNING, port = Port, os_pid = OSPid}}; - {error, Reason} -> - lager:debug("[efka_service] service: ~p, boot_service get error: ~p", [ServiceId, Reason]), - {ok, #state{service_id = ServiceId, manifest = Manifest, running_status = ?STATUS_STOPPED, port = undefined, os_pid = undefined}} - end; -init0(#service{service_id = ServiceId, status = 0}, Manifest) -> - lager:debug("[efka_service] service: ~p current status is 0, not boot", [ServiceId]), - {ok, #state{service_id = ServiceId, manifest = Manifest, running_status = ?STATUS_STOPPED, port = undefined, os_pid = undefined}}. 
- %% @private %% @doc Handling call messages -spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, @@ -140,59 +124,20 @@ init0(#service{service_id = ServiceId, status = 0}, Manifest) -> {stop, Reason :: term(), Reply :: term(), NewState :: #state{}} | {stop, Reason :: term(), NewState :: #state{}}). %% 绑定channel -handle_call({attach_channel, ChannelPid}, _From, State = #state{channel_pid = OldChannelPid, service_id = ServiceId}) -> - Status = service_model:get_status(ServiceId), - case {Status, is_pid(OldChannelPid) andalso is_process_alive(OldChannelPid)} of - {1, false} -> +handle_call({attach_channel, ChannelPid}, _From, State = #state{channel_pid = OldChannelPid}) -> + case is_pid(OldChannelPid) andalso is_process_alive(OldChannelPid) of + false -> erlang:monitor(process, ChannelPid), {reply, ok, State#state{channel_pid = ChannelPid}}; - {1, true} -> - {reply, {error, <<"channel exists">>}, State}; - {0, _} -> - {reply, {error, <<"serivce stopped">>}, State} + true -> + {reply, {error, <<"channel exists">>}, State} end; -%% 获取服务的运行状态 -handle_call(is_running, _From, State = #state{running_status = RunningStatus}) -> - {reply, RunningStatus, State}; - %% 请求参数项 done -handle_call(request_config, _From, State = #state{service_id = ServiceId, running_status = ?STATUS_RUNNING}) -> +handle_call(request_config, _From, State = #state{service_id = ServiceId}) -> Params = service_model:get_params(ServiceId), {reply, {ok, Params}, State}; -%% 启动服务: 当前服务如果正常运行,则不允许重启 -handle_call(start_service, _From, State = #state{running_status = ?STATUS_RUNNING}) -> - {reply, {error, <<"service is running">>}, State}; -handle_call(start_service, _From, State = #state{running_status = ?STATUS_STOPPED, manifest = Manifest, service_id = ServiceId}) -> - %% 异步启动服务 - case efka_manifest:startup(Manifest) of - {ok, Port} -> - {os_pid, OSPid} = erlang:port_info(Port, os_pid), - lager:debug("[efka_service] service_id: ~p, start_service port: ~p, os_pid: ~p", [ServiceId, Port, 
OSPid]), - %% 更新数据库状态 - ok = service_model:change_status(ServiceId, 1), - {reply, ok, State#state{running_status = ?STATUS_RUNNING, port = Port, os_pid = OSPid}}; - {error, Reason} -> - %% 启动失败不能更新数据库里面的状态 - {reply, {error, Reason}, State} - end; - -%% 停止服务, 主动停止的时候会改变服务配置的status字段 -handle_call(stop_service, _From, State = #state{running_status = ?STATUS_STOPPED, service_id = ServiceId, port = Port, os_pid = OSPid}) -> - lager:debug("[efka_service] service_id: ~p, stop service port: ~p, os_pid: ~p", [ServiceId, Port, OSPid]), - {reply, {error, <<"service not running">>}, State}; - -handle_call(stop_service, _From, State = #state{running_status = ?STATUS_RUNNING, port = Port, os_pid = OSPid, service_id = ServiceId}) when is_port(Port) -> - %% 优先使用微服务提供的stop指令, 没有提供的情况下,使用kill指令 - kill_os_pid(OSPid), - - erlang:is_port(Port) andalso erlang:port_close(Port), - lager:debug("[efka_service] service_id: ~p, port: ~p, os_pid: ~p, will closed", [ServiceId, Port, OSPid]), - ok = service_model:change_status(ServiceId, 0), - - {reply, ok, State#state{port = undefined, os_pid = undefined, running_status = ?STATUS_STOPPED}}; - handle_call(_Request, _From, State = #state{}) -> {reply, ok, State}. 
@@ -211,7 +156,7 @@ handle_cast({send_event, EventType, Params}, State = #state{service_id = Service {noreply, State}; %% 推送配置项目 -handle_cast({push_config, Ref, ReceiverPid, ConfigJson}, State = #state{running_status = ?STATUS_RUNNING, channel_pid = ChannelPid, inflight = Inflight}) -> +handle_cast({push_config, Ref, ReceiverPid, ConfigJson}, State = #state{channel_pid = ChannelPid, inflight = Inflight}) -> case is_pid(ChannelPid) andalso is_process_alive(ChannelPid) of true -> efka_tcp_channel:push_config(ChannelPid, Ref, self(), ConfigJson), @@ -222,7 +167,7 @@ handle_cast({push_config, Ref, ReceiverPid, ConfigJson}, State = #state{running_ end; %% 推送配置项目 -handle_cast({invoke, Ref, ReceiverPid, Payload}, State = #state{running_status = ?STATUS_RUNNING, channel_pid = ChannelPid, inflight = Inflight}) -> +handle_cast({invoke, Ref, ReceiverPid, Payload}, State = #state{channel_pid = ChannelPid, inflight = Inflight}) -> case is_pid(ChannelPid) andalso is_process_alive(ChannelPid) of true -> efka_tcp_channel:invoke(ChannelPid, Ref, self(), Payload), @@ -243,20 +188,15 @@ handle_cast(_Request, State = #state{}) -> {stop, Reason :: term(), NewState :: #state{}}). 
%% 重启服务 handle_info({timeout, _, reboot_service}, State = #state{service_id = ServiceId, manifest = Manifest}) -> - case service_model:get_status(ServiceId) of - 0 -> - lager:debug("[efka_service] service_id: ~p, is stopped, ignore boot_service", [ServiceId]), - {noreply, State}; - 1 -> - case efka_manifest:startup(Manifest) of - {ok, Port} -> - {os_pid, OSPid} = erlang:port_info(Port, os_pid), - lager:debug("[efka_service] service_id: ~p, reboot success: ~p, port: ~p, os_pid: ~p", [ServiceId, Port, OSPid]), - {noreply, State#state{running_status = ?STATUS_RUNNING, port = Port, os_pid = OSPid}}; - {error, Reason} -> - lager:debug("[efka_service] service_id: ~p, boot_service get error: ~p", [ServiceId, Reason]), - {noreply, State#state{running_status = ?STATUS_STOPPED}} - end + case efka_manifest:startup(Manifest) of + {ok, Port} -> + {os_pid, OSPid} = erlang:port_info(Port, os_pid), + lager:debug("[efka_service] service_id: ~p, reboot success: ~p, port: ~p, os_pid: ~p", [ServiceId, Port, OSPid]), + {noreply, State#state{port = Port, os_pid = OSPid}}; + {error, Reason} -> + lager:debug("[efka_service] service_id: ~p, boot_service get error: ~p", [ServiceId, Reason]), + try_reboot(), + {noreply, State} end; %% 处理channel的回复 @@ -276,8 +216,8 @@ handle_info({Port, {data, Data}}, State = #state{port = Port, service_id = Servi %% 处理port的消息, Port的被动关闭会触发;因此这个时候的Port和State.port的值是相等的 handle_info({Port, {exit_status, Code}}, State = #state{service_id = ServiceId}) -> lager:debug("[efka_service] service_id: ~p, port: ~p, exit with code: ~p", [ServiceId, Port, Code]), - % erlang:start_timer(5000, self(), reboot_service), - {noreply, State#state{port = undefined, os_pid = undefined, running_status = ?STATUS_STOPPED}}; + try_reboot(), + {noreply, State#state{port = undefined, os_pid = undefined}}; %% 处理channel进程的退出 handle_info({'DOWN', _Ref, process, ChannelPid, Reason}, State = #state{channel_pid = ChannelPid, service_id = ServiceId}) -> @@ -291,11 +231,9 @@ 
handle_info({'DOWN', _Ref, process, ChannelPid, Reason}, State = #state{channel_ %% with Reason. The return value is ignored. -spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), State :: #state{}) -> term()). -terminate(Reason, _State = #state{service_id = ServiceId, os_pid = OSPid}) -> - lager:debug("[efka_service] service_id: ~p, terminate with reason: ~p", [ServiceId, Reason]), +terminate(Reason, _State = #state{service_id = ServiceId, port = Port, os_pid = OSPid}) -> + erlang:is_port(Port) andalso erlang:port_close(Port), kill_os_pid(OSPid), - ok; -terminate(Reason, #state{service_id = ServiceId}) -> lager:debug("[efka_service] service_id: ~p, terminate with reason: ~p", [ServiceId, Reason]), ok. @@ -318,4 +256,8 @@ kill_os_pid(undefined) -> kill_os_pid(OSPid) when is_integer(OSPid) -> Cmd = lists:flatten(io_lib:format("kill -9 ~p", [OSPid])), lager:debug("kill cmd is: ~p", [Cmd]), - os:cmd(Cmd). \ No newline at end of file + os:cmd(Cmd). + +-spec try_reboot() -> no_return(). +try_reboot() -> + erlang:start_timer(5000, self(), reboot_service). \ No newline at end of file diff --git a/apps/efka/src/efka_service_sup.erl b/apps/efka/src/efka_service_sup.erl index 94c9078..ec9b665 100644 --- a/apps/efka/src/efka_service_sup.erl +++ b/apps/efka/src/efka_service_sup.erl @@ -46,8 +46,9 @@ start_link() -> | ignore | {error, Reason :: term()}). init([]) -> SupFlags = #{strategy => one_for_one, intensity => 1000, period => 3600}, - MicroServiceIds = service_model:get_all_service_ids(), - Specs = lists:map(fun(ServiceId) -> child_spec(ServiceId) end, MicroServiceIds), + %% 简化逻辑,只启动需要运行的微服务 + Services = service_model:get_running_services(), + Specs = lists:map(fun(ServiceId) -> child_spec(ServiceId) end, Services), {ok, {SupFlags, Specs}}. @@ -72,13 +73,15 @@ delete_service(ServiceId) when is_binary(ServiceId) -> ok = supervisor:terminate_child(?MODULE, ChildId), supervisor:delete_child(?MODULE, ChildId). 
+child_spec(#service{service_id = ServiceId}) when is_binary(ServiceId) -> + child_spec(ServiceId); child_spec(ServiceId) when is_binary(ServiceId) -> Name = efka_service:get_name(ServiceId), #{ id => Name, start => {efka_service, start_link, [Name, ServiceId]}, restart => permanent, - shutdown => 2000, + shutdown => 5000, type => worker, modules => ['efka_service'] }. \ No newline at end of file diff --git a/apps/efka/src/mnesia/service_model.erl b/apps/efka/src/mnesia/service_model.erl index f90a7e0..b967b69 100644 --- a/apps/efka/src/mnesia/service_model.erl +++ b/apps/efka/src/mnesia/service_model.erl @@ -15,7 +15,7 @@ %% API -export([create_table/0]). --export([insert/1, get_all_services/0, get_all_service_ids/0]). +-export([insert/1, get_all_services/0, get_all_service_ids/0, get_running_services/0]). -export([get_metrics/1, get_params/1, set_metrics/2, set_params/2, get_service/1, get_status/1, change_status/2]). create_table() -> @@ -137,4 +137,17 @@ get_all_services() -> -spec get_all_service_ids() -> [ServiceId :: binary()]. get_all_service_ids() -> - mnesia:dirty_all_keys(?TAB). \ No newline at end of file + mnesia:dirty_all_keys(?TAB). + +-spec get_running_services() -> {ok, [#service{}]} | {error, Reason :: term()}. +get_running_services() -> + F = fun() -> + Q = qlc:q([E || E <- mnesia:table(?TAB), E#service.status == 1]), + qlc:e(Q) + end, + case mnesia:transaction(F) of + {atomic, Services} -> + {ok, Services}; + {aborted, Error} -> + {error, Error} + end. \ No newline at end of file