增加主机的离线和在线告警

This commit is contained in:
anlicheng 2024-09-11 14:07:21 +08:00
parent f320d7defa
commit dcd23dd6da
2 changed files with 47 additions and 11 deletions

View File

@ -32,7 +32,7 @@
-record(state, { -record(state, {
host_id :: integer(), host_id :: integer(),
name :: binary(), name = <<"">> :: binary(),
%% %%
uuid :: binary(), uuid :: binary(),
%% aes的key, %% aes的key,
@ -341,7 +341,11 @@ handle_event({call, From}, {create_session, PubKey}, ?STATE_ACTIVATED, State = #
Reply = #{<<"a">> => true, <<"aes">> => Aes}, Reply = #{<<"a">> => true, <<"aes">> => Aes},
EncReply = iot_cipher_rsa:encode(Reply, PubKey), EncReply = iot_cipher_rsa:encode(Reply, PubKey),
{ok, AffectedRow} = host_bo:change_status(UUID, ?HOST_ONLINE), {ok, AffectedRow} = host_bo:change_status(UUID, ?HOST_ONLINE),
warn_status(Name, <<"在线"/utf8>>),
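%% Host came online: queue the warning via delay_warn so the watchdog buffers it instead of sending it immediately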
Warn = format_warn(Name, <<"在线"/utf8>>),
catch iot_watchdog:delay_warn(Warn),
report_event(UUID, ?HOST_ONLINE), report_event(UUID, ?HOST_ONLINE),
lager:debug("[iot_host] host_id(session) uuid: ~p, create_session, will change status, affected_row: ~p", [UUID, AffectedRow]), lager:debug("[iot_host] host_id(session) uuid: ~p, create_session, will change status, affected_row: ~p", [UUID, AffectedRow]),
@ -526,8 +530,11 @@ handle_event(info, {timeout, _, heartbeat_ticker}, _, State = #state{uuid = UUID
lager:debug("[iot_host] host: ~p, host_maybe_offline, host now is offline, do nothing", [UUID]); lager:debug("[iot_host] host: ~p, host_maybe_offline, host now is offline, do nothing", [UUID]);
?HOST_ONLINE -> ?HOST_ONLINE ->
{ok, _} = host_bo:change_status(UUID, ?HOST_OFFLINE), {ok, _} = host_bo:change_status(UUID, ?HOST_OFFLINE),
warn_status(Name, <<"离线"/utf8>>), %% 线
report_event(UUID, ?HOST_OFFLINE) Warn = format_warn(Name, <<"离线"/utf8>>),
iot_watchdog:warn(Warn),
catch report_event(UUID, ?HOST_OFFLINE)
end, end,
%% channel %% channel
@ -635,11 +642,8 @@ report_event(UUID, NewStatus) when is_binary(UUID), is_integer(NewStatus) ->
lager:debug("[iot_host] host_uuid: ~p, route fields: ~p", [UUID, FieldsList]). lager:debug("[iot_host] host_uuid: ~p, route fields: ~p", [UUID, FieldsList]).
%% %%
warn_status(Name, Status) when is_binary(Name), is_binary(Status) -> format_warn(Name, Status) when is_binary(Name), is_binary(Status) ->
Warn = iolist_to_binary([<<"主机: "/utf8>>, Name, <<" || ">>, Status]), iolist_to_binary([<<"主机: "/utf8>>, Name, <<" || ">>, Status]).
iot_watchdog:warn(Warn);
warn_status(_, _) ->
ok.
%% state转换成map %% state转换成map
state_map(#state{host_id = HostId, uuid = UUID, aes = Aes, has_session = HasSession, heartbeat_counter = HeartbeatCounter, channel_pid = ChannelPid, metrics = Metrics}) -> state_map(#state{host_id = HostId, uuid = UUID, aes = Aes, has_session = HasSession, heartbeat_counter = HeartbeatCounter, channel_pid = ChannelPid, metrics = Metrics}) ->

View File

@ -13,7 +13,7 @@
%% API %% API
-export([start_link/0]). -export([start_link/0]).
-export([detection/3, warn/1]). -export([detection/3, warn/1, delay_warn/1]).
%% gen_server callbacks %% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
@ -23,6 +23,9 @@
%% id %% id
-define(SYS_ID, <<"ZNWLZJJKXT1">>). -define(SYS_ID, <<"ZNWLZJJKXT1">>).
%% warn相关的消息每10分钟最多发送一次
-define(WARN_TICKER, 600_000).
%% %%
-record(limiter, { -record(limiter, {
cpu_temperature = 0 :: integer(), cpu_temperature = 0 :: integer(),
@ -38,7 +41,8 @@
users :: binary(), %% : "S123, S1234" users :: binary(), %% : "S123, S1234"
pri_key :: public_key:private_key(), pri_key :: public_key:private_key(),
%% : #{uuid => #limiter{}} %% : #{uuid => #limiter{}}
limiters = #{} limiters = #{},
warn_buf = []
}). }).
%%%=================================================================== %%%===================================================================
@ -55,6 +59,10 @@ detection(HostUUID, Name, _) when is_binary(HostUUID), is_binary(Name) ->
warn(Warn) when is_binary(Warn) -> warn(Warn) when is_binary(Warn) ->
gen_server:cast(?SERVER, {warn, Warn}). gen_server:cast(?SERVER, {warn, Warn}).
%% @doc Queue a warning for deferred delivery.
%% The message is cast to the watchdog server as `{delay_warn, Warn}'.
%% The call is asynchronous: `gen_server:cast/2' returns `ok' immediately
%% and does not wait for the server to process the message.
-spec delay_warn(Warn :: binary()) -> ok.
delay_warn(Warn) when is_binary(Warn) ->
    gen_server:cast(?SERVER, {delay_warn, Warn}).
%% @doc Spawns the server and registers the local name (unique) %% @doc Spawns the server and registers the local name (unique)
-spec(start_link() -> -spec(start_link() ->
{ok, Pid :: pid()} | ignore | {error, Reason :: term()}). {ok, Pid :: pid()} | ignore | {error, Reason :: term()}).
@ -84,6 +92,8 @@ init([]) ->
{ok, LoggerPid} = iot_logger:start_link("watchdog_data"), {ok, LoggerPid} = iot_logger:start_link("watchdog_data"),
PriKey = generate_private_key(PriFile), PriKey = generate_private_key(PriFile),
erlang:start_timer(?WARN_TICKER, self(), warn_ticker),
{ok, #state{logger_pid = LoggerPid, pri_key = PriKey, report_interval = ReportInterval, url = Url, users = Users, limiters = #{}, guard_items = GuardItems}}. {ok, #state{logger_pid = LoggerPid, pri_key = PriKey, report_interval = ReportInterval, url = Url, users = Users, limiters = #{}, guard_items = GuardItems}}.
%% @private %% @private
@ -134,6 +144,10 @@ handle_cast({detection, HostUUID, Name, Metric},
{noreply, State#state{limiters = maps:put(HostUUID, NLimiter, Limiters)}}; {noreply, State#state{limiters = maps:put(HostUUID, NLimiter, Limiters)}};
%% Buffer the delayed warning; the buffer is flushed when the warn_ticker timer fires
handle_cast({delay_warn, Warn}, State = #state{warn_buf = WarnBuf}) ->
{noreply, State#state{warn_buf = [Warn|WarnBuf]}};
handle_cast({warn, Warn}, State = #state{url = Url, users = Users, pri_key = PriKey, logger_pid = LoggerPid}) -> handle_cast({warn, Warn}, State = #state{url = Url, users = Users, pri_key = PriKey, logger_pid = LoggerPid}) ->
Body = format_warn(Warn, Users, PriKey), Body = format_warn(Warn, Users, PriKey),
case catch do_post(Url, Body) of case catch do_post(Url, Body) of
@ -150,6 +164,24 @@ handle_cast({warn, Warn}, State = #state{url = Url, users = Users, pri_key = Pri
{noreply, NewState :: #state{}} | {noreply, NewState :: #state{}} |
{noreply, NewState :: #state{}, timeout() | hibernate} | {noreply, NewState :: #state{}, timeout() | hibernate} |
{stop, Reason :: term(), NewState :: #state{}}). {stop, Reason :: term(), NewState :: #state{}}).
%% Periodic flush of the delayed-warning buffer (fires every ?WARN_TICKER ms).
%% When the buffer is non-empty, only the most recently queued warning is
%% posted, suffixed with the number of warnings accumulated since the last
%% flush. The buffer is always cleared afterwards, whether or not the post
%% succeeded.
handle_info({timeout, _, warn_ticker}, State = #state{warn_buf = WarnBuf, url = Url, users = Users, pri_key = PriKey, logger_pid = LoggerPid}) ->
    %% Re-arm the ticker for the next flush.
    erlang:start_timer(?WARN_TICKER, self(), warn_ticker),
    case WarnBuf of
        [] ->
            ok;
        [Latest | _] ->
            %% The count must be rendered as text: a bare integer inside an
            %% iolist is emitted as a single raw byte (and raises badarg
            %% above 255).
            Count = integer_to_binary(length(WarnBuf)),
            %% /utf8 is required, otherwise each CJK code point is truncated
            %% to its low 8 bits.
            Warn = iolist_to_binary([Latest, <<"(累计: "/utf8>>, Count, <<")">>]),
            Body = format_warn(Warn, Users, PriKey),
            case catch do_post(Url, Body) of
                {ok, Resp} ->
                    iot_logger:write(LoggerPid, [Body, Resp]);
                {error, Reason} ->
                    lager:warning("[iot_watchdog] url: ~p, send body: ~ts, get error: ~p", [Url, Body, Reason]);
                {'EXIT', Reason} ->
                    %% do_post raised; log it instead of crashing the
                    %% watchdog with a case_clause error.
                    lager:warning("[iot_watchdog] url: ~p, send body: ~ts, get exit: ~p", [Url, Body, Reason])
            end
    end,
    {noreply, State#state{warn_buf = []}};
handle_info(_Info, State = #state{}) -> handle_info(_Info, State = #state{}) ->
{noreply, State}. {noreply, State}.