164 lines
5.1 KiB
Go
164 lines
5.1 KiB
Go
package business
|
||
|
||
import (
|
||
"Dynamic_environmental_detection/logger"
|
||
"fmt"
|
||
"strconv"
|
||
"sync"
|
||
"time"
|
||
)
|
||
|
||
// FaultDetector 故障检测器
|
||
type FaultDetector struct {
|
||
deviceStatuses map[string]*DeviceStatus
|
||
mutex sync.RWMutex
|
||
hostUUID string
|
||
}
|
||
|
||
// NewFaultDetector 创建故障检测器
|
||
func NewFaultDetector(hostUUID string) *FaultDetector {
|
||
return &FaultDetector{
|
||
deviceStatuses: make(map[string]*DeviceStatus),
|
||
hostUUID: hostUUID,
|
||
}
|
||
}
|
||
|
||
// UpdateDeviceStatus 更新设备状态
|
||
func (fd *FaultDetector) UpdateDeviceStatus(deviceUUID string, success bool, errMsg string, sensorType SensorType) (needReport bool, event *EventData) {
|
||
fd.mutex.Lock()
|
||
defer fd.mutex.Unlock()
|
||
|
||
// 获取或创建设备状态
|
||
status, exists := fd.deviceStatuses[deviceUUID]
|
||
if !exists {
|
||
status = &DeviceStatus{
|
||
DeviceUUID: deviceUUID,
|
||
IsOnline: true,
|
||
}
|
||
fd.deviceStatuses[deviceUUID] = status
|
||
}
|
||
|
||
currentTime := time.Now()
|
||
needReport = false
|
||
|
||
if success {
|
||
// 设备读取成功
|
||
status.LastSuccessTime = currentTime
|
||
status.ErrorCount = 0
|
||
status.LastError = ""
|
||
|
||
if !status.IsOnline {
|
||
// 设备从离线恢复在线
|
||
status.IsOnline = true
|
||
needReport = true
|
||
event = fd.createOnlineEvent(deviceUUID, sensorType, currentTime)
|
||
logger.Logger.Printf("Equipment restored to operational status: %s", deviceUUID)
|
||
}
|
||
} else {
|
||
// 设备读取失败
|
||
status.ErrorCount++
|
||
status.LastError = errMsg
|
||
|
||
if status.IsOnline && status.ErrorCount >= 3 {
|
||
// 设备从在线变为离线(连续3次失败)
|
||
status.IsOnline = false
|
||
needReport = true
|
||
event = fd.createAlarmEvent(deviceUUID, sensorType, status.LastSuccessTime, currentTime, errMsg)
|
||
logger.Logger.Printf("Equipment Offline Alarm: %s, Error: %s", deviceUUID, errMsg)
|
||
}
|
||
}
|
||
|
||
status.LastReportTime = currentTime
|
||
return needReport, event
|
||
}
|
||
|
||
// createOnlineEvent 创建设备在线事件
|
||
func (fd *FaultDetector) createOnlineEvent(deviceUUID string, sensorType SensorType, timestamp time.Time) *EventData {
|
||
return &EventData{
|
||
Description: "Sensor online",
|
||
Timestamp: strconv.FormatInt(timestamp.Unix(), 10),
|
||
HostUUID: fd.hostUUID,
|
||
DeviceUUID: deviceUUID,
|
||
Level: "online",
|
||
SensorType: GetSensorTypeDescription(sensorType),
|
||
}
|
||
}
|
||
|
||
// createAlarmEvent 创建设备报警事件
|
||
func (fd *FaultDetector) createAlarmEvent(deviceUUID string, sensorType SensorType, lastSuccessTime, timestamp time.Time, errMsg string) *EventData {
|
||
return &EventData{
|
||
Description: fmt.Sprintf("Sensor communication failure: %s", errMsg),
|
||
Timestamp: strconv.FormatInt(timestamp.Unix(), 10),
|
||
HostUUID: fd.hostUUID,
|
||
DeviceUUID: deviceUUID,
|
||
Level: "alarm",
|
||
LastSuccessfulTime: strconv.FormatInt(lastSuccessTime.Unix(), 10),
|
||
SensorType: GetSensorTypeDescription(sensorType),
|
||
Recommendation: "Check device connection and communication line",
|
||
}
|
||
}
|
||
|
||
// GetOnlineDevices 获取在线设备列表
|
||
func (fd *FaultDetector) GetOnlineDevices() []string {
|
||
fd.mutex.RLock()
|
||
defer fd.mutex.RUnlock()
|
||
|
||
var onlineDevices []string
|
||
for deviceUUID, status := range fd.deviceStatuses {
|
||
if status.IsOnline {
|
||
onlineDevices = append(onlineDevices, deviceUUID)
|
||
}
|
||
}
|
||
return onlineDevices
|
||
}
|
||
|
||
// GetDeviceStatus 获取设备状态
|
||
func (fd *FaultDetector) GetDeviceStatus(deviceUUID string) (*DeviceStatus, bool) {
|
||
fd.mutex.RLock()
|
||
defer fd.mutex.RUnlock()
|
||
|
||
status, exists := fd.deviceStatuses[deviceUUID]
|
||
return status, exists
|
||
}
|
||
|
||
// CollectAndClearPendingEvents 收集所有待上报的事件并清空(供批量上报使用)
|
||
func (fd *FaultDetector) CollectAndClearPendingEvents() (alarms []EventData, onlines []EventData) {
|
||
fd.mutex.Lock()
|
||
defer fd.mutex.Unlock()
|
||
|
||
current := time.Now()
|
||
timestamp := strconv.FormatInt(current.Unix(), 10)
|
||
|
||
for deviceUUID, status := range fd.deviceStatuses {
|
||
// 检查是否有需要上报的离线恢复(原来逻辑是实时上报的,这里改为攒着批量发)
|
||
if !status.IsOnline && status.ErrorCount >= 3 && status.LastReportTime.Before(current.Add(-5*time.Second)) {
|
||
// 防止重复发,简单用时间判断
|
||
alarms = append(alarms, EventData{
|
||
Description: fmt.Sprintf("Sensor communication failure: %s", status.LastError),
|
||
Timestamp: timestamp,
|
||
HostUUID: fd.hostUUID,
|
||
DeviceUUID: deviceUUID,
|
||
Level: "alarm",
|
||
LastSuccessfulTime: strconv.FormatInt(status.LastSuccessTime.Unix(), 10),
|
||
SensorType: "未知", // 后面你采集时会带 sensorType,这里先占位
|
||
Recommendation: "Check device connection and communication line",
|
||
})
|
||
status.LastReportTime = current // 标记已处理
|
||
}
|
||
|
||
if status.IsOnline && status.LastSuccessTime.After(status.LastReportTime) {
|
||
// 如果最近一次成功采集时间 > 上次上报时间,说明刚刚恢复,需要发 online
|
||
onlines = append(onlines, EventData{
|
||
Description: "Sensor online",
|
||
Timestamp: timestamp,
|
||
HostUUID: fd.hostUUID,
|
||
DeviceUUID: deviceUUID,
|
||
Level: "online",
|
||
SensorType: "未知", // 同上
|
||
})
|
||
status.LastReportTime = current
|
||
}
|
||
}
|
||
return alarms, onlines
|
||
}
|