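// Server metrics scheduler: polls each active Glances endpoint on its own timer,
// stores snapshot/disk/container/network rows, and runs anomaly detection after each collection.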
import { query, queryOne, execute, getPool } from './db'
// ---- Types ----
/**
 * A monitored server as this module reads it from the `server_targets` table.
 */
interface ServerTarget {
  /** Identifier used as the key of the timer map and the API-version cache. */
  target_id: number
  /** Display name used in log lines and copied into `anomaly_logs.server_name`. */
  server_name: string
  /** Server address (not read by the code in this module). */
  server_ip: string
  /** Base URL of the Glances endpoint; `/api/<version>/<endpoint>` is appended to it. */
  glances_url: string
  /** Activation flag; this module only loads rows where it equals 1. */
  is_active: number
  /** Collection period in seconds; a falsy value falls back to 60 when the timer is created. */
  collect_interval: number
}
/** Active polling timers, keyed by `target_id`. */
const serverTimers = new Map<number, ReturnType<typeof setInterval>>()

/** Glances API version detected for each `target_id`, cached so detection is not repeated on every run. */
const apiVersionCache = new Map<number, string>()

/** Whether the scheduler is currently considered started. */
let isRunning = false
/**
 * Builds the current wall-clock time in the Asia/Seoul timezone.
 *
 * The `sv-SE` locale is used because it formats dates as `YYYY-MM-DD HH:mm:ss`.
 * The same string is used both as a log prefix and as the `collected_at` value
 * written to the database.
 *
 * @returns The current Seoul time formatted by the `sv-SE` locale.
 */
function timestamp(): string {
  const seoulTime = new Date().toLocaleString('sv-SE', { timeZone: 'Asia/Seoul' })
  // Defensive: normalises an ISO-style "T" separator to a space if one is ever present.
  return seoulTime.replace('T', ' ')
}
/**
 * Calls one Glances REST endpoint for a specific API version.
 *
 * This is a best-effort call: any network error, timeout (5 seconds), non-2xx
 * status or invalid JSON body yields `null` instead of throwing.
 *
 * @param baseUrl  Base URL of the Glances server; trailing slashes are ignored.
 * @param endpoint Endpoint name appended after the version segment (e.g. `system`).
 * @param version  API version segment (e.g. `4` or `3`).
 * @returns The parsed JSON body, or `null` when the call failed.
 */
async function fetchGlancesApi(baseUrl: string, endpoint: string, version: string): Promise<any> {
  // A base URL stored with a trailing slash would otherwise produce "//api/...".
  const url = `${baseUrl.replace(/\/+$/, '')}/api/${version}/${endpoint}`

  try {
    const response = await fetch(url, {
      signal: AbortSignal.timeout(5000)
    })
    if (!response.ok) return null
    return await response.json()
  } catch {
    // Deliberately swallowed: callers treat null as "endpoint unavailable".
    return null
  }
}
/**
 * Detects which Glances API version a server speaks by probing the `system`
 * endpoint, trying v4 first and falling back to v3.
 *
 * A probe counts as successful only when the response carries an `os_name` field.
 *
 * @param baseUrl    Base URL of the Glances server.
 * @param serverName Name used in the log lines.
 * @returns `'4'` or `'3'`, or `null` when neither version answered.
 */
async function detectApiVersion(baseUrl: string, serverName: string): Promise<string | null> {
  const stamp = timestamp()

  // Newest version first; the first probe that answers wins.
  for (const candidate of ['4', '3']) {
    console.log(`[${stamp}] 🔍 [${serverName}] API 버전 감지 중... (v${candidate} 시도)`)
    const probe = await fetchGlancesApi(baseUrl, 'system', candidate)
    if (probe && probe.os_name) {
      console.log(`[${stamp}] ✅ [${serverName}] API v${candidate} 감지됨`)
      return candidate
    }
  }

  console.log(`[${stamp}] ❌ [${serverName}] API 버전 감지 실패`)
  return null
}
/** Metric columns read from `server_snapshots` for anomaly detection. */
interface AnomalyMetricSample {
  cpu_percent: number | null
  memory_percent: number | null
}

/** One `anomaly_logs` row plus the console line printed after it is stored. */
interface AnomalyReport {
  detectType: 'short-term' | 'zscore' | 'baseline' | 'trend'
  metric: 'CPU' | 'Memory'
  level: 'danger' | 'warning'
  currentValue: number
  thresholdValue: number
  message: string
  logLine: string
}

/** Metrics checked by every detector, in the order they are evaluated. */
const ANOMALY_METRIC_COLUMNS: ReadonlyArray<{
  metric: AnomalyReport['metric']
  column: keyof AnomalyMetricSample
}> = [
  { metric: 'CPU', column: 'cpu_percent' },
  { metric: 'Memory', column: 'memory_percent' }
]

/** Arithmetic mean; callers guarantee a non-empty array. */
function anomalyMean(values: number[]): number {
  return values.reduce((sum, value) => sum + value, 0) / values.length
}

/** How many population standard deviations `current` lies above the mean of `values` (0 when the spread is <= 0.1). */
function anomalySigma(current: number, values: number[]): { mean: number; sigma: number } {
  const mean = anomalyMean(values)
  const variance = values.reduce((sum, value) => sum + Math.pow(value - mean, 2), 0) / values.length
  const std = Math.sqrt(variance)
  return { mean, sigma: std > 0.1 ? (current - mean) / std : 0 }
}

/** Least-squares line through `values` (x = sample index); returns the slope rescaled per minute and R². */
function anomalyTrend(values: number[], windowMinutes: number): { slope: number; r2: number } {
  const n = values.length
  let sumX = 0, sumY = 0, sumXY = 0, sumX2 = 0
  values.forEach((y, x) => {
    sumX += x
    sumY += y
    sumXY += x * y
    sumX2 += x * x
  })

  const perSample = (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX)
  const intercept = (sumY - perSample * sumX) / n
  const yMean = sumY / n

  let ssTotal = 0, ssResidual = 0
  values.forEach((y, x) => {
    ssTotal += Math.pow(y - yMean, 2)
    ssResidual += Math.pow(y - (perSample * x + intercept), 2)
  })

  const r2 = ssTotal > 0 ? 1 - (ssResidual / ssTotal) : 0
  // NOTE(review): this rescaling presumes the n samples span the whole window — confirm.
  return { slope: (perSample * n) / windowMinutes, r2 }
}

/** Hour of day (0-23) and weekend flag for `at`, evaluated in the Asia/Seoul timezone. */
function seoulHourAndDayType(at: Date): { hour: number; isWeekend: boolean } {
  const parts = new Intl.DateTimeFormat('en-US', {
    timeZone: 'Asia/Seoul',
    hourCycle: 'h23',
    hour: '2-digit',
    weekday: 'short'
  }).formatToParts(at)
  const read = (type: string): string => parts.find((part) => part.type === type)?.value ?? ''
  const weekday = read('weekday')
  // "% 24" guards against runtimes that render midnight as 24.
  return { hour: Number(read('hour')) % 24, isWeekend: weekday === 'Sat' || weekday === 'Sun' }
}

/**
 * Runs all anomaly detectors for one server and stores any findings in `anomaly_logs`.
 *
 * Four detectors run in order, each for CPU and then Memory, and each flags increases only:
 *  1. short-term: average of the newer half of the last 20 online snapshots vs. the older half
 *     (>= 30 triggers, >= 100 is `danger`);
 *  2. zscore: latest value vs. the last hour (>= 2σ warning, >= 3σ danger);
 *  3. baseline: latest value vs. the same hour of day and weekday/weekend type over 14 days
 *     (>= 2σ warning, >= 3σ danger);
 *  4. trend: per-minute linear-regression slope over the last 30 minutes
 *     (slope >= 0.5 with R² >= 0.3 triggers, slope >= 1.0 is `danger`).
 *
 * A finding is skipped when the same detector/metric pair was already logged for this
 * target within the last minute. Errors are logged and never propagate to the caller.
 *
 * NOTE(review): the `NOW() - INTERVAL ...` filters compare against `collected_at`, which this
 * module writes as Asia/Seoul wall-clock time — presumably the DB session timezone matches; confirm.
 *
 * @param targetId   Server whose snapshots are analysed.
 * @param serverName Name stored with each anomaly and used in log lines.
 */
async function detectAnomalies(targetId: number, serverName: string) {
  const now = timestamp()

  // Stores one anomaly unless the same detector/metric fired within the last minute.
  const report = async (entry: AnomalyReport): Promise<void> => {
    const recentExists = await queryOne(`
      SELECT 1 FROM anomaly_logs
      WHERE target_id = $1 AND detect_type = $2 AND metric = $3
        AND detected_at::timestamp > NOW() - INTERVAL '1 minute'
      LIMIT 1
    `, [targetId, entry.detectType, entry.metric])
    if (recentExists) return

    await execute(`
      INSERT INTO anomaly_logs (target_id, server_name, detect_type, metric, level, current_value, threshold_value, message)
      VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
    `, [
      targetId, serverName, entry.detectType, entry.metric,
      entry.level, entry.currentValue, entry.thresholdValue, entry.message
    ])
    console.log(`[${now}] 🚨 [${serverName}] ${entry.logLine}`)
  }

  try {
    // === Short-term rate of change ===
    const SHORT_TERM_THRESHOLD = 30

    const snapshots = await query<AnomalyMetricSample>(`
      SELECT cpu_percent, memory_percent
      FROM server_snapshots
      WHERE target_id = $1 AND is_online = 1
      ORDER BY collected_at DESC
      LIMIT 20
    `, [targetId])

    if (snapshots.length >= 4) {
      // Rows are newest-first, so the first half is the "current" window.
      const half = Math.floor(snapshots.length / 2)
      const currSnapshots = snapshots.slice(0, half)
      const prevSnapshots = snapshots.slice(half)

      for (const { metric, column } of ANOMALY_METRIC_COLUMNS) {
        const currAvg = anomalyMean(currSnapshots.map((s) => s[column] || 0))
        const prevAvg = anomalyMean(prevSnapshots.map((s) => s[column] || 0))
        // Relative change in percent; falls back to the absolute difference when the
        // previous average is too small for a meaningful ratio.
        const change = prevAvg > 1 ? ((currAvg - prevAvg) / prevAvg) * 100 : currAvg - prevAvg

        if (change >= SHORT_TERM_THRESHOLD) {
          const level = change >= 100 ? 'danger' : 'warning'
          await report({
            detectType: 'short-term',
            metric,
            level,
            currentValue: currAvg,
            thresholdValue: change,
            message: `${metric} 급증 감지 (${prevAvg.toFixed(1)}% → ${currAvg.toFixed(1)}%)`,
            logLine: `단기변화율 이상감지: ${metric} +${change.toFixed(1)}% (${level})`
          })
        }
      }
    }

    // === Z-score against the last hour ===
    const WARNING_Z = 2.0
    const DANGER_Z = 3.0

    const hourSnapshots = await query<AnomalyMetricSample>(`
      SELECT cpu_percent, memory_percent
      FROM server_snapshots
      WHERE target_id = $1 AND is_online = 1
        AND collected_at::timestamp >= NOW() - INTERVAL '1 hour'
      ORDER BY collected_at DESC
    `, [targetId])

    if (hourSnapshots.length >= 10) {
      for (const { metric, column } of ANOMALY_METRIC_COLUMNS) {
        const current = hourSnapshots[0]?.[column] ?? 0
        const { mean, sigma } = anomalySigma(current, hourSnapshots.map((s) => s[column] ?? 0))

        if (sigma >= WARNING_Z) {
          const level = sigma >= DANGER_Z ? 'danger' : 'warning'
          await report({
            detectType: 'zscore',
            metric,
            level,
            currentValue: current,
            thresholdValue: sigma,
            message: `${metric} 평균 대비 ${sigma.toFixed(1)}σ 높음 (평균: ${mean.toFixed(1)}%, 현재: ${current.toFixed(1)}%)`,
            logLine: `Z-Score 이상감지: ${metric} Z=${sigma.toFixed(2)} (${level})`
          })
        }
      }
    }

    // === Hour-of-day baseline ===
    const DEVIATION_THRESHOLD = 2.0
    // collected_at is written as Asia/Seoul wall-clock time, so the hour and day type used to
    // select the baseline must be evaluated in that timezone rather than the process timezone.
    const { hour: currentHour, isWeekend } = seoulHourAndDayType(new Date())
    const dayType = isWeekend ? 'weekend' : 'weekday'

    const baselineData = await query<AnomalyMetricSample>(`
      SELECT cpu_percent, memory_percent
      FROM server_snapshots
      WHERE target_id = $1 AND is_online = 1
        AND collected_at::timestamp >= NOW() - INTERVAL '14 days'
        AND EXTRACT(HOUR FROM collected_at::timestamp) = $2
        AND (
          ($3 = 'weekend' AND EXTRACT(DOW FROM collected_at::timestamp) IN (0, 6))
          OR
          ($3 = 'weekday' AND EXTRACT(DOW FROM collected_at::timestamp) NOT IN (0, 6))
        )
    `, [targetId, currentHour, dayType])

    const currentSnapshot = await queryOne<AnomalyMetricSample>(`
      SELECT cpu_percent, memory_percent
      FROM server_snapshots
      WHERE target_id = $1 AND is_online = 1
      ORDER BY collected_at DESC LIMIT 1
    `, [targetId])

    if (baselineData.length >= 5 && currentSnapshot) {
      const dayLabel = isWeekend ? '주말' : '평일'

      for (const { metric, column } of ANOMALY_METRIC_COLUMNS) {
        const current = currentSnapshot[column] ?? 0
        const { sigma } = anomalySigma(current, baselineData.map((s) => s[column] ?? 0))

        if (sigma >= DEVIATION_THRESHOLD) {
          const level = sigma >= 3.0 ? 'danger' : 'warning'
          await report({
            detectType: 'baseline',
            metric,
            level,
            currentValue: current,
            thresholdValue: sigma,
            message: `${metric} ${dayLabel} ${currentHour}시 베이스라인 대비 ${sigma.toFixed(1)}σ 높음`,
            logLine: `베이스라인 이상감지: ${metric} σ=${sigma.toFixed(2)} (${level})`
          })
        }
      }
    }

    // === Trend (sustained rise) ===
    const SLOPE_THRESHOLD = 0.5
    const WINDOW_MINUTES = 30

    // WINDOW_MINUTES is a local constant, not external input, so interpolating it is safe.
    const trendSnapshots = await query<AnomalyMetricSample>(`
      SELECT cpu_percent, memory_percent
      FROM server_snapshots
      WHERE target_id = $1 AND is_online = 1
        AND collected_at::timestamp >= NOW() - INTERVAL '${WINDOW_MINUTES} minutes'
      ORDER BY collected_at ASC
    `, [targetId])

    if (trendSnapshots.length >= 10) {
      for (const { metric, column } of ANOMALY_METRIC_COLUMNS) {
        // Rows are oldest-first here, so the last row is the current value.
        const current = trendSnapshots[trendSnapshots.length - 1]?.[column] ?? 0
        const { slope, r2 } = anomalyTrend(trendSnapshots.map((s) => s[column] ?? 0), WINDOW_MINUTES)

        if (slope >= SLOPE_THRESHOLD && r2 >= 0.3) {
          const level = slope >= 1.0 ? 'danger' : 'warning'
          await report({
            detectType: 'trend',
            metric,
            level,
            currentValue: current,
            thresholdValue: slope,
            message: `${metric} 지속 상승 중 (분당 +${slope.toFixed(2)}%, R²=${r2.toFixed(2)})`,
            logLine: `추세 이상감지: ${metric} +${slope.toFixed(2)}/분 (${level})`
          })
        }
      }
    }
  } catch (err) {
    console.error(`[${now}] ❌ [${serverName}] 이상감지 에러:`, err)
  }
}
/** Writes an "offline" snapshot row (is_online = 0) for a target at the given collection time. */
async function recordOfflineSnapshot(targetId: number, collectedAt: string): Promise<void> {
  await execute(`
    INSERT INTO server_snapshots (target_id, is_online, collected_at)
    VALUES ($1, 0, $2)
  `, [targetId, collectedAt])
}

/**
 * Collects one round of metrics from a server's Glances API and stores them.
 *
 * Steps: resolve the API version (cached or detected), fetch all endpoints in parallel,
 * insert one `server_snapshots` row plus one row per disk, container and network interface,
 * then run anomaly detection. When the server cannot be reached, an offline snapshot is
 * recorded instead and the cached API version is dropped so it is re-detected next time.
 *
 * This function never rejects: callers invoke it from timers without awaiting, so every
 * failure (including a failure to record the offline row) is logged rather than thrown.
 *
 * @param target Server to collect from.
 */
async function collectServerData(target: ServerTarget) {
  const now = timestamp()
  // Set once the online snapshot row is stored, so a later failure (e.g. while saving
  // disks) does not also record the server as offline for the same timestamp.
  let onlineSnapshotSaved = false

  console.log(`[${now}] 📡 [${target.server_name}] 수집 시작... (${target.glances_url})`)

  try {
    // Resolve the API version: cached value first, otherwise auto-detect.
    let apiVersion: string | null = apiVersionCache.get(target.target_id) ?? null

    if (!apiVersion) {
      apiVersion = await detectApiVersion(target.glances_url, target.server_name)
      if (apiVersion) {
        apiVersionCache.set(target.target_id, apiVersion)
      }
    }

    if (!apiVersion) {
      console.log(`[${now}] ❌ [${target.server_name}] 연결 실패 - Offline 기록`)
      await recordOfflineSnapshot(target.target_id, now)
      return
    }

    console.log(`[${now}] 📡 [${target.server_name}] Glances API v${apiVersion} 호출 중...`)

    // Fetch every endpoint in parallel; each call resolves to null on failure.
    const [system, cpu, mem, memswap, fs, docker, network, quicklook, uptime, sensors, load] = await Promise.all([
      fetchGlancesApi(target.glances_url, 'system', apiVersion),
      fetchGlancesApi(target.glances_url, 'cpu', apiVersion),
      fetchGlancesApi(target.glances_url, 'mem', apiVersion),
      fetchGlancesApi(target.glances_url, 'memswap', apiVersion),
      fetchGlancesApi(target.glances_url, 'fs', apiVersion),
      fetchGlancesApi(target.glances_url, 'containers', apiVersion),
      fetchGlancesApi(target.glances_url, 'network', apiVersion),
      fetchGlancesApi(target.glances_url, 'quicklook', apiVersion),
      fetchGlancesApi(target.glances_url, 'uptime', apiVersion),
      fetchGlancesApi(target.glances_url, 'sensors', apiVersion),
      fetchGlancesApi(target.glances_url, 'load', apiVersion)
    ])

    // The `system` endpoint doubles as the liveness check.
    if (system === null) {
      // Drop the cached version so the next run re-detects it.
      apiVersionCache.delete(target.target_id)
      console.log(`[${now}] ❌ [${target.server_name}] 연결 실패 - Offline 기록`)
      await recordOfflineSnapshot(target.target_id, now)
      return
    }

    console.log(`[${now}] ✅ [${target.server_name}] 연결 성공 - 데이터 저장 중...`)

    // CPU temperature: first sensor whose label mentions cpu/core or whose type is temperature_core.
    let cpuTemp: number | null = null
    if (Array.isArray(sensors)) {
      const tempSensor = sensors.find((s: any) =>
        s.label?.toLowerCase().includes('cpu') ||
        s.label?.toLowerCase().includes('core') ||
        s.type === 'temperature_core'
      )
      cpuTemp = tempSensor?.value ?? null
    }

    // server_snapshots INSERT
    console.log(`[${now}] 💾 [${target.server_name}] snapshot 저장 (API v${apiVersion}, CPU: ${cpu?.total?.toFixed(1) || 0}%, MEM: ${mem?.percent?.toFixed(1) || 0}%, TEMP: ${cpuTemp ?? 'N/A'}°C, LOAD: ${quicklook?.load?.toFixed(1) ?? 'N/A'}%)`)

    await execute(`
      INSERT INTO server_snapshots (
        target_id, os_name, os_version, host_name, uptime_seconds, uptime_str, ip_address,
        cpu_name, cpu_count, cpu_percent, memory_total, memory_used, memory_percent,
        swap_total, swap_used, swap_percent, is_online, api_version, cpu_temp,
        load_1, load_5, load_15, load_percent, collected_at
      ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24)
    `, [
      target.target_id,
      system?.os_name || system?.linux_distro || null,
      system?.os_version || null,
      system?.hostname || null,
      null, // uptime_seconds is not populated; only the uptime string is stored
      typeof uptime === 'string' ? uptime : null,
      target.glances_url.match(/https?:\/\/([^:\/]+)/)?.[1] || null, // host part of the Glances URL
      quicklook?.cpu_name || null,
      quicklook?.cpu_number || quicklook?.cpu_log_core || cpu?.cpucore || null,
      cpu?.total ?? quicklook?.cpu ?? null,
      mem?.total || null,
      // `??` rather than `||` so a legitimate 0 (e.g. no swap in use) is stored as 0, not NULL.
      mem?.used ?? null,
      mem?.percent ?? null,
      memswap?.total ?? null,
      memswap?.used ?? null,
      memswap?.percent ?? null,
      1, // is_online: this point is only reached when the system endpoint answered
      apiVersion,
      cpuTemp,
      load?.min1 ?? null,
      load?.min5 ?? null,
      load?.min15 ?? null,
      quicklook?.load ?? null,
      now
    ])
    onlineSnapshotSaved = true

    // server_disks INSERT (one row per partition)
    if (Array.isArray(fs) && fs.length > 0) {
      console.log(`[${now}] 💾 [${target.server_name}] disk 저장 (${fs.length}개 파티션)`)
      for (const disk of fs) {
        await execute(`
          INSERT INTO server_disks (
            target_id, device_name, mount_point, fs_type,
            disk_total, disk_used, disk_percent, collected_at
          ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
        `, [
          target.target_id,
          disk.device_name || null,
          disk.mnt_point || null,
          disk.fs_type || null,
          disk.size || null,
          disk.used ?? null,
          disk.percent ?? null,
          now
        ])
      }
    }

    // server_containers INSERT (one row per container)
    if (Array.isArray(docker) && docker.length > 0) {
      console.log(`[${now}] 🐳 [${target.server_name}] container 저장 (${docker.length}개 컨테이너)`)
      for (const container of docker) {
        await execute(`
          INSERT INTO server_containers (
            target_id, docker_id, container_name, container_image,
            container_status, cpu_percent, memory_usage, memory_limit,
            memory_percent, uptime, network_rx, network_tx, collected_at
          ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
        `, [
          target.target_id,
          container.id || null,
          container.name || null,
          Array.isArray(container.image) ? container.image.join(', ') : container.image || null,
          container.status || null,
          container.cpu?.total ?? container.cpu_percent ?? null,
          container.memory?.usage || container.memory_usage || null,
          container.memory?.limit || container.memory_limit || null,
          // Derive the percentage from usage/limit when both are present, else use the flat field.
          container.memory?.usage && container.memory?.limit
            ? (container.memory.usage / container.memory.limit * 100)
            : container.memory_percent ?? null,
          container.uptime || null,
          container.network?.rx ?? container.network_rx ?? null,
          container.network?.tx ?? container.network_tx ?? null,
          now
        ])
      }
    }

    // server_networks INSERT (one row per interface)
    if (Array.isArray(network) && network.length > 0) {
      console.log(`[${now}] 🌐 [${target.server_name}] network 저장 (${network.length}개 인터페이스)`)
      for (const iface of network) {
        await execute(`
          INSERT INTO server_networks (
            target_id, interface_name, bytes_recv, bytes_sent,
            packets_recv, packets_sent, speed_recv, speed_sent,
            is_up, collected_at
          ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
        `, [
          target.target_id,
          iface.interface_name || null,
          // Field names differ between payloads, hence the fallback chains.
          iface.bytes_recv || iface.cumulative_rx || null,
          iface.bytes_sent || iface.cumulative_tx || null,
          iface.packets_recv || null,
          iface.packets_sent || null,
          iface.bytes_recv_rate_per_sec || iface.rx || iface.bytes_recv_rate || null,
          iface.bytes_sent_rate_per_sec || iface.tx || iface.bytes_sent_rate || null,
          iface.is_up ? 1 : 0,
          now
        ])
      }
    }

    console.log(`[${now}] ✅ [${target.server_name}] 수집 완료!`)

    // Run anomaly detection on the freshly stored data.
    await detectAnomalies(target.target_id, target.server_name)
  } catch (err) {
    console.error(`[${now}] ❌ [${target.server_name}] 수집 에러:`, err)

    // Force API version re-detection on the next run.
    apiVersionCache.delete(target.target_id)

    if (!onlineSnapshotSaved) {
      try {
        await recordOfflineSnapshot(target.target_id, now)
      } catch (recordErr) {
        // The database itself may be the cause of the failure; never let this reject,
        // because timer callers do not await this function.
        console.error(`[${now}] ❌ [${target.server_name}] Offline 기록 실패:`, recordErr)
      }
    }
  }
}
/**
 * Starts (or restarts) the polling timer for one server.
 *
 * Any existing timer for the target is stopped first. One collection runs immediately,
 * then one every `collect_interval` seconds; a missing, zero or negative interval falls
 * back to 60 seconds.
 *
 * @param target Server to poll.
 */
function startServerTimer(target: ServerTarget) {
  const now = timestamp()

  // Remove any existing timer for this target.
  stopServerTimer(target.target_id)

  // A negative value would make setInterval fire almost continuously, so treat it like "unset".
  const intervalSeconds = target.collect_interval > 0 ? target.collect_interval : 60
  console.log(`[${now}] ⏰ [${target.server_name}] 타이머 등록 (주기: ${intervalSeconds}초)`)

  // The collection promise is not awaited, so attach a handler to keep a rejection
  // from surfacing as an unhandled rejection.
  const runCollection = (): void => {
    collectServerData(target).catch((err) => {
      console.error(`[${timestamp()}] ❌ [${target.server_name}] 수집 에러:`, err)
    })
  }

  // Run once right away, then periodically.
  runCollection()
  const timer = setInterval(runCollection, intervalSeconds * 1000)

  serverTimers.set(target.target_id, timer)
}
/**
 * Stops the polling timer of one server, if it has one.
 *
 * Also forgets the cached API version of that server. Does nothing (and logs nothing)
 * when no timer is registered for the id.
 *
 * @param targetId Identifier of the server whose timer should stop.
 */
function stopServerTimer(targetId: number) {
  const activeTimer = serverTimers.get(targetId)
  if (!activeTimer) return

  clearInterval(activeTimer)
  serverTimers.delete(targetId)
  apiVersionCache.delete(targetId)
  console.log(`[${timestamp()}] ⏹️ 타이머 중지 (target_id: ${targetId})`)
}
/**
 * Starts the scheduler: loads every active server target and starts a polling timer for each.
 *
 * Calling it while the scheduler is already running (or still starting) only logs a warning.
 * If loading the targets fails, the running flag is reset and the error is rethrown.
 */
export async function startServerScheduler() {
  const now = timestamp()

  if (isRunning) {
    console.log(`[${now}] ⚠️ [Server Scheduler] 이미 실행 중`)
    return
  }

  // Claim the flag before the first await; otherwise a second call arriving while the
  // query is in flight would pass the guard above and start everything twice.
  isRunning = true

  console.log(`[${now}] 🚀 [Server Scheduler] ========== 스케줄러 시작 ==========`)

  try {
    const targets = await query<ServerTarget>(`
      SELECT * FROM server_targets WHERE is_active = 1
    `)

    console.log(`[${now}] 📋 [Server Scheduler] 활성 서버: ${targets.length}개`)

    for (const target of targets) {
      console.log(`[${now}] 📋 [Server Scheduler] - ${target.server_name} (${target.glances_url}) / ${target.collect_interval}초`)
      startServerTimer(target)
    }

    console.log(`[${now}] ✅ [Server Scheduler] ========== 스케줄러 시작 완료 ==========`)
  } catch (err) {
    // Startup did not complete; allow a later retry.
    isRunning = false
    throw err
  }
}
/**
 * Stops the scheduler: cancels the polling timer of every server and marks the
 * scheduler as not running.
 */
export function stopServerScheduler() {
  const stamp = timestamp()

  console.log(`[${stamp}] 🛑 [Server Scheduler] ========== 스케줄러 중지 ==========`)

  // Snapshot the ids first, since stopping a timer removes its map entry.
  const registeredIds = Array.from(serverTimers.keys())
  registeredIds.forEach((id) => stopServerTimer(id))

  isRunning = false
  console.log(`[${stamp}] ✅ [Server Scheduler] ========== 스케줄러 중지 완료 ==========`)
}
/**
 * Reports the scheduler state together with every active server target.
 *
 * @returns The running flag, the number of registered timers (read before the targets
 *          are queried), the number of active targets, and per target whether it has
 *          a timer and which API version is cached for it (`null` when none).
 */
export async function getServerSchedulerStatus() {
  const timerCount = serverTimers.size
  const activeTargets = await query<ServerTarget>(`
    SELECT * FROM server_targets WHERE is_active = 1
  `)

  const targetSummaries = activeTargets.map((row) => {
    const { target_id, server_name, glances_url, collect_interval } = row
    return {
      target_id,
      server_name,
      glances_url,
      collect_interval,
      has_timer: serverTimers.has(target_id),
      api_version: apiVersionCache.get(target_id) || null
    }
  })

  return {
    is_running: isRunning,
    active_timers: timerCount,
    total_targets: activeTargets.length,
    targets: targetSummaries
  }
}
/**
 * Re-reads one server target and brings its timer in line with the current settings.
 *
 * When the target is still active and the scheduler is running, its timer is restarted
 * and the API version is detected again. In every other case its timer is stopped.
 *
 * @param targetId Identifier of the server whose settings changed.
 */
export async function refreshServerTimer(targetId: number) {
  const stamp = timestamp()
  const activeTarget = await queryOne<ServerTarget>(`
    SELECT * FROM server_targets WHERE target_id = $1 AND is_active = 1
  `, [targetId])

  // Target missing or deactivated, or scheduler stopped: make sure no timer keeps running.
  if (!activeTarget || !isRunning) {
    stopServerTimer(targetId)
    return
  }

  console.log(`[${stamp}] 🔄 [${activeTarget.server_name}] 타이머 갱신`)
  apiVersionCache.delete(targetId) // force API version re-detection
  startServerTimer(activeTarget)
}