#!/bin/bash
# HDGL Resource Monitor
# Monitors and enforces storage caps across all services
# BULLETPROOF MODE: Never fails, always reports

# BULLETPROOF: Don't exit on error
set +e

LOG_FILE="/var/log/hdgl/resource-monitor.log"
ALERT_THRESHOLD=95  # Alert if over 95% of cap

# BULLETPROOF: Use the first log target that can actually be appended to.
# A successful mkdir does not prove the file itself is writable, so every
# candidate is probed with an append. The last resort is /dev/null rather than
# /dev/stdout: each report line already reaches stdout through tee, so logging
# to /dev/stdout would print every line twice.
for log_candidate in "$LOG_FILE" "/tmp/resource-monitor.log" "/dev/null"; do
    mkdir -p "$(dirname "$log_candidate")" 2>/dev/null
    # `true` rather than `:` because a redirection error on a special builtin
    # can abort a POSIX-mode shell; with `true` it is just a failed command.
    if { true >> "$log_candidate"; } 2>/dev/null; then
        LOG_FILE=$log_candidate
        break
    fi
done
unset log_candidate

# Color output (escape sequences are interpreted later by `echo -e`)
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

# Report banner, written to stdout and appended to the log
echo "=== HDGL Resource Monitor ===" | tee -a "$LOG_FILE"
echo "Time: $(date)" | tee -a "$LOG_FILE"
echo "" | tee -a "$LOG_FILE"

# BULLETPROOF: Function to check storage with error handling
#
# Prints one status line comparing a directory's disk usage with its cap and
# appends the same line to the log.
# Globals:   LOG_FILE, ALERT_THRESHOLD, RED, YELLOW, GREEN, NC (all read)
# Arguments: $1 - display name of the service
#            $2 - directory to measure
#            $3 - cap in MB
# Outputs:   exactly one line on stdout, mirrored into LOG_FILE
# Returns:   0 always
check_storage() {
    local name=$1
    local path=$2
    local cap_mb=$3
    local size_mb
    local percent=0
    local color=$GREEN
    local status="✅"

    # tee copies its input to stdout even when the log file cannot be opened,
    # so no fallback echo is needed (one would print the line a second time).
    if [ ! -d "$path" ]; then
        echo "⚠️  $name: Directory not found: $path" | tee -a "$LOG_FILE" 2>/dev/null
        return 0
    fi

    # The pipeline's status is cut's, so a du failure shows up as empty or
    # non-numeric output rather than a non-zero status; treat that as 0 MB.
    size_mb=$(du -sm "$path" 2>/dev/null | cut -f1)
    if ! [[ "$size_mb" =~ ^[0-9]+$ ]]; then
        size_mb=0
    fi

    # BULLETPROOF: Avoid division by zero and non-numeric caps.
    # 10# forces base 10 so a zero-padded value is not read as octal.
    if [[ "$cap_mb" =~ ^[0-9]+$ ]] && [ "$cap_mb" -gt 0 ]; then
        percent=$((10#$size_mb * 100 / 10#$cap_mb))
    fi

    if [ "$percent" -ge 100 ]; then
        color=$RED
        status="❌ OVER CAP"
    elif [ "$percent" -ge "${ALERT_THRESHOLD:-95}" ]; then
        color=$YELLOW
        status="⚠️  WARNING"
    fi

    echo -e "${color}${status} $name: ${size_mb}MB / ${cap_mb}MB (${percent}%)${NC}" | tee -a "$LOG_FILE" 2>/dev/null
    return 0
}

# Prints one capped-storage section: the heading, one check_storage line per
# directory, then a blank separator line. Heading and separator are mirrored
# into LOG_FILE.
# Arguments: $1 - section heading
#            then any number of (name, path, cap in MB) triples
report_storage_section() {
    local heading=$1
    shift

    echo "$heading" | tee -a "$LOG_FILE"
    while [ "$#" -ge 3 ]; do
        check_storage "$1" "$2" "$3"
        shift 3
    done
    echo "" | tee -a "$LOG_FILE"
}

# POA nodes (1000MB each)
report_storage_section "🔗 POA Blockchain Storage:" \
    "POA-RPC" "/var/lib/lxc/poa-rpc-lxc/rootfs/root/.ethereum" 1000 \
    "POA-Miner1" "/var/lib/lxc/poa-miner1-lxc/rootfs/root/.ethereum" 1000 \
    "POA-Miner2" "/var/lib/lxc/poa-miner2-lxc/rootfs/root/.ethereum" 1000

# HDGL checkpoints (1000MB)
report_storage_section "📊 HDGL Checkpoints:" \
    "Checkpoints" "/var/lib/lxc/hdgl-bridge-lxc/rootfs/app/data/checkpoints" 1000

# IPFS (1000MB)
report_storage_section "🌐 IPFS Storage:" \
    "IPFS" "/var/lib/lxc/hdgl-bridge-lxc/rootfs/app/data/ipfs" 1000

# Logs (500MB total)
report_storage_section "📝 Log Files:" \
    "Logs" "/var/log/hdgl" 500

# BULLETPROOF: Check permanent snapshots (not subject to caps)

# Prints a one-line summary of a snapshot directory and appends it to the log.
# Globals:   LOG_FILE (read)
# Arguments: $1 - label shown in the report
#            $2 - snapshot directory
#            $3 - file name pattern passed to `find -name`
# Outputs:   "  <label>: N files (SMB) - PROTECTED", or
#            "  <label>: Not configured" when the directory does not exist
# Returns:   0 always
report_snapshots() {
    local label=$1
    local dir=$2
    local pattern=$3
    local count
    local size_mb

    if [ ! -d "$dir" ]; then
        echo "  ${label}: Not configured" | tee -a "$LOG_FILE" 2>/dev/null
        return 0
    fi

    # A pipeline reports only its last command's status, so failures of find
    # or du are detected by validating the captured text instead.
    count=$(find "$dir" -name "$pattern" -type f 2>/dev/null | wc -l)
    count=${count//[[:space:]]/}   # some wc implementations pad the number
    [[ "$count" =~ ^[0-9]+$ ]] || count=0

    size_mb=$(du -sm "$dir" 2>/dev/null | cut -f1)
    [[ "$size_mb" =~ ^[0-9]+$ ]] || size_mb=0

    echo "  ${label}: ${count} files (${size_mb}MB) - PROTECTED" | tee -a "$LOG_FILE" 2>/dev/null
    return 0
}

echo "📸 Permanent Snapshots:" | tee -a "$LOG_FILE"
CHECKPOINT_SNAPSHOTS="/var/lib/lxc/hdgl-bridge-lxc/rootfs/app/data/snapshots/permanent"
POA_SNAPSHOTS="/var/lib/lxc/hdgl-bridge-lxc/rootfs/app/data/snapshots/poa"

report_snapshots "Checkpoint Snapshots" "$CHECKPOINT_SNAPSHOTS" "permanent_snapshot_*.tar*"
report_snapshots "POA Snapshots" "$POA_SNAPSHOTS" "poa_snapshot_*.tar*"
echo "" | tee -a "$LOG_FILE"

# Memory usage: the lines of `free -h` that mention Mem or Swap
printf '%s\n' "💾 Memory Usage:" | tee -a "$LOG_FILE"
free -h \
    | grep -E -e "Mem" -e "Swap" \
    | tee -a "$LOG_FILE"
printf '\n' | tee -a "$LOG_FILE"

# Container status: the lines of `lxc-ls -f` that mention NAME, hdgl or poa
printf '%s\n' "📦 Container Status:" | tee -a "$LOG_FILE"
lxc-ls -f \
    | grep -E -e "NAME" -e "hdgl" -e "poa" \
    | tee -a "$LOG_FILE"
printf '\n' | tee -a "$LOG_FILE"

# Check for services over cap

# Prints the size of a directory in whole MB, or 0 when du yields nothing
# usable (for example because the directory does not exist).
# Arguments: $1 - directory to measure
dir_size_mb() {
    local size_mb

    # The pipeline's status is cut's, so a du failure is detected by
    # validating the captured text rather than by an exit status.
    size_mb=$(du -sm "$1" 2>/dev/null | cut -f1)
    [[ "$size_mb" =~ ^[0-9]+$ ]] || size_mb=0
    printf '%s\n' "$size_mb"
}

# Runs a remediation command inside a container when a size exceeds its cap.
# Globals:   LOG_FILE (read), REMEDIATION_ATTEMPTED, REMEDIATION_PERFORMED (written)
# Arguments: $1 - measured size in MB (anything non-numeric counts as 0)
#            $2 - cap in MB
#            $3 - action name used in the "Running ..." line
#            $4 - action name used in the complete/failed line
#            $5 - container name passed to `lxc-attach -n`
#            $6.. - command run inside the container
# Outputs:   progress lines on stdout, mirrored into LOG_FILE
# Returns:   0 always
remediate_if_over_cap() {
    local size_mb=$1
    local cap_mb=$2
    local running_label=$3
    local result_label=$4
    local container=$5
    shift 5

    [[ "$size_mb" =~ ^[0-9]+$ ]] || size_mb=0
    if [ "$size_mb" -le "$cap_mb" ]; then
        return 0
    fi

    REMEDIATION_ATTEMPTED=1
    echo "  Running ${running_label}..." | tee -a "$LOG_FILE"
    if lxc-attach -n "$container" -- "$@" 2>/dev/null; then
        REMEDIATION_PERFORMED=1
        echo "    ✅ ${result_label} complete" | tee -a "$LOG_FILE"
    else
        echo "    ⚠️  ${result_label} failed, will retry next cycle" | tee -a "$LOG_FILE"
    fi
    return 0
}

POA_RPC_SIZE=$(dir_size_mb "/var/lib/lxc/poa-rpc-lxc/rootfs/root/.ethereum")
POA_M1_SIZE=$(dir_size_mb "/var/lib/lxc/poa-miner1-lxc/rootfs/root/.ethereum")
POA_M2_SIZE=$(dir_size_mb "/var/lib/lxc/poa-miner2-lxc/rootfs/root/.ethereum")
CHECKPOINT_SIZE=$(dir_size_mb "/var/lib/lxc/hdgl-bridge-lxc/rootfs/app/data/checkpoints")
IPFS_SIZE=$(dir_size_mb "/var/lib/lxc/hdgl-bridge-lxc/rootfs/app/data/ipfs")

# BULLETPROOF: Auto-remediation with error handling
echo "🔧 Auto-remediation:" | tee -a "$LOG_FILE"

REMEDIATION_PERFORMED=0   # becomes 1 once any remediation command succeeds
REMEDIATION_ATTEMPTED=0   # becomes 1 once any remediation command is started

remediate_if_over_cap "$POA_RPC_SIZE" 1000 "POA-RPC pruning" "POA-RPC pruning" \
    poa-rpc-lxc /app/config/poa-config/prune-blockchain.sh
remediate_if_over_cap "$POA_M1_SIZE" 1000 "POA-Miner1 pruning" "POA-Miner1 pruning" \
    poa-miner1-lxc /app/config/poa-config/prune-blockchain.sh
remediate_if_over_cap "$POA_M2_SIZE" 1000 "POA-Miner2 pruning" "POA-Miner2 pruning" \
    poa-miner2-lxc /app/config/poa-config/prune-blockchain.sh
remediate_if_over_cap "$CHECKPOINT_SIZE" 1000 "checkpoint compression" "Checkpoint compression" \
    hdgl-bridge-lxc /app/config/poa-config/compress-checkpoints.sh
remediate_if_over_cap "$IPFS_SIZE" 1000 "IPFS garbage collection" "IPFS GC" \
    hdgl-bridge-lxc ipfs repo gc

# Only claim that nothing was needed when nothing was over cap; a failed
# attempt has already printed its own "will retry next cycle" line.
if [ "$REMEDIATION_ATTEMPTED" -eq 0 ]; then
    echo "  No remediation needed" | tee -a "$LOG_FILE"
fi

# Closing banner framed by blank lines, on stdout and appended to the log
for closing_line in "" "=== Monitor Complete ===" ""; do
    echo "$closing_line" | tee -a "$LOG_FILE"
done

# BULLETPROOF: Always exit success (never kill the service)
exit 0
