/**
 * Debug tool to analyze entropy of mixed data
 */

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

float calculate_entropy(const uint8_t *data, size_t size) {
    if (size == 0) return 0.0f;

    uint32_t freq[256] = {0};
    for (size_t i = 0; i < size; i++) {
        freq[data[i]]++;
    }

    float entropy = 0.0f;
    for (int i = 0; i < 256; i++) {
        if (freq[i] > 0) {
            float p = (float)freq[i] / size;
            entropy -= p * log2f(p);
        }
    }

    return entropy;
}

/**
 * Fill a buffer with three consecutive regions of synthetic test data.
 *
 * Layout (region sizes are integer percentages of `size`, rounded down):
 *   - first 30%:  "structured" bytes, where byte i holds (i / 1024) % 256,
 *                 i.e. runs of 1024 identical bytes;
 *   - next 40%:   "correlated" region, a sequence of doubles in the host's
 *                 native representation following a random walk around
 *                 20.5 plus per-sample noise; any leftover bytes that do
 *                 not fit a whole double are set to zero;
 *   - remainder:  bytes drawn from rand() % 256.
 *
 * The function draws from rand(), so the output is reproducible only if
 * the caller seeds with srand() beforehand. It also prints a one-line
 * summary of the region sizes to stdout.
 *
 * @param data  Destination buffer; must be non-NULL and hold `size` bytes.
 *              No alignment is required.
 * @param size  Total number of bytes to generate.
 */
void generate_mixed_data(uint8_t *data, size_t size) {
    size_t structured_size = size * 30 / 100;
    size_t correlated_size = size * 40 / 100;
    size_t random_size = size - structured_size - correlated_size;

    printf("Generating %zu bytes: %zu structured + %zu correlated + %zu random\n",
           size, structured_size, correlated_size, random_size);

    // Part 1: Structured repeated patterns
    for (size_t i = 0; i < structured_size; i++) {
        data[i] = (uint8_t)((i / 1024) % 256);
    }

    // Part 2: Correlated time-series
    uint8_t *correlated = data + structured_size;
    size_t count = correlated_size / sizeof(double);
    double base_value = 20.5;
    double drift = 0.0;

    for (size_t i = 0; i < count; i++) {
        drift += ((rand() % 100) - 50) * 0.001;
        double noise = ((rand() % 100) - 50) * 0.01;
        double value = base_value + drift + noise;
        // The region starts at an arbitrary byte offset, so it may not be
        // aligned for double; copy the bytes instead of storing through a
        // casted pointer (misaligned access is undefined behavior).
        memcpy(correlated + i * sizeof value, &value, sizeof value);
    }

    // Bytes past the last whole double would otherwise stay uninitialized
    // and be read later by the entropy analysis.
    memset(correlated + count * sizeof(double), 0,
           correlated_size % sizeof(double));

    // Part 3: Random
    for (size_t i = structured_size + correlated_size; i < size; i++) {
        data[i] = (uint8_t)(rand() % 256);
    }
}

/**
 * Entry point: generate 10 MB of mixed synthetic data with a fixed seed
 * and print entropy statistics for the whole buffer, for each of its three
 * sections, and for the first eight 256 KB segments.
 *
 * @return 0 on success, EXIT_FAILURE if the data buffer cannot be allocated.
 */
int main(void) {
    size_t test_size = 10 * 1024 * 1024; /* 10MB */
    uint8_t *data = malloc(test_size);
    if (data == NULL) {
        fprintf(stderr, "Failed to allocate %zu bytes\n", test_size);
        return EXIT_FAILURE;
    }

    // Fixed seed so every run analyzes the same data.
    srand(42);
    generate_mixed_data(data, test_size);

    printf("\n=== ENTROPY ANALYSIS ===\n\n");

    // Full file entropy
    float full_entropy = calculate_entropy(data, test_size);
    printf("Full file entropy: %.2f bits/byte\n", full_entropy);

    // Section boundaries are recomputed with the same 30% / 40% / remainder
    // formulas that generate_mixed_data uses, so they line up exactly.

    // Structured section
    size_t structured_size = test_size * 30 / 100;
    float structured_entropy = calculate_entropy(data, structured_size);
    printf("Structured section entropy: %.2f bits/byte\n", structured_entropy);

    // Correlated section
    size_t correlated_size = test_size * 40 / 100;
    float correlated_entropy = calculate_entropy(data + structured_size, correlated_size);
    printf("Correlated section entropy: %.2f bits/byte\n", correlated_entropy);

    // Random section
    size_t random_size = test_size - structured_size - correlated_size;
    float random_entropy = calculate_entropy(data + structured_size + correlated_size, random_size);
    printf("Random section entropy: %.2f bits/byte\n", random_entropy);

    printf("\n");
    printf("Threshold analysis:\n");
    printf("  entropy >= 7.8: Skip compression (currently: %.2f)\n", full_entropy);
    printf("  entropy 5.0-7.8: Try compression (currently: %s)\n",
           (full_entropy >= 5.0 && full_entropy < 7.8) ? "YES" : "NO");

    // Segment analysis (like improved Wu-Wei)
    // Only the first 8 * 256 KB = 2 MB are sampled, which lies entirely
    // inside the 3 MB structured section at the start of the buffer.
    printf("\n=== SEGMENT ANALYSIS (256KB chunks) ===\n\n");
    size_t segment_size = 256 * 1024;
    size_t num_segments = 8;
    float min_entropy = 10.0f, max_entropy = 0.0f, avg_entropy = 0.0f;

    for (size_t i = 0; i < num_segments; i++) {
        size_t offset = i * segment_size;
        float seg_entropy = calculate_entropy(data + offset, segment_size);
        avg_entropy += seg_entropy;
        if (seg_entropy < min_entropy) min_entropy = seg_entropy;
        if (seg_entropy > max_entropy) max_entropy = seg_entropy;
        printf("Segment %zu: %.2f bits/byte\n", i, seg_entropy);
    }
    avg_entropy /= num_segments;

    printf("\nSegment summary:\n");
    printf("  Min: %.2f, Max: %.2f, Avg: %.2f\n", min_entropy, max_entropy, avg_entropy);
    printf("  Using avg (%.2f) >= 7.8? %s (would skip compression)\n",
           avg_entropy, avg_entropy >= 7.8 ? "YES" : "NO");

    free(data);
    return 0;
}
