/* base4096_native.c
 *
 * Load the canonical frozen Base-4096 alphabet from
 * ./frozen_base4096_alphabet.txt at runtime and provide
 * encode/decode functions compatible with the test harness' names:
 *   char *base4096_c_encode(const uint8_t *buf, size_t len);
 *   uint8_t *base4096_c_decode(const char *s, size_t *out_len);
 *
 * This implementation splits the UTF-8 frozen alphabet into codepoint
 * strings, maps each 12-bit value to the corresponding glyph, and
 * encodes input bytes as MSB-first 12-bit groups. Decoding reverses
 * the process.
 *
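 * Unless built with EMBED_FROZEN_ALPHABET (in which case the linked-in
 * FROZEN_BASE4096_ALPHABET string is parsed instead), it intentionally
 * reads the alphabet file at runtime, on every encode/decode call, so
 * updates to the frozen alphabet file in the repo are picked up without
 * rebuilding.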
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>

// Prefix kept for compatibility with existing tests.
// base4096_c_encode() writes it at the start of every encoded string and
// base4096_c_decode() rejects any input that does not begin with it.
static const char *PREFIX = "B4096:C:";

// Report how many bytes the UTF-8 sequence starting at s[0] claims to occupy.
// Only the lead byte is examined; continuation bytes are not validated.
//
// Returns 1..4 when the lead byte is a recognised sequence start and at least
// that many bytes are available. Returns 0 when avail is 0, when the lead byte
// is not a valid start (e.g. a continuation byte or 0xF8..0xFF), or when the
// sequence would extend past avail.
static int utf8_len(const unsigned char *s, size_t avail) {
    if (avail == 0) {
        return 0;
    }

    const unsigned char lead = s[0];
    size_t need;
    if (lead < 0x80) {
        need = 1;               // 0xxxxxxx: ASCII
    } else if ((lead >> 5) == 0x06) {
        need = 2;               // 110xxxxx
    } else if ((lead >> 4) == 0x0E) {
        need = 3;               // 1110xxxx
    } else if ((lead >> 3) == 0x1E) {
        need = 4;               // 11110xxx
    } else {
        return 0;
    }

    return (avail >= need) ? (int)need : 0;
}

// Split an in-memory UTF-8 alphabet definition into its individual glyphs.
//
//   src, src_len  bytes of the alphabet text. '\n', '\r' and '\t' bytes are
//                 removed before splitting; every other byte (spaces
//                 included) is kept.
//   out_count     receives the glyph count (4096) on success; it is not
//                 written on failure.
//
// Returns a calloc'ed array of 4096 malloc'ed, NUL-terminated glyph strings
// (release it with free_alphabet()), or NULL if an allocation fails or the
// text holds fewer than 4096 glyphs. Bytes after the 4096th glyph are ignored.
// A byte that utf8_len() cannot classify is taken as a one-byte glyph.
static char **load_alphabet_from_string(const char *src, size_t src_len, size_t *out_count) {
    // Phase 1: copy the text without line breaks and tabs.
    char *packed = malloc(src_len + 1);
    if (packed == NULL) {
        return NULL;
    }
    size_t packed_len = 0;
    for (const char *in = src, *stop = src + src_len; in != stop; ++in) {
        if (*in != '\n' && *in != '\r' && *in != '\t') {
            packed[packed_len++] = *in;
        }
    }
    packed[packed_len] = '\0';

    // Phase 2: carve the packed text into one string per code point.
    char **glyphs = calloc(4096, sizeof(char*));
    if (glyphs == NULL) {
        free(packed);
        return NULL;
    }

    size_t filled = 0;
    for (size_t off = 0; off < packed_len && filled < 4096; ) {
        int width = utf8_len((unsigned char*)packed + off, packed_len - off);
        if (width <= 0) {
            width = 1;
        }
        char *glyph = malloc((size_t)width + 1);
        if (glyph == NULL) {
            break;  // leaves filled short of 4096, handled as failure below
        }
        memcpy(glyph, packed + off, (size_t)width);
        glyph[width] = '\0';
        glyphs[filled++] = glyph;
        off += (size_t)width;
    }
    free(packed);

    if (filled == 4096) {
        *out_count = filled;
        return glyphs;
    }

    // Incomplete alphabet: release whatever was collected.
    while (filled > 0) {
        free(glyphs[--filled]);
    }
    free(glyphs);
    return NULL;
}

// Read the whole file at `path` (opened in binary mode) into memory and hand
// the bytes actually read to load_alphabet_from_string().
// Returns that function's result, or NULL if the file cannot be opened, its
// size cannot be determined, or the read buffer cannot be allocated.
static char **load_alphabet(const char *path, size_t *out_count) {
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        return NULL;
    }

    // Determine the file size by seeking to the end.
    long size = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        size = ftell(fp);
    }
    if (size < 0) {
        fclose(fp);
        return NULL;
    }
    rewind(fp);

    char *contents = malloc((size_t)size + 1);
    if (contents == NULL) {
        fclose(fp);
        return NULL;
    }

    size_t got = fread(contents, 1, (size_t)size, fp);
    fclose(fp);
    contents[got] = '\0';

    char **alphabet = load_alphabet_from_string(contents, got, out_count);
    free(contents);
    return alphabet;
}

// Release an alphabet array: each of the first `count` strings, then the
// array itself. A NULL `entries` is accepted and ignored.
static void free_alphabet(char **entries, size_t count) {
    if (entries == NULL) {
        return;
    }
    for (char **it = entries, **end = entries + count; it != end; ++it) {
        free(*it);
    }
    free(entries);
}

// Fast decode support: one slot of an open-addressing hash table that maps a
// glyph's byte sequence to its index in the alphabet. The table size is a
// power of two at least 2x the alphabet size; a slot whose key is NULL is
// empty.
typedef struct {
    const char *key;   // glyph bytes (borrowed pointer, not a copy)
    int key_len;       // length of key in bytes
    int val;           // alphabet index of the glyph
} HashEntry;

// Smallest power of two that is >= 2 * n (1 when n is 0).
static size_t hash_table_size_for_count(size_t n) {
    const size_t wanted = n * 2;
    size_t size;
    for (size = 1; size < wanted; size <<= 1) {
        // keep doubling
    }
    return size;
}

// djb2 string hash over `len` bytes of `data`: start at 5381 and fold each
// byte in as hash = hash * 33 + byte (unsigned arithmetic, wraps on overflow).
static unsigned long djb2_hash(const unsigned char *data, int len) {
    unsigned long acc = 5381;
    const unsigned char *cur = data;
    for (int left = len; left > 0; left--) {
        acc = acc * 33 + *cur++;
    }
    return acc;
}

// Build a hash table from entries; returns malloc'd table (caller frees) and sets table_size.
static HashEntry *build_hash_table(char **entries, size_t count, size_t *table_size_out) {
    size_t tsize = hash_table_size_for_count(count);
    HashEntry *table = calloc(tsize, sizeof(HashEntry));
    if (!table) return NULL;
    for (size_t i = 0; i < count; i++) {
        const char *k = entries[i];
        int klen = (int)strlen(k);
        unsigned long h = djb2_hash((const unsigned char*)k, klen);
        size_t idx = h & (tsize - 1);
        while (table[idx].key != NULL) {
            idx = (idx + 1) & (tsize - 1);
        }
        table[idx].key = entries[i];
        table[idx].key_len = klen;
        table[idx].val = (int)i;
    }
    *table_size_out = tsize;
    return table;
}

// Find the alphabet index stored for the `slen`-byte key at `s`.
// Probes linearly from the key's home slot until it finds a match, reaches an
// empty slot, or has wrapped around the whole table. Returns the stored
// index, or -1 when the key is absent.
static int hash_table_lookup(HashEntry *table, size_t tsize, const char *s, int slen) {
    const size_t mask = tsize - 1;
    const size_t home = djb2_hash((const unsigned char*)s, slen) & mask;
    size_t slot = home;

    for (;;) {
        const HashEntry *cand = &table[slot];
        if (cand->key == NULL) {
            break;  // empty slot ends the probe chain
        }
        if (cand->key_len == slen && memcmp(cand->key, s, (size_t)slen) == 0) {
            return cand->val;
        }
        slot = (slot + 1) & mask;
        if (slot == home) {
            break;  // visited every slot
        }
    }
    return -1;
}

// Append `glyph` to the growable buffer *out (*used bytes in use, *cap bytes
// allocated), enlarging it first when the glyph plus a terminating NUL would
// not fit. Returns 0 on success; on allocation failure returns -1 and leaves
// *out valid and unchanged so the caller can free it.
static int b4096_append_glyph(char **out, size_t *used, size_t *cap, const char *glyph) {
    const size_t glyph_len = strlen(glyph);
    if (*used + glyph_len + 1 > *cap) {
        const size_t grown = *cap * 2 + glyph_len + 16;
        char *bigger = realloc(*out, grown);
        if (bigger == NULL) {
            return -1;
        }
        *out = bigger;
        *cap = grown;
    }
    memcpy(*out + *used, glyph, glyph_len);
    *used += glyph_len;
    return 0;
}

// Exposed API matching the placeholder names in ap_serialize.c
// Encode `len` bytes at `buf` as a Base4096 string.
//
// The result is PREFIX followed by one alphabet glyph per 12-bit group. Input
// bits are consumed MSB first; a final partial group is padded with zero bits
// on the right. The alphabet is loaded afresh on every call (from the embedded
// string when EMBED_FROZEN_ALPHABET is defined, otherwise from
// "frozen_base4096_alphabet.txt" in the current directory).
//
// Returns a malloc'ed NUL-terminated string the caller must free(), or NULL
// when buf is NULL, the alphabet cannot be loaded, or memory runs out.
char *base4096_c_encode(const uint8_t *buf, size_t len) {
    if (buf == NULL) {
        return NULL;
    }

    size_t alph_count = 0;
#ifdef EMBED_FROZEN_ALPHABET
    extern const char FROZEN_BASE4096_ALPHABET[];
    size_t src_len = strlen(FROZEN_BASE4096_ALPHABET);
    char **alphabet = load_alphabet_from_string(FROZEN_BASE4096_ALPHABET, src_len, &alph_count);
#else
    char **alphabet = load_alphabet("frozen_base4096_alphabet.txt", &alph_count);
#endif
    if (alphabet == NULL) {
        return NULL;
    }

    // Initial capacity: prefix + up to 4 UTF-8 bytes per group + NUL.
    const size_t prefix_len = strlen(PREFIX);
    const size_t group_count = (len * 8 + 11) / 12;  // ceil(bits / 12)
    size_t cap = prefix_len + group_count * 4 + 1;
    size_t used = 0;
    uint32_t acc = 0;   // bits not yet emitted, right-aligned
    int pending = 0;    // number of valid bits in acc

    char *out = malloc(cap);
    if (out == NULL) {
        goto fail;
    }
    memcpy(out, PREFIX, prefix_len);
    used = prefix_len;

    for (const uint8_t *p = buf, *end = buf + len; p != end; ++p) {
        acc = (acc << 8) | *p;
        pending += 8;
        while (pending >= 12) {
            pending -= 12;
            const uint32_t group = (acc >> pending) & 0xFFF;
            if (b4096_append_glyph(&out, &used, &cap, alphabet[group]) != 0) {
                goto fail;
            }
            acc &= (1u << pending) - 1u;  // keep only the bits still pending
        }
    }

    if (pending > 0) {
        // Left-align the remaining bits inside a final 12-bit group.
        const uint32_t group = (acc << (12 - pending)) & 0xFFF;
        if (b4096_append_glyph(&out, &used, &cap, alphabet[group]) != 0) {
            goto fail;
        }
    }
    out[used] = '\0';

    free_alphabet(alphabet, alph_count);
    return out;

fail:
    free(out);
    free_alphabet(alphabet, alph_count);
    return NULL;
}

// Decode a string produced by base4096_c_encode() back into bytes.
//
//   s        NUL-terminated input; must begin with PREFIX.
//   out_len  receives the number of decoded bytes on success; it is left
//            untouched on failure.
//
// Returns a heap buffer the caller must free(), or NULL when s or out_len is
// NULL, the prefix is missing, the alphabet cannot be loaded, a glyph is not
// part of the alphabet, or memory runs out. A payload with no glyphs yields a
// non-NULL buffer with *out_len == 0.
//
// Each glyph contributes 12 bits, MSB first; bits left over after the last
// whole byte are discarded. The format carries no length field, so data whose
// length is 2 modulo 3 (which the encoder pads with 8 zero bits) comes back
// with one extra trailing zero byte.
uint8_t *base4096_c_decode(const char *s, size_t *out_len) {
    if (!s || !out_len) return NULL;
    size_t prefix_len = strlen(PREFIX);
    if (strncmp(s, PREFIX, prefix_len) != 0) return NULL;

    const unsigned char *payload = (const unsigned char *)(s + prefix_len);
    // Measured once up front; calling strlen for every glyph would make the
    // scan quadratic in the input length.
    const size_t payload_len = strlen((const char *)payload);

    // load alphabet (embedded or file)
    size_t alph_count = 0;
#ifdef EMBED_FROZEN_ALPHABET
    extern const char FROZEN_BASE4096_ALPHABET[];
    size_t src_len = strlen(FROZEN_BASE4096_ALPHABET);
    char **alphabet = load_alphabet_from_string(FROZEN_BASE4096_ALPHABET, src_len, &alph_count);
#else
    char **alphabet = load_alphabet("frozen_base4096_alphabet.txt", &alph_count);
#endif
    if (!alphabet) return NULL;

    // Everything the cleanup path touches is declared before the first goto.
    uint8_t *result = NULL;
    uint8_t *out = NULL;
    size_t out_cap = 0;
    size_t out_i = 0;
    size_t glyph_count = 0;
    uint32_t bitbuf = 0;   // at most 7 carried bits + 12 new bits
    int bitcount = 0;

    // Build hash table for fast glyph->index lookup
    size_t htable_size = 0;
    HashEntry *htable = build_hash_table(alphabet, alph_count, &htable_size);
    if (!htable) goto cleanup;

    // Pass 1: count glyphs so the output can be allocated exactly once.
    for (size_t pos = 0; pos < payload_len; glyph_count++) {
        int clen = utf8_len(&payload[pos], payload_len - pos);
        pos += (clen > 0) ? (size_t)clen : 1;
    }

    // 12 bits per glyph -> floor(1.5 * glyph_count) whole bytes.
    if (glyph_count > SIZE_MAX - glyph_count / 2) goto cleanup;
    out_cap = glyph_count + glyph_count / 2;
    // calloc(0, ...) may legitimately return NULL; request at least one byte
    // so an empty payload is not mistaken for an allocation failure.
    out = calloc(out_cap ? out_cap : 1, 1);
    if (!out) goto cleanup;

    // Pass 2: map each glyph to its 12-bit value and emit bytes MSB-first.
    for (size_t pos = 0; pos < payload_len; ) {
        int clen = utf8_len(&payload[pos], payload_len - pos);
        if (clen <= 0) clen = 1;  // unclassifiable byte: look it up on its own
        int idx = hash_table_lookup(htable, htable_size, (const char *)&payload[pos], clen);
        if (idx < 0) goto cleanup;

        bitbuf = (bitbuf << 12) | ((uint32_t)idx & 0x0FFFu);
        bitcount += 12;
        while (bitcount >= 8) {
            bitcount -= 8;
            if (out_i < out_cap) out[out_i++] = (uint8_t)((bitbuf >> bitcount) & 0xFFu);
            bitbuf &= (1u << bitcount) - 1u;  // keep only the bits not yet emitted
        }
        pos += (size_t)clen;
    }

    *out_len = out_i;
    result = out;
    out = NULL;  // ownership passes to the caller

cleanup:
    // Single exit path: the hash table is released on every failure too.
    free(out);
    free(htable);
    free_alphabet(alphabet, alph_count);
    return result;
}
