# freeze_base4096_alphabet_deterministic.py
import json
import unicodedata
import os

# --- Configuration ---
# Characters placed at the front of the alphabet, in exactly this order.
# The script entry point passes SEED to generate_frozen_base4096(), which
# keeps each seed character that passes is_valid_char() and has not been
# seen before, then fills the remaining slots from a code point scan.
SEED = (
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    "!@#$%^&*()-_+=[{]};:',\"<>?/`|~"
    "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿"
    "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß"
    "àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
)

# Unicode general categories rejected by is_valid_char():
#   Cc control, Cf format, Cs surrogate, Cn unassigned, Co private use,
#   Zs/Zl/Zp space, line and paragraph separators,
#   Mc/Mn/Me spacing, nonspacing and enclosing marks.
EXCLUDE_CATEGORIES = {"Cc", "Cf", "Cs", "Cn", "Co", "Zs", "Zl", "Zp", "Mc", "Mn", "Me"}
# Unicode bidirectional classes rejected by is_valid_char():
#   R / AL / AN are right-to-left letters and Arabic letters/numbers;
#   RLE / RLO / LRE / LRO are explicit embedding and override controls.
EXCLUDE_BIDI = {"R", "AL", "AN", "RLE", "RLO", "LRE", "LRO"}

INCLUDE_SUPPLEMENTARY = False  # Set True to include 0x10000–0x10FFFF

# Directory containing this script; save_alphabet() writes its output files here.
OUTPUT_DIR = os.path.dirname(os.path.abspath(__file__))

# --- Helper Functions ---
def is_valid_char(c: str) -> bool:
    """Return True if character is valid for Base-4096 alphabet.

    A character is accepted only when all of the following hold:

    * ``c`` is exactly one code point (checked on the raw input, so a base
      letter plus combining mark that NFC would compose is rejected);
    * ``c`` is unchanged by NFC normalization;
    * its canonical combining class is 0;
    * its general category is not in ``EXCLUDE_CATEGORIES``;
    * its bidirectional class is not in ``EXCLUDE_BIDI``.

    Args:
        c: Candidate string, expected to be a single character.

    Returns:
        True when the character may be used as an alphabet symbol,
        False otherwise (including for empty or multi-character input).

    Note:
        The answers come from the ``unicodedata`` tables bundled with the
        running interpreter, so they can differ between Python versions.
    """
    # Validate the input itself, not its normalized form: "e\u0301" composes
    # to a single "é" under NFC but is still two code points.
    if len(c) != 1:
        return False
    # A symbol that NFC rewrites (e.g. U+037E -> ";", U+2126 -> U+03A9) would
    # turn into a different character, possibly one already in the alphabet,
    # as soon as encoded text is normalized. Only NFC-stable symbols are safe.
    if unicodedata.normalize('NFC', c) != c:
        return False
    if unicodedata.combining(c):
        return False
    if unicodedata.category(c) in EXCLUDE_CATEGORIES:
        return False
    return unicodedata.bidirectional(c) not in EXCLUDE_BIDI

def generate_frozen_base4096(seed: str) -> str:
    """Build the 4096-character alphabet from ``seed`` plus a code point scan.

    Seed characters come first, in their original order; duplicates and
    characters rejected by ``is_valid_char`` are dropped. The remaining slots
    are filled by scanning code points upward from U+0020 and taking every
    valid character not already present, stopping once 4096 are collected.

    Args:
        seed: Characters to place at the front of the alphabet.

    Returns:
        A string of exactly 4096 distinct characters.

    Raises:
        ValueError: If the final character count is not exactly 4096.
    """
    seen = set()
    base_chars = []

    for ch in seed:
        # Iterating a str always yields single characters, so only
        # duplicates and invalid characters need filtering here.
        if ch not in seen and is_valid_char(ch):
            seen.add(ch)
            base_chars.append(ch)

    # Honour the INCLUDE_SUPPLEMENTARY switch: scan the BMP only by default,
    # or continue through U+10FFFF when the flag is set.
    scan_end = 0x110000 if INCLUDE_SUPPLEMENTARY else 0x10000
    for codepoint in range(0x20, scan_end):
        if len(base_chars) >= 4096:
            break
        c = chr(codepoint)
        if c in seen or not is_valid_char(c):
            continue
        seen.add(c)
        base_chars.append(c)

    if len(base_chars) != 4096:
        raise ValueError(f"Only generated {len(base_chars)} valid characters.")

    return ''.join(base_chars)

def save_alphabet(frozen_alphabet: str) -> None:
    """Write the alphabet to ``OUTPUT_DIR`` in three formats.

    Files produced (all UTF-8):

    * ``frozen_base4096_alphabet.txt``  - the raw alphabet string;
    * ``frozen_base4096_alphabet.py``   - a module defining
      ``FROZEN_BASE4096_ALPHABET`` as adjacent string literals of up to
      64 characters each;
    * ``frozen_base4096_alphabet.json`` - a JSON array with one character
      per element.

    Args:
        frozen_alphabet: The alphabet string to persist.
    """
    # Save plain text
    txt_path = os.path.join(OUTPUT_DIR, "frozen_base4096_alphabet.txt")
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(frozen_alphabet)

    # Save Python constant
    py_path = os.path.join(OUTPUT_DIR, "frozen_base4096_alphabet.py")
    with open(py_path, "w", encoding="utf-8") as f:
        f.write("# frozen_base4096_alphabet.py\n")
        f.write("# Deterministic Base-4096 Alphabet (BMP first, surrogate-safe)\n\n")
        f.write("FROZEN_BASE4096_ALPHABET = (\n")
        # Chunk over the real length so this file always holds the same
        # characters as the TXT and JSON outputs. max(..., 1) guarantees at
        # least one literal, so the constant is a str and never an empty tuple.
        for i in range(0, max(len(frozen_alphabet), 1), 64):
            chunk = frozen_alphabet[i:i+64]
            # JSON string syntax (quotes, backslashes and \uXXXX escapes) is
            # also valid inside a Python double-quoted literal.
            f.write(f"    {json.dumps(chunk, ensure_ascii=False)}\n")
        f.write(")\n")

    # Save JSON array
    json_path = os.path.join(OUTPUT_DIR, "frozen_base4096_alphabet.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(list(frozen_alphabet), f, ensure_ascii=False, indent=2)

    print(f"📂 Alphabet saved as TXT, Python module, and JSON in {OUTPUT_DIR}")

# --- Main Execution ---
if __name__ == "__main__":
    # Generate the alphabet from the configured seed, persist it in every
    # output format, then print a quick length/uniqueness sanity check.
    alphabet = generate_frozen_base4096(SEED)
    save_alphabet(alphabet)
    distinct_count = len(set(alphabet))
    print("Length:", len(alphabet), "Unique:", distinct_count)
