# freeze_base4096_alphabet_deterministic.py
import json
import unicodedata

# Priority characters for the alphabet. They are offered to the generator in
# exactly this order, ahead of the code-point-ordered fill.
SEED = "".join(
    [
        # ASCII digits, then upper-case and lower-case letters.
        "0123456789",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "abcdefghijklmnopqrstuvwxyz",
        # ASCII punctuation and symbols.
        "!@#$%^&*()-_+=[{]};:',\"<>?/`|~",
        # Latin-1 symbols U+00A1..U+00BF (U+00AD is not listed).
        "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿",
        # Latin-1 letters U+00C0..U+00DF and U+00E0..U+00FF.
        "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß",
        "àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ",
        # Further verified Unicode ranges can be appended here.
    ]
)

# General categories rejected outright: controls, format characters,
# surrogates, unassigned and private-use code points, the three separator
# classes, and all three kinds of combining mark.
EXCLUDE_CATEGORIES = {
    "Cc", "Cf", "Cs", "Cn", "Co",
    "Zs", "Zl", "Zp",
    "Mc", "Mn", "Me",
}
# Bidirectional classes rejected outright: right-to-left letters, Arabic
# letters and numbers, and the explicit embedding/override controls.
EXCLUDE_BIDI = {"R", "AL", "AN", "RLE", "RLO", "LRE", "LRO"}


def is_valid_char(c):
    """Return True when the single character *c* may be used in the alphabet.

    A character is rejected when its general category is listed in
    ``EXCLUDE_CATEGORIES``, when its bidirectional class is listed in
    ``EXCLUDE_BIDI``, or when its canonical combining class is non-zero.

    All three properties are read from ``unicodedata``, so the verdict for a
    given code point follows the Unicode database bundled with the running
    interpreter. This predicate does not test whether the character is left
    unchanged by Unicode normalization.
    """
    if unicodedata.category(c) in EXCLUDE_CATEGORIES:
        return False
    if unicodedata.bidirectional(c) in EXCLUDE_BIDI:
        return False
    # Anything that still carries a combining class would attach to its
    # neighbour instead of standing alone.
    return unicodedata.combining(c) == 0

def generate_frozen_base4096(seed):
    """Build the 4096-character alphabet: seed characters first, then BMP fill.

    Candidates are examined in a fixed order: every item of ``seed`` in the
    order given, followed by the code points U+0020 through U+FFFF in
    ascending order. A candidate is kept when it has not been kept before and
    ``is_valid_char`` accepts it. Collection stops as soon as 4096 characters
    have been kept.

    The size check runs before each candidate is considered, so a seed that
    already supplies 4096 valid characters is returned as-is, and a longer
    seed contributes only its first 4096 valid characters. Seed items are
    not restricted to the BMP by this function.

    Args:
        seed: Iterable of single-character strings (typically a ``str``)
            whose valid characters take the first positions of the alphabet.

    Returns:
        A string of exactly 4096 distinct characters.

    Raises:
        ValueError: If fewer than 4096 valid characters were found in the
            seed and the scanned code-point range combined.
    """
    target_size = 4096
    seen = set()
    base_chars = []

    def candidates():
        # Seed order comes first, then deterministic code-point order.
        yield from seed
        for codepoint in range(0x20, 0x10000):
            yield chr(codepoint)

    for ch in candidates():
        # Check before appending: the alphabet may already be full after the
        # seed alone, in which case nothing more must be added.
        if len(base_chars) == target_size:
            break
        if ch not in seen and is_valid_char(ch):
            seen.add(ch)
            base_chars.append(ch)

    if len(base_chars) != target_size:
        raise ValueError(f"Only generated {len(base_chars)} valid characters.")
    return ''.join(base_chars)

# Build the alphabet once; both export files are derived from this string.
frozen_alphabet = generate_frozen_base4096(SEED)

# Plain-text export: the raw 4096 characters, with no trailing newline.
with open("frozen_base4096_alphabet.txt", "w", encoding="utf-8") as txt_out:
    txt_out.write(frozen_alphabet)

# Python-module export: one parenthesised constant made of 64 adjacent string
# literals holding 64 characters each. json.dumps supplies the quoting and
# escaping; ensure_ascii=False keeps non-ASCII characters readable.
module_lines = [
    "# frozen_base4096_alphabet.py\n",
    "# Deterministic Base-4096 Alphabet (BMP only, surrogate-safe)\n\n",
    "FROZEN_BASE4096_ALPHABET = (\n",
]
for start in range(0, 4096, 64):
    row = frozen_alphabet[start:start + 64]
    module_lines.append(f"    {json.dumps(row, ensure_ascii=False)}\n")
module_lines.append(")\n")

with open("frozen_base4096_alphabet.py", "w", encoding="utf-8") as py_out:
    py_out.writelines(module_lines)

# Report the result; the two numbers should both be 4096.
print("✅ Deterministic Base-4096 alphabet exported.")
print("Length:", len(frozen_alphabet), "Unique:", len(set(frozen_alphabet)))
