# freeze_base4096_alphabet_visible.py
import unicodedata
import json

# Characters that lead the alphabet, in exactly this order: ASCII digits,
# uppercase letters, lowercase letters, then ASCII punctuation.
# Space, '.' and '\' are not part of the seed.
SEED = (
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    "!@#$%^&*()-_+=[{]};:',\"<>?/`|~"
)

# General categories that must never appear in a visible alphabet: space,
# line and paragraph separators, control, format, surrogate, unassigned and
# private-use code points.
_EXCLUDED_CATEGORIES = frozenset({'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Cs', 'Cn', 'Co'})

# Combining marks attach to the preceding character instead of standing alone.
_COMBINING_CATEGORIES = frozenset({'Mn', 'Mc', 'Me'})

# Substrings of the Unicode name that disqualify a character.
# NOTE(review): this is a plain substring test, so 'TAG' also rejects visible
# characters such as "TAGALOG LETTER A". It is kept unchanged because loosening
# it could alter which characters end up in an already-published alphabet.
_BAD_NAME_FRAGMENTS = ('CONTROL', 'PRIVATE USE', 'SURROGATE', 'UNASSIGNED', 'TAG')


def is_valid_char(c, ascii_seen, skeletons_seen):
    """Decide whether ``c`` may join the alphabet, recording it when accepted.

    Args:
        c: A single-character string.
        ascii_seen: Set of ASCII characters accepted so far. Updated in place
            when an ASCII ``c`` is accepted.
        skeletons_seen: Set of skeletons of the non-ASCII characters examined
            so far. Updated in place for every non-ASCII ``c`` that reaches
            the skeleton step and is not rejected there.

    Returns:
        True if ``c`` is accepted, False otherwise.

    A character is rejected when it has no Unicode name, when its name
    contains one of the disqualifying fragments, when its general category is
    a separator (Zs, Zl, Zp), an "other" category (Cc, Cf, Cs, Cn, Co) or a
    combining mark (Mn, Mc, Me), or when it duplicates something already
    accepted (see below).
    """
    # Only the name lookup can raise ValueError (characters without a name),
    # so the try block is limited to that single call.
    try:
        name = unicodedata.name(c)
    except ValueError:
        return False
    if any(bad in name for bad in _BAD_NAME_FRAGMENTS):
        return False

    cat = unicodedata.category(c)
    if cat in _EXCLUDED_CATEGORIES or cat in _COMBINING_CATEGORIES:
        return False

    if ord(c) < 128:
        # ASCII characters are de-duplicated by exact identity only.
        if c in ascii_seen:
            return False
        ascii_seen.add(c)
        return True

    # Non-ASCII: the skeleton is the lowercase form of the ASCII characters
    # left after NFKD decomposition. A non-empty skeleton may be used once;
    # characters with an empty skeleton are always accepted. ASCII characters
    # are not registered here, so the first non-ASCII character folding to a
    # given ASCII skeleton is still accepted.
    decomposed = unicodedata.normalize('NFKD', c)
    skeleton = ''.join(ch for ch in decomposed if ord(ch) < 128).lower()
    if skeleton and skeleton in skeletons_seen:
        return False
    skeletons_seen.add(skeleton)
    return True

def generate_frozen_base4096(seed, size=4096):
    """Build the alphabet: accepted seed characters first, then code-point order.

    Args:
        seed: Iterable of single characters tried first, in the given order.
        size: Exact number of characters the alphabet must contain
            (4096 by default).

    Returns:
        A string of exactly ``size`` distinct characters.

    Raises:
        ValueError: If fewer than ``size`` characters pass ``is_valid_char``
            after the seed and every code point from U+0020 to U+10FFFF have
            been tried.
    """
    seen = set()
    base_chars = []
    ascii_seen = set()
    skeletons_seen = set()

    # Seed characters keep their order; the remaining slots are filled by
    # walking the code points upward from U+0020.
    sources = (seed, map(chr, range(0x20, 0x110000)))
    for source in sources:
        for c in source:
            # Check fullness before every append, in both sources, so a seed
            # that already supplies enough characters cannot overshoot the
            # target and trigger a pointless scan of all of Unicode.
            if len(base_chars) >= size:
                break
            if c not in seen and is_valid_char(c, ascii_seen, skeletons_seen):
                seen.add(c)
                base_chars.append(c)

    if len(base_chars) != size:
        raise ValueError(f"Only generated {len(base_chars)} valid characters.")
    return ''.join(base_chars)

# Build the alphabet once; everything below only serializes it.
frozen_alphabet = generate_frozen_base4096(SEED)

# Plain-text export: the alphabet as one continuous run, no line breaks.
with open("frozen_base4096_alphabet.txt", "w", encoding="utf-8") as txt_out:
    txt_out.write(frozen_alphabet)

# Python-module export: 64 characters per row, each row a JSON-escaped string
# literal, wrapped in parentheses so the rows concatenate into one constant.
literal_rows = [
    f"    {json.dumps(frozen_alphabet[start:start + 64], ensure_ascii=False)}\n"
    for start in range(0, 4096, 64)
]
module_text = "".join([
    "# frozen_base4096_alphabet.py\n",
    "# Canonical Base-4096 Alphabet (fully visible, frozen, deterministic)\n\n",
    "FROZEN_BASE4096_ALPHABET = (\n",
    *literal_rows,
    ")\n",
])
with open("frozen_base4096_alphabet.py", "w", encoding="utf-8") as py_out:
    py_out.write(module_text)

# Report the total length and the number of distinct characters.
print("✅ Fully visible canonical Base-4096 alphabet exported.")
print("Length:", len(frozen_alphabet), "Unique:", len(set(frozen_alphabet)))
