Files
Jonathan Tinney 7971961166
Some checks failed
Analyze modified files / flake8 (push) Failing after 2m28s
Build / build-win (push) Has been cancelled
Build / build-ubuntu2204 (push) Has been cancelled
ctest / Test C++ ubuntu-latest (push) Has been cancelled
ctest / Test C++ windows-latest (push) Has been cancelled
Analyze modified files / mypy (push) Has been cancelled
Build and Publish Docker Images / Push Docker image to Docker Hub (push) Successful in 5m4s
Native Code Static Analysis / scan-build (push) Failing after 5m2s
type check / pyright (push) Successful in 1m7s
unittests / Test Python 3.11.2 ubuntu-latest (push) Failing after 16m23s
unittests / Test Python 3.12 ubuntu-latest (push) Failing after 28m19s
unittests / Test Python 3.13 ubuntu-latest (push) Failing after 14m49s
unittests / Test hosting with 3.13 on ubuntu-latest (push) Successful in 5m0s
unittests / Test Python 3.13 macos-latest (push) Has been cancelled
unittests / Test Python 3.11 windows-latest (push) Has been cancelled
unittests / Test Python 3.13 windows-latest (push) Has been cancelled
add schedule I, sonic 1/frontiers/heroes, spirit island
2026-04-02 23:46:36 -07:00

312 lines
11 KiB
Python

import re
from collections import defaultdict
from functools import lru_cache
from typing import List, Union, Optional
from . import char_table, kanji_table, text_offset_split_index_seasons, text_offset_1_table_address_seasons, text_offset_2_table_address_seasons, \
text_table_eng_address_seasons, \
text_addresses_limit_seasons, text_offset_split_index_ages, text_offset_1_table_address_ages, text_offset_2_table_address_ages, text_table_eng_address_ages, \
text_addresses_limit_ages
from ..RomData import RomData
from ..Util import simple_hex
from ..z80asm.Assembler import GameboyAddress
# Matches the two forms of inline control sequences used in game script text:
#   \name(arg)  -- commands that carry a parenthesized argument
#   \name       -- bare placeholder keywords with no argument
# The name lists mirror control_functions / control_keywords defined later
# in this module.
# NOTE(review): this pattern is not referenced anywhere in this file;
# presumably it is consumed by callers that tokenize script text - confirm.
control_sequence_pattern = re.compile(r"""
\\
(jump|cmd|col|charsfx|speed|pos|wait|sfx|call)
\(([^)]+)\) |
\\(link_name|child_name|w7SecretBuffer1|w7SecretBuffer2|
num1|opt|stop|heartpiece|num2|slow)
""", re.VERBOSE)
# Parses dictionary-entry keys of the form "DICT<index>_<hex id>"
# (the key format produced in write_text_data below).
dict_pattern = re.compile(r"DICT(\d+)_([0-9a-f]+)")
def add_to_tree(tree: dict[str, list[int]], char: str, keys: list[int]):
    """Register *keys* as the encoded byte sequence for *char* in *tree*.

    An existing entry for *char* is overwritten.
    """
    tree.update({char: keys})
def build_encoding_dict() -> dict[str, list[int]]:
    """Build the mapping from characters / control names to their encoded bytes.

    Plain characters map to their index in char_table (one byte); kanji map
    to [0x06, index]; control-sequence names map to fixed two-byte codes
    whose last byte callers may add an argument to (see recursive_encode).
    """
    tree = {}
    for i in range(len(char_table)):
        char = char_table[i]
        # "🚫" and "" appear to mark unused slots in the character table.
        if char != "🚫" and char != "":
            add_to_tree(tree, char, [i])
    # Kanji are encoded with an 0x06 prefix byte followed by the table index.
    for i in range(len(kanji_table)):
        char = kanji_table[i]
        if char != "":
            add_to_tree(tree, char, [0x06, i])
    add_to_tree(tree, "jump", [0x07, 0x00])
    add_to_tree(tree, "cmd", [0x08, 0x00])
    # NOTE(review): three entries in this function use "" as the key (this
    # one and the two at the bottom); each overwrites the previous one, so
    # only the last ([0xba, 0xbb]) survives. These look like special glyphs
    # that were lost/garbled in this copy of the file - confirm upstream.
    add_to_tree(tree, "", [0x09, 0x00])
    # The colored squares share the 0x09 prefix with "col" - presumably
    # inline text-color markers; confirm against the game's text engine.
    add_to_tree(tree, "🟥", [0x09, 0x01])
    add_to_tree(tree, "🟧", [0x09, 0x02])
    add_to_tree(tree, "🟦", [0x09, 0x03])
    add_to_tree(tree, "🟩", [0x09, 0x04])
    add_to_tree(tree, "col", [0x09, 0x00])
    add_to_tree(tree, "link_name", [0x0a, 0x00])
    add_to_tree(tree, "child_name", [0x0a, 0x01])
    add_to_tree(tree, "w7SecretBuffer1", [0x0a, 0x02])
    add_to_tree(tree, "w7SecretBuffer2", [0x0a, 0x03])
    # 0x0c-prefixed placeholders are distinguished by the second byte.
    add_to_tree(tree, "speed", [0x0c, 0x00])
    add_to_tree(tree, "num1", [0x0c, 0x08])
    add_to_tree(tree, "opt", [0x0c, 0x10])
    add_to_tree(tree, "stop", [0x0c, 0x18])
    add_to_tree(tree, "pos", [0x0c, 0x20])
    add_to_tree(tree, "heartpiece", [0x0c, 0x28])
    add_to_tree(tree, "num2", [0x0c, 0x30])
    add_to_tree(tree, "slow", [0x0c, 0x38])
    add_to_tree(tree, "wait", [0x0d, 0x00])
    add_to_tree(tree, "sfx", [0x0e, 0x00])
    add_to_tree(tree, "call", [0x0f, 0x00])
    add_to_tree(tree, "", [0xb8, 0xb9])
    add_to_tree(tree, "", [0xba, 0xbb])
    return tree
# --- Trie Data Structure ---
class TrieNode:
    """Node of the dictionary-compression trie built by build_trie.

    children maps the next token (a character or a control-sequence token)
    to the child node, auto-creating children on first access; code holds
    the two-byte dictionary code when a complete dictionary entry ends at
    this node, else None.
    """

    def __init__(self):
        # Pass the class itself as the default_factory; the original
        # `lambda: TrieNode()` was a needless wrapper with identical behavior.
        self.children = defaultdict(TrieNode)
        self.code = None
# --- Global Caches ---
# Module-wide cache so repeated encode_text calls with the same
# dictionary/encoding objects reuse the previously built trie.
encode_current_trie: Optional[TrieNode] = None
encode_current_encoding: Optional[dict[str, list[int]]] = None
# (id(dictionary), id(encoding)) of the objects the cache was built from;
# compared in encode_text to decide whether to rebuild.
encode_last_ids: tuple[Optional[int], Optional[int]] = (None, None)
# Control-sequence names written as a bare \name (no argument).
control_keywords = {
    "link_name", "child_name", "w7SecretBuffer1", "w7SecretBuffer2",
    "num1", "opt", "stop", "heartpiece", "num2", "slow"
}
# Control-sequence names written as \name(XX) with a two-hex-digit argument.
control_functions = {
    "jump", "cmd", "col", "charsfx", "speed", "pos", "wait", "sfx", "call"
}
def next_character(text: str, index: int) -> tuple[Union[str, tuple[str, int]], int]:
    """Read one token from *text* starting at *index*.

    Returns a (token, length) pair where length is the number of source
    characters consumed and token is one of:
      - a single character,
      - "\\0" when *index* is past the end of the text,
      - a (command_name, argument) tuple for a \\name(XX) control sequence.

    Raises:
        ValueError: if a backslash introduces an unrecognized sequence.
    """
    if index >= len(text):
        # Past the end: synthesize the string terminator.
        return "\0", 1
    if text[index] != "\\":
        return text[index], 1
    # Try parsing a function-style command: \name(hex)
    for name in control_functions:
        if text.startswith(f"\\{name}(", index):
            start = index + len(name) + 2  # skip past '\name('
            end = start + 2                # argument is exactly two hex digits
            value = text[start:end]
            return (name, int(value, 16)), end + 1 - index  # +1 consumes ')'
    # Try keyword match (e.g. \opt)
    for name in control_keywords:
        if text.startswith(f"\\{name}", index):
            return name, 1 + len(name)
    # Previously a bare `raise Exception()` with no context; include the
    # offending text so a bad script fails with a diagnosable message.
    raise ValueError(
        f"unknown control sequence at index {index}: {text[index:index + 16]!r}")
def build_trie(dictionary: dict[str, str]) -> TrieNode:
    """Build a trie over the dictionary entry texts for compression lookup.

    Keys must follow the "DICT<index>_<hex>" naming scheme (dict_pattern).
    Each entry's token sequence (as produced by next_character) is inserted
    into the trie and the terminal node is tagged with the two-byte
    dictionary code [2 + index, int(hex, 16)].

    Raises:
        ValueError: if a key does not match the DICT naming scheme.
    """
    root = TrieNode()
    for key, value in dictionary.items():
        # Parse the key with the module-level dict_pattern instead of the
        # previous fixed offsets (key[4], key[6:8]) so multi-digit
        # dictionary indices work and malformed keys fail loudly.
        match = dict_pattern.fullmatch(key)
        if match is None:
            raise ValueError(f"invalid dictionary key: {key!r}")
        node = root
        i = 0
        while i < len(value):
            token, length = next_character(value, i)
            node = node.children[token]
            i += length
        node.code = [2 + int(match.group(1)), int(match.group(2), 16)]
    return root
@lru_cache
def recursive_encode(text: str, index: int) -> tuple[int, ...]:
    """Encode text[index:] into a byte tuple, preferring dictionary
    substitutions whenever they yield a strictly shorter encoding.

    Relies on the module globals encode_current_encoding and
    encode_current_trie being set by encode_text. Because results depend
    on those globals, encode_dict clears this function's lru_cache after
    each run; the cache is also what keeps the recursion from being
    exponential on shared suffixes.
    """
    if index >= len(text):
        # A 0x00 byte terminates the message.
        return (0,)
    token, length = next_character(text, index)
    if isinstance(token, tuple):
        # Function-style command: base code with the argument added into
        # the final byte.
        encoded = list(encode_current_encoding[token[0]])
        encoded[-1] += token[1]
        if token[0] == "jump":
            # A jump ends this message; nothing may follow it.
            return tuple(encoded)
    else:
        if token not in encode_current_encoding:
            token = ""  # Use a white square to denote unknown characters
            # NOTE(review): the comment above says "white square" but the
            # literal is the empty string - the glyph may have been lost
            # in this copy of the file; confirm against upstream.
        encoded = encode_current_encoding[token]
    best = list(encoded) + list(recursive_encode(text, index + length))
    if token not in encode_current_trie.children:
        # No dict entry
        return tuple(best)
    # Walk the trie as far as the input matches.
    node = encode_current_trie.children[token]
    i = index + length
    depth = 1  # (computed but never read afterwards)
    while i < len(text):
        token2, tlen = next_character(text, i)
        if token2 not in node.children:
            break
        node = node.children[token2]
        i += tlen
        depth += 1
    # NOTE(review): only the deepest matched node is checked for a code;
    # a shorter dictionary entry ending mid-path is never considered as a
    # candidate - confirm this is intentional.
    if node.code:
        candidate = node.code + list(recursive_encode(text, i))
        if len(candidate) < len(best):
            best = candidate
    return tuple(best)
# --- Main Function ---
def encode_text(text: str, encoding: dict[str, List[int]], dictionary: dict[str, str]) -> List[int]:
    """Encode *text* into game bytes using *encoding*, compressing with *dictionary*.

    The trie built from *dictionary* is cached at module level and only
    rebuilt when the identity of the dictionary or encoding object changes.
    NOTE(review): id() values can be reused after garbage collection, so a
    stale cache hit is theoretically possible - confirm callers keep these
    objects alive between calls.
    """
    global encode_current_trie, encode_current_encoding, encode_last_ids
    current_ids = (id(dictionary), id(encoding))
    if current_ids != encode_last_ids:
        # Inputs changed: rebuild the trie and remember what it was built from.
        encode_current_trie = build_trie(dictionary)
        encode_current_encoding = encoding
        encode_last_ids = current_ids
    return list(recursive_encode(text, 0))
def encode_dict(text_data: dict[str, str], dictionary: Optional[dict[str, str]] = None) -> dict[str, list[int]]:
    """Encode every text in *text_data*, returning a key -> byte-list map.

    When *dictionary* is None the texts are encoded without dictionary
    compression. recursive_encode's memoization cache is cleared afterwards
    so results do not leak across runs with different encodings.
    """
    active_dictionary = {} if dictionary is None else dictionary
    table = build_encoding_dict()
    encoded = {
        key: encode_text(value, table, active_dictionary)
        for key, value in text_data.items()
    }
    recursive_encode.cache_clear()
    return encoded
def build_compact_table(data: dict[str, list[int]]) -> tuple[list[int], dict[str, int]]:
    """Pack the byte sequences in *data* into one table, sharing overlaps.

    Sequences are placed longest-first; a sequence that already appears as
    the tail of a previously placed one reuses those bytes instead of being
    appended. Returns (table, offsets) where offsets maps each key to its
    start position in the table.
    """
    # reverse=True keeps Python's stable ordering for equal lengths,
    # matching a descending sort on -len.
    ordered = sorted(data.items(), key=lambda item: len(item[1]), reverse=True)
    blob: list[int] = []
    placements: dict[str, int] = {}
    for name, codes in ordered:
        size = len(codes)
        position = None
        # Look for the sequence as a suffix of any already-placed string.
        for placed_name, placed_offset in placements.items():
            end = placed_offset + len(data[placed_name])
            if blob[end - size:end] == codes:
                position = end - size
                break
        if position is None:
            # No overlap found: append at the current end of the table.
            position = len(blob)
            blob.extend(codes)
        placements[name] = position
    # Offsets are written as 16-bit words, so the table must fit in one.
    assert len(blob) <= 0xffff
    return blob, placements
def write_text_data(rom: RomData, dictionary: dict[str, str], texts: dict[str, str], seasons: bool):
    """Encode all game texts and write them, with their offset tables, to the ROM.

    Texts are split into two banks at text_offset_split_index; each bank is
    compacted with build_compact_table and written after its offset table.
    *seasons* selects Seasons vs. Ages table addresses and limits.
    """
    # Select the per-game table addresses and size limit.
    if seasons:
        text_offset_split_index = text_offset_split_index_seasons
        text_offset_1 = GameboyAddress(rom.read_byte(text_offset_1_table_address_seasons), rom.read_word(text_offset_1_table_address_seasons + 1))
        text_offset_2 = GameboyAddress(rom.read_byte(text_offset_2_table_address_seasons), rom.read_word(text_offset_2_table_address_seasons + 1))
        text_table_eng_address = text_table_eng_address_seasons
        text_addresses_limit = text_addresses_limit_seasons
    else:
        text_offset_split_index = text_offset_split_index_ages
        text_offset_1 = GameboyAddress(rom.read_byte(text_offset_1_table_address_ages), rom.read_word(text_offset_1_table_address_ages + 1))
        text_offset_2 = GameboyAddress(rom.read_byte(text_offset_2_table_address_ages), rom.read_word(text_offset_2_table_address_ages + 1))
        text_table_eng_address = text_table_eng_address_ages
        text_addresses_limit = text_addresses_limit_ages
    # Partition texts by group id (key characters 3..4 of "TX_ggss"):
    # groups below text_offset_split_index - 4 go to bank 1, the rest to bank 2.
    dict1 = {}
    dict2 = {}
    for key in texts:
        if int(key[3:5], 16) < text_offset_split_index - 4:
            dict1[key] = texts[key]
        else:
            dict2[key] = texts[key]
    # Bank 1 also carries the dictionary entries themselves; encode_dict's
    # default (no dictionary) means they are stored uncompressed.
    encoded_dict1 = encode_dict(dict1, dictionary)
    encoded_dict1.update(encode_dict(dictionary))
    encoded_dict2 = encode_dict(dict2, dictionary)
    # Two bytes of offset-table entry per encoded string.
    offset_table_length = (len(encoded_dict1) + len(encoded_dict2)) * 2
    text_offset_1_address = text_offset_1.address_in_rom()
    text_offset_2_address = text_offset_2.address_in_rom()
    # Group table has 0x64 word entries; the per-string tx table follows it.
    text_table_current_address = text_table_eng_address
    tx_table_current_address = text_table_eng_address + 0x64 * 2
    # Bank-1 text data starts right after both tables.
    text_offset_1_offset = text_table_eng_address + 0x64 * 2 + offset_table_length - text_offset_1_address
    assert text_offset_1_offset >= 0
    compact_table1, compact_offsets1 = build_compact_table(encoded_dict1)
    rom.write_bytes(text_offset_1_address + text_offset_1_offset, compact_table1)
    # First 4 group entries are the dictionary banks DICT0..DICT3, each
    # with a full 0x100 entries (hence the "- 4" offsets elsewhere).
    for i in range(4):
        rom.write_word(text_table_current_address, tx_table_current_address - text_table_eng_address)
        text_table_current_address += 2
        for j in range(0, 0x100):
            entry_name = f"DICT{i}_{simple_hex(j)}"
            rom.write_word(tx_table_current_address, compact_offsets1[entry_name] + text_offset_1_offset)
            tx_table_current_address += 2
    # Bank-1 text groups: consecutive subids until the first missing one.
    for i in range(text_offset_split_index - 4):
        start_address = tx_table_current_address
        subid = 0
        while True:
            tx = f"TX_{simple_hex(i)}{simple_hex(subid)}"
            if tx not in dict1:
                break
            subid += 1
            rom.write_word(tx_table_current_address, compact_offsets1[tx] + text_offset_1_offset)
            tx_table_current_address += 2
        if subid > 0:
            rom.write_word(text_table_current_address, start_address - text_table_eng_address)
        else:
            # Empty group: null entry in the group table.
            rom.write_word(text_table_current_address, 0)
        text_table_current_address += 2
    # Intentionally disabled debug dump of encoded sizes (flip the False
    # to enable when investigating text-size overflows).
    if __debug__ and False:
        sorted_dict = sorted(list(encoded_dict1.items()) + list(encoded_dict2.items()), key=lambda kv: -len(kv[1]))
        for entry in sorted_dict:
            if entry[0] in dict1:
                print(entry[0], dict1[entry[0]], len(entry[1]))
            elif entry[0] in dict2:
                print(entry[0], dict2[entry[0]], len(entry[1]))
    # Bank 2 starts after bank 1's data, or at its own base if that is later.
    text_offset_2_offset = max(0, text_offset_1_address + text_offset_1_offset + len(compact_table1) - text_offset_2_address)
    compact_table2, compact_offsets2 = build_compact_table(encoded_dict2)
    assert text_offset_2_address + text_offset_2_offset + len(compact_table2) < text_addresses_limit, \
        f"Text is too long ({text_offset_2_address + text_offset_2_offset + len(compact_table2) - text_addresses_limit} too many bytes)"
    print(f"Free text bytes: {text_addresses_limit - text_offset_2_address - text_offset_2_offset - len(compact_table2)}")
    rom.write_bytes(text_offset_2_address + text_offset_2_offset, compact_table2)
    # Bank-2 text groups, same scheme as bank 1.
    for i in range(text_offset_split_index - 4, 0x60):
        start_address = tx_table_current_address
        subid = 0
        while True:
            tx = f"TX_{simple_hex(i)}{simple_hex(subid)}"
            if tx not in dict2:
                break
            subid += 1
            rom.write_word(tx_table_current_address, compact_offsets2[tx] + text_offset_2_offset)
            tx_table_current_address += 2
        if subid > 0:
            rom.write_word(text_table_current_address, start_address - text_table_eng_address)
        else:
            rom.write_word(text_table_current_address, 0)
        text_table_current_address += 2