forked from mirror/Archipelago
Some checks failed
Analyze modified files / flake8 (push) Failing after 2m28s
Build / build-win (push) Has been cancelled
Build / build-ubuntu2204 (push) Has been cancelled
ctest / Test C++ ubuntu-latest (push) Has been cancelled
ctest / Test C++ windows-latest (push) Has been cancelled
Analyze modified files / mypy (push) Has been cancelled
Build and Publish Docker Images / Push Docker image to Docker Hub (push) Successful in 5m4s
Native Code Static Analysis / scan-build (push) Failing after 5m2s
type check / pyright (push) Successful in 1m7s
unittests / Test Python 3.11.2 ubuntu-latest (push) Failing after 16m23s
unittests / Test Python 3.12 ubuntu-latest (push) Failing after 28m19s
unittests / Test Python 3.13 ubuntu-latest (push) Failing after 14m49s
unittests / Test hosting with 3.13 on ubuntu-latest (push) Successful in 5m0s
unittests / Test Python 3.13 macos-latest (push) Has been cancelled
unittests / Test Python 3.11 windows-latest (push) Has been cancelled
unittests / Test Python 3.13 windows-latest (push) Has been cancelled
312 lines
11 KiB
Python
312 lines
11 KiB
Python
import re
|
|
from collections import defaultdict
|
|
from functools import lru_cache
|
|
from typing import List, Union, Optional
|
|
from . import char_table, kanji_table, text_offset_split_index_seasons, text_offset_1_table_address_seasons, text_offset_2_table_address_seasons, \
|
|
text_table_eng_address_seasons, \
|
|
text_addresses_limit_seasons, text_offset_split_index_ages, text_offset_1_table_address_ages, text_offset_2_table_address_ages, text_table_eng_address_ages, \
|
|
text_addresses_limit_ages
|
|
from ..RomData import RomData
|
|
from ..Util import simple_hex
|
|
from ..z80asm.Assembler import GameboyAddress
|
|
|
|
# Matches one escaped control sequence in message text. Two alternatives:
#   1. function style  \name(args) -> group 1 is the name, group 2 the text between the parentheses
#   2. bare keyword    \name       -> group 3 is the name
# re.VERBOSE makes the whitespace and line breaks inside the pattern insignificant.
control_sequence_pattern = re.compile(r"""
\\
(jump|cmd|col|charsfx|speed|pos|wait|sfx|call)
\(([^)]+)\) |
\\(link_name|child_name|w7SecretBuffer1|w7SecretBuffer2|
num1|opt|stop|heartpiece|num2|slow)
""", re.VERBOSE)
# Matches names of the form DICT<decimal digits>_<lowercase hex digits>;
# group 1 is the decimal part, group 2 the hex part.
dict_pattern = re.compile(r"DICT(\d+)_([0-9a-f]+)")
|
|
|
|
|
|
def add_to_tree(tree: dict[str, list[int]], char: str, keys: list[int]):
    """Register ``keys`` as the byte sequence stored for ``char`` in ``tree``.

    ``tree`` is modified in place; an existing entry for ``char`` is
    overwritten. The ``keys`` list is stored as-is (not copied) and nothing
    is returned.
    """
    tree.update({char: keys})
|
|
|
|
|
|
def build_encoding_dict() -> dict[str, list[int]]:
    """Build the map from characters and control names to their byte codes.

    The result is assembled in three passes, later passes overwriting earlier
    ones when a key repeats:

    1. every entry of ``char_table`` except "🚫" and "∅" maps to its own index
       as a single byte;
    2. every entry of ``kanji_table`` except "∅" maps to ``[0x06, index]``;
    3. a fixed list of control names, colour squares and button glyphs maps to
       hard-coded two-byte codes.

    A fresh dict with fresh lists is returned on every call.
    """
    encoding = {}

    for code, glyph in enumerate(char_table):
        if glyph not in ("🚫", "∅"):
            add_to_tree(encoding, glyph, [code])

    for code, glyph in enumerate(kanji_table):
        if glyph != "∅":
            add_to_tree(encoding, glyph, [0x06, code])

    # Kept inside the function so each call hands out new list objects.
    fixed_codes = (
        ("jump", [0x07, 0x00]),
        ("cmd", [0x08, 0x00]),

        ("⬜", [0x09, 0x00]),
        ("🟥", [0x09, 0x01]),
        ("🟧", [0x09, 0x02]),
        ("🟦", [0x09, 0x03]),
        ("🟩", [0x09, 0x04]),
        ("col", [0x09, 0x00]),

        ("link_name", [0x0a, 0x00]),
        ("child_name", [0x0a, 0x01]),
        ("w7SecretBuffer1", [0x0a, 0x02]),
        ("w7SecretBuffer2", [0x0a, 0x03]),

        ("speed", [0x0c, 0x00]),
        ("num1", [0x0c, 0x08]),
        ("opt", [0x0c, 0x10]),
        ("stop", [0x0c, 0x18]),
        ("pos", [0x0c, 0x20]),
        ("heartpiece", [0x0c, 0x28]),
        ("num2", [0x0c, 0x30]),
        ("slow", [0x0c, 0x38]),

        ("wait", [0x0d, 0x00]),
        ("sfx", [0x0e, 0x00]),
        ("call", [0x0f, 0x00]),

        ("Ⓐ", [0xb8, 0xb9]),
        ("Ⓑ", [0xba, 0xbb]),
    )
    for name, code in fixed_codes:
        add_to_tree(encoding, name, code)

    return encoding
|
|
|
|
|
|
# --- Trie Data Structure ---
|
|
class TrieNode:
    """One node of a token trie.

    ``children`` maps a token to the next node; subscripting it with an
    unseen token creates and stores a new empty node, while a plain ``in``
    test does not. ``code`` starts as ``None`` and is filled in by whoever
    builds the trie.
    """

    def __init__(self):
        self.code = None
        self.children = defaultdict(TrieNode)
|
|
|
|
|
|
# --- Global Caches ---
# Module-level state shared by encode_text() and recursive_encode().
# Trie built from the dictionary most recently passed to encode_text().
encode_current_trie: Optional[TrieNode] = None
# Encoding table most recently passed to encode_text().
encode_current_encoding: Optional[dict[str, list[int]]] = None
# Remembers which (dictionary, encoding) pair the cached trie was built for;
# encode_text() compares against it to decide whether to rebuild.
encode_last_ids = (None, None)

# Escape names written as a bare keyword, e.g. \opt
control_keywords = {
    "link_name", "child_name", "w7SecretBuffer1", "w7SecretBuffer2",
    "num1", "opt", "stop", "heartpiece", "num2", "slow"
}

# Escape names written with a parenthesised hex argument, e.g. \jump(1f)
control_functions = {
    "jump", "cmd", "col", "charsfx", "speed", "pos", "wait", "sfx", "call"
}
|
|
|
|
|
|
def next_character(text: str, index: int) -> tuple[Union[str, tuple[str, int]], int]:
    r"""Read the token that starts at ``text[index]``.

    Returns ``(token, length)`` where ``length`` is the number of characters
    of ``text`` the token occupies:

    * ``index`` at or past the end of ``text``: ``("\0", 1)``;
    * any character other than a backslash: ``(character, 1)``;
    * ``\name(hh)`` with ``name`` in ``control_functions``:
      ``((name, value), length)`` where ``value`` is the two characters after
      the opening parenthesis parsed as hexadecimal. The character following
      them is counted as the closing parenthesis without being checked;
    * ``\name`` with ``name`` in ``control_keywords``: ``(name, length)``.

    Raises:
        ValueError: if a backslash does not start a known control sequence,
            or if the argument of a function-style sequence is not valid
            hexadecimal.
    """
    if index >= len(text):
        return "\0", 1

    current = text[index]
    if current != "\\":
        return current, 1

    # Function-style command: \name(hh)
    for name in control_functions:
        if text.startswith(f"\\{name}(", index):
            start = index + len(name) + 2  # skip past '\name('
            end = start + 2                # exactly two hex digits
            value = int(text[start:end], 16)
            # +1 also consumes the closing parenthesis.
            return (name, value), end + 1 - index

    # Bare keyword: \name
    for name in control_keywords:
        if text.startswith(f"\\{name}", index):
            return name, 1 + len(name)

    # Name the offending text so a bad message can be located; ValueError is
    # still an Exception, so existing broad handlers keep working.
    raise ValueError(
        f"Unknown control sequence at index {index}: {text[index:index + 20]!r}"
    )
|
|
|
|
|
|
def build_trie(dictionary: dict[str, str]) -> TrieNode:
    """Build a trie over the tokenised phrases of ``dictionary``.

    Each phrase is split into tokens with ``next_character`` and inserted
    token by token. The node reached at the end of a phrase gets
    ``code = [2 + group, entry]``, where ``group`` is the decimal digit at
    position 4 of the entry name and ``entry`` is the hex value of characters
    6-7 (names look like ``DICT0_1f``). An empty phrase stores its code on
    the root node.
    """
    trie = TrieNode()
    for entry_name, phrase in dictionary.items():
        cursor = trie
        position = 0
        phrase_length = len(phrase)
        while position < phrase_length:
            token, consumed = next_character(phrase, position)
            # Subscripting creates the child node on first visit.
            cursor = cursor.children[token]
            position += consumed
        group = int(entry_name[4])
        entry = int(entry_name[6:8], 16)
        cursor.code = [2 + group, entry]
    return trie
|
|
|
|
|
|
@lru_cache
def recursive_encode(text: str, index: int) -> tuple[int, ...]:
    """Return the shortest encoding found for ``text[index:]``.

    Reads the module globals ``encode_current_encoding`` and
    ``encode_current_trie``; because results are memoised on
    ``(text, index)`` only, the memo must be cleared whenever those globals
    change.

    The baseline encodes the token at ``index`` literally and recurses on
    the remainder. Every dictionary phrase of two or more tokens that
    matches at ``index`` is then tried as a replacement, and the strictly
    shortest byte sequence wins. The sequence ends with a ``0`` byte, except
    that a ``jump`` command ends the encoding immediately.

    Returns:
        The encoded bytes as a tuple of ints.
    """
    if index >= len(text):
        return (0,)

    token, length = next_character(text, index)
    if isinstance(token, tuple):
        # Function-style command: the argument is added to the last byte of
        # the command's base code.
        # NOTE(review): no "charsfx" entry is visible in build_encoding_dict(),
        # so \charsfx(..) presumably raises KeyError here - confirm.
        encoded = list(encode_current_encoding[token[0]])
        encoded[-1] += token[1]
        if token[0] == "jump":
            # Nothing after a jump is encoded, not even the terminator.
            return tuple(encoded)
    else:
        if token not in encode_current_encoding:
            token = "口"  # Use a white square to denote unknown characters
        encoded = encode_current_encoding[token]

    # Baseline: literal token followed by the best encoding of the rest.
    best = list(encoded) + list(recursive_encode(text, index + length))

    if token not in encode_current_trie.children:
        # No dict entry
        return tuple(best)

    # Walk the trie along the text, trying each dictionary phrase that ends
    # on the way.
    node = encode_current_trie.children[token]
    i = index + length

    while i < len(text):
        token2, tlen = next_character(text, i)
        if token2 not in node.children:
            break
        node = node.children[token2]
        i += tlen
        if node.code:
            candidate = node.code + list(recursive_encode(text, i))
            if len(candidate) < len(best):
                best = candidate

    return tuple(best)
|
|
|
|
|
|
# --- Main Function ---
|
|
def encode_text(text: str, encoding: dict[str, List[int]], dictionary: dict[str, str]) -> List[int]:
    """Encode ``text`` into bytes using ``encoding`` and ``dictionary``.

    The trie built from ``dictionary`` is cached in module globals and only
    rebuilt when a different ``dictionary`` or ``encoding`` object is passed
    than on the previous call. Mutating the same objects in place between
    calls is not detected.

    Args:
        text: message text, possibly containing backslash control sequences.
        encoding: map from characters / control names to byte codes.
        dictionary: map from ``DICTg_hh`` entry names to phrases.

    Returns:
        The encoded bytes as a list of ints.
    """
    global encode_current_trie, encode_current_encoding, encode_last_ids

    # Compare the objects themselves rather than their id(): an id can be
    # reused once the previous object has been garbage-collected, which would
    # make a new dictionary look unchanged. Keeping the references in
    # encode_last_ids also keeps the old objects alive, so this cannot happen.
    last_dictionary, last_encoding = encode_last_ids
    if last_dictionary is not dictionary or last_encoding is not encoding:
        encode_current_trie = build_trie(dictionary)
        encode_current_encoding = encoding
        encode_last_ids = (dictionary, encoding)
        # Memoised results were computed against the previous trie/encoding.
        recursive_encode.cache_clear()

    return list(recursive_encode(text, 0))
|
|
|
|
|
|
def encode_dict(text_data: dict[str, str], dictionary: Optional[dict[str, str]] = None) -> dict[str, list[int]]:
    """Encode every message in ``text_data``.

    Args:
        text_data: map from entry name to message text.
        dictionary: phrase dictionary handed to ``encode_text``; an empty one
            is used when omitted.

    Returns:
        A new dict with the same keys, in the same order, mapped to the
        encoded byte lists.

    A fresh encoding table is built for each call, and the memo of
    ``recursive_encode`` is emptied once all messages are encoded.
    """
    phrases = {} if dictionary is None else dictionary
    encoding = build_encoding_dict()
    encoded = {
        name: encode_text(message, encoding, phrases)
        for name, message in text_data.items()
    }
    recursive_encode.cache_clear()
    return encoded
|
|
|
|
|
|
def build_compact_table(data: dict[str, list[int]]) -> tuple[list[int], dict[str, int]]:
    """Pack all sequences of ``data`` into one list, sharing common tails.

    Sequences are placed longest first (ties keep their order in ``data``).
    A sequence that equals the tail of one already placed is not stored
    again; it simply points into that earlier sequence.

    Returns:
        ``(blob, positions)`` where ``blob`` is the packed list and
        ``positions[name]`` is the index in ``blob`` at which the sequence
        of ``name`` starts.

    Raises:
        AssertionError: if the packed list is longer than 0xffff entries.
    """
    blob = []
    positions = {}
    longest_first = sorted(data.items(), key=lambda item: len(item[1]), reverse=True)

    for name, encoded in longest_first:
        width = len(encoded)
        start = None
        # Look for an already placed sequence whose last `width` values match.
        for earlier, earlier_start in positions.items():
            tail_end = earlier_start + len(data[earlier])
            if blob[tail_end - width:tail_end] == encoded:
                start = tail_end - width
                break
        if start is None:
            # Nothing to share with: append at the end.
            start = len(blob)
            blob.extend(encoded)
        positions[name] = start

    assert len(blob) <= 0xffff

    return blob, positions
|
|
|
|
|
|
def write_text_data(rom: RomData, dictionary: dict[str, str], texts: dict[str, str], seasons: bool):
    """Encode all texts and dictionary phrases and write them, with their offset tables, into ``rom``.

    Layout written, starting at the game's ``text_table_eng_address``:

    1. a group table of 0x64 two-byte entries (4 dictionary groups followed by
       0x60 text groups), each holding the offset of the group's first
       per-text entry relative to the table start, or 0 for an empty text group;
    2. the per-text offset table (one two-byte entry per encoded text);
    3. the packed text bytes, split in two blobs addressed relative to
       ``text_offset_1`` and ``text_offset_2`` respectively.

    Args:
        rom: ROM image that is read from and written to.
        dictionary: phrases keyed ``DICT<group>_<hex>``; all 4 * 0x100 keys
            produced by the loop below must be present.
        texts: messages keyed ``TX_<group hex><index hex>``.
        seasons: selects the ``*_seasons`` constants when true, the ``*_ages``
            ones otherwise.

    Raises:
        AssertionError: if the text does not fit below ``text_addresses_limit``
            (only when assertions are enabled).
        KeyError: if a ``DICT`` entry expected by the table is missing.

    Also prints the number of free text bytes to stdout.
    """
    if seasons:
        text_offset_split_index = text_offset_split_index_seasons
        # Presumably a bank byte followed by a 16-bit address - confirm against GameboyAddress.
        text_offset_1 = GameboyAddress(rom.read_byte(text_offset_1_table_address_seasons), rom.read_word(text_offset_1_table_address_seasons + 1))
        text_offset_2 = GameboyAddress(rom.read_byte(text_offset_2_table_address_seasons), rom.read_word(text_offset_2_table_address_seasons + 1))
        text_table_eng_address = text_table_eng_address_seasons
        text_addresses_limit = text_addresses_limit_seasons
    else:
        text_offset_split_index = text_offset_split_index_ages
        text_offset_1 = GameboyAddress(rom.read_byte(text_offset_1_table_address_ages), rom.read_word(text_offset_1_table_address_ages + 1))
        text_offset_2 = GameboyAddress(rom.read_byte(text_offset_2_table_address_ages), rom.read_word(text_offset_2_table_address_ages + 1))
        text_table_eng_address = text_table_eng_address_ages
        text_addresses_limit = text_addresses_limit_ages

    # Split the texts by group (hex digits 3-4 of the key): groups below the
    # split index go to the first blob, the rest to the second. The "- 4"
    # accounts for the 4 dictionary groups that precede the text groups.
    dict1 = {}
    dict2 = {}
    for key in texts:
        if int(key[3:5], 16) < text_offset_split_index - 4:
            dict1[key] = texts[key]
        else:
            dict2[key] = texts[key]

    encoded_dict1 = encode_dict(dict1, dictionary)
    # The dictionary phrases themselves are stored in the first blob, encoded
    # without dictionary substitution.
    encoded_dict1.update(encode_dict(dictionary))
    encoded_dict2 = encode_dict(dict2, dictionary)

    # Two bytes per encoded entry in the per-text offset table.
    offset_table_length = (len(encoded_dict1) + len(encoded_dict2)) * 2
    text_offset_1_address = text_offset_1.address_in_rom()
    text_offset_2_address = text_offset_2.address_in_rom()
    # Write cursor in the group table.
    text_table_current_address = text_table_eng_address
    # Write cursor in the per-text offset table, which follows the 0x64 group entries.
    tx_table_current_address = text_table_eng_address + 0x64 * 2
    # First byte after both tables, expressed relative to the text_offset_1 base.
    text_offset_1_offset = text_table_eng_address + 0x64 * 2 + offset_table_length - text_offset_1_address
    assert text_offset_1_offset >= 0

    compact_table1, compact_offsets1 = build_compact_table(encoded_dict1)
    rom.write_bytes(text_offset_1_address + text_offset_1_offset, compact_table1)

    # Dictionary groups: 4 groups of 0x100 entries each.
    for i in range(4):
        rom.write_word(text_table_current_address, tx_table_current_address - text_table_eng_address)
        text_table_current_address += 2
        for j in range(0, 0x100):
            entry_name = f"DICT{i}_{simple_hex(j)}"
            rom.write_word(tx_table_current_address, compact_offsets1[entry_name] + text_offset_1_offset)
            tx_table_current_address += 2

    # Text groups stored in the first blob.
    for i in range(text_offset_split_index - 4):
        start_address = tx_table_current_address
        subid = 0
        # Entries of a group are consecutive; stop at the first missing index.
        while True:
            tx = f"TX_{simple_hex(i)}{simple_hex(subid)}"
            if tx not in dict1:
                break
            subid += 1

            rom.write_word(tx_table_current_address, compact_offsets1[tx] + text_offset_1_offset)
            tx_table_current_address += 2
        if subid > 0:
            rom.write_word(text_table_current_address, start_address - text_table_eng_address)
        else:
            # Empty group.
            rom.write_word(text_table_current_address, 0)
        text_table_current_address += 2

    # Disabled debug dump ("and False"): lists all texts from longest to shortest encoding.
    if __debug__ and False:
        sorted_dict = sorted(list(encoded_dict1.items()) + list(encoded_dict2.items()), key=lambda kv: -len(kv[1]))
        for entry in sorted_dict:
            if entry[0] in dict1:
                print(entry[0], dict1[entry[0]], len(entry[1]))
            elif entry[0] in dict2:
                print(entry[0], dict2[entry[0]], len(entry[1]))
    # The second blob starts at the text_offset_2 base, or right after the
    # first blob when that one extends past it.
    text_offset_2_offset = max(0, text_offset_1_address + text_offset_1_offset + len(compact_table1) - text_offset_2_address)
    compact_table2, compact_offsets2 = build_compact_table(encoded_dict2)
    assert text_offset_2_address + text_offset_2_offset + len(compact_table2) < text_addresses_limit, \
        f"Text is too long ({text_offset_2_address + text_offset_2_offset + len(compact_table2) - text_addresses_limit} too many bytes)"
    print(f"Free text bytes: {text_addresses_limit - text_offset_2_address - text_offset_2_offset - len(compact_table2)}")
    rom.write_bytes(text_offset_2_address + text_offset_2_offset, compact_table2)

    # Text groups stored in the second blob.
    for i in range(text_offset_split_index - 4, 0x60):
        start_address = tx_table_current_address
        subid = 0
        # Entries of a group are consecutive; stop at the first missing index.
        while True:
            tx = f"TX_{simple_hex(i)}{simple_hex(subid)}"
            if tx not in dict2:
                break
            subid += 1

            rom.write_word(tx_table_current_address, compact_offsets2[tx] + text_offset_2_offset)
            tx_table_current_address += 2
        if subid > 0:
            rom.write_word(text_table_current_address, start_address - text_table_eng_address)
        else:
            # Empty group.
            rom.write_word(text_table_current_address, 0)
        text_table_current_address += 2
|