# The idea is to parse through all of the game's Korean strings and
# determine the glyph combinations that are used to construct the syllable blocks

# In terms of font setting, Korean syllable blocks have 6 main configurations
# not all jamos are relevant for all these configurations, which reduces the complexity
# For example, the first 3 configurations involve no third jamo.

# 1 (2) - left -> right. For example 가
# 2 (2) - top -> middle. For example 고
# 3 (2) - left + right + middle. For example 과
#
# 4 (3) - left + right + bottom (no combined middle). For example 갈
# 5 (3) - top down. For example 골
# 6 (3) - left + right + bottom with middle. For example 괄

# Thinking about the characters this way eliminates a ton of permutations
# and allows us to use existing glyphs to fill in the gaps

# The hope is that by analyzing all of the Korean text, we can at least find where every glyph is used
# and then any glyphs left over we can do manually.

# read in the `game_text.txt` file and extract all the Korean strings
|
|
from itertools import product
|
|
from pprint import pprint
|
|
import json
|
|
|
|
# The Korean string is read from the line this many lines below each
# "(#x<id>" header line of game_text.txt.
KOREAN_LINE_OFFSET = 7

with open(
    "../decompiler_out/jak2/assets/game_text.txt", mode="r", encoding="utf-8"
) as f:
    game_text_lines = f.readlines()

# text id (whatever follows "(#x" on the header line) -> comma separated
# "0x.." glyph bytes of the Korean string
korean_lines = {}
for line_index, raw_line in enumerate(game_text_lines):
    header = raw_line.strip()
    if not header.startswith("(#x"):
        continue
    korean_index = line_index + KOREAN_LINE_OFFSET
    if korean_index >= len(game_text_lines):
        # a header this close to the end of the file has no korean line to read
        print(f"{header} has no korean line, game_text.txt looks truncated")
        exit(1)
    text_id = header.split("(#x")[1]
    # every "\c" escape becomes ",0x"; the slice then drops the first two
    # characters and the last one (presumably the opening quote plus the
    # leading comma, and the closing quote -- TODO confirm against the file)
    korean_lines[text_id] = (
        game_text_lines[korean_index].strip().replace("\\c", ",0x")[2:-1]
    )
|
|
|
|
# also parse subtitles
with open(
    "../decompiler_out/jak2/assets/subtitles_raw.txt", mode="r", encoding="utf-8"
) as f:
    subtitle_text_lines = f.readlines()

# every line is split on "::"; the first two fields form the key and the
# third one holds the space separated bytes
for line in subtitle_text_lines:
    fields = line.split("::")
    hex_text = fields[2].replace('"', "").replace(" ", ",")
    # pad with 0s for single hex digits (only bytes followed by a comma match)
    for digit in "123456789abcdef":
        hex_text = hex_text.replace(f"0x{digit},", f"0x0{digit},")
    # the last character of the field is dropped before storing
    korean_lines[f"{fields[0]}_{fields[1]}"] = hex_text[:-1]

print(f"Analyzing {len(korean_lines)} lines of korean text")
|
|
|
|
# we will fill up this structure, which will allow us to recreate / fill in the gaps in their korean encoding
#
# jamo : 6 orientations
# if the orientation is irrelevant, None instead of a list
# the reason we use lists is because multiple glyphs may be used to represent the same jamo
# even in the same orientation, depending on the surrounding characters
#
# we will store these surrounding "context" characters to deduce a pattern at the end
# ie. 0x06 is always used for ᄀ unless if it becomes before ᅦ then it uses 0x33
#
# NOTE: every cell must be its own list object, so the rows are built inside
# the loops instead of being shared between jamos
jamo_combinations = {}

# Choseong (initial) - relevant in all 6 orientations
for _jamo in [
    "ᄀ", "ᄁ", "ᄂ", "ᄃ", "ᄄ", "ᄅ", "ᄆ", "ᄇ", "ᄈ", "ᄉ",
    "ᄊ", "ᄋ", "ᄌ", "ᄍ", "ᄎ", "ᄏ", "ᄐ", "ᄑ", "ᄒ",
]:
    jamo_combinations[_jamo] = [[] for _ in range(6)]

# Jungseong (middle)
# - verticals
for _jamo in ["ᅵ", "ᅡ", "ᅢ", "ᅣ", "ᅤ", "ᅥ", "ᅦ", "ᅧ", "ᅨ"]:
    jamo_combinations[_jamo] = [[], None, None, [], None, None]
# - horizontals
for _jamo in ["ᅩ", "ᅭ", "ᅮ", "ᅲ", "ᅳ"]:
    jamo_combinations[_jamo] = [None, [], None, None, [], None]
# - combinations
for _jamo in ["ᅪ", "ᅫ", "ᅬ", "ᅯ", "ᅰ", "ᅱ", "ᅴ"]:
    jamo_combinations[_jamo] = [None, None, [], None, None, []]

# Jongseong (final) - only relevant in the three 3-jamo orientations
for _jamo in [
    "ᆨ", "ᆩ", "ᆪ", "ᆫ", "ᆬ", "ᆭ", "ᆮ", "ᆯ", "ᆰ",
    "ᆱ", "ᆲ", "ᆳ", "ᆴ", "ᆵ", "ᆶ", "ᆷ", "ᆸ", "ᆹ",
    "ᆺ", "ᆻ", "ᆼ", "ᆽ", "ᆾ", "ᆿ", "ᇀ", "ᇁ", "ᇂ",
]:
    jamo_combinations[_jamo] = [None, None, None, [], [], []]
|
|
# we will be reading byte strings from the game, these correspond to the font glyphs
|
|
# we can deduce all of these
|
|
jamo_glyph_mappings = {
|
|
"0x06": "ᄀ",
|
|
"0x07": "ᄁ",
|
|
"0x08": "ᄂ",
|
|
"0x09": "ᄃ",
|
|
"0x0a": "ᄄ",
|
|
"0x0b": "ᄅ",
|
|
"0x0c": "ᄆ",
|
|
"0x0d": "ᄇ",
|
|
"0x0e": "ᄈ",
|
|
"0x0f": "ᄋ",
|
|
"0x10": "ᄌ",
|
|
"0x11": "ᄍ",
|
|
"0x12": "ᄏ",
|
|
"0x13": "ᄐ",
|
|
"0x14": "ᄑ",
|
|
"0x15": "ᅡ",
|
|
"0x16": "ᅣ",
|
|
"0x17": "ᅵ",
|
|
"0x18": "ᅥ",
|
|
"0x19": "ᅧ",
|
|
"0x1a": "ᅡ",
|
|
"0x1b": "ᅣ",
|
|
"0x1c": "ᅵ",
|
|
"0x1d": "ᅥ",
|
|
"0x1e": "ᅧ",
|
|
"0x1f": "ᄂ",
|
|
"0x20": "ᄉ",
|
|
"0x21": "ᄊ",
|
|
"0x22": "ᅥ",
|
|
"0x23": "ᅧ",
|
|
"0x24": "ᅥ",
|
|
"0x25": "ᅧ",
|
|
"0x26": "ᄃ",
|
|
"0x27": "ᄄ",
|
|
"0x28": "ᄅ",
|
|
"0x29": "ᄐ",
|
|
"0x2a": "ᄑ",
|
|
"0x2b": "ᅧ",
|
|
"0x2c": "ᅧ",
|
|
"0x2d": "ᄎ",
|
|
"0x2e": "ᄒ",
|
|
"0x2f": "ᅥ",
|
|
"0x30": "ᅧ",
|
|
"0x31": "ᅥ",
|
|
"0x32": "ᅧ",
|
|
"0x33": "ᄀ",
|
|
"0x34": "ᄁ",
|
|
"0x35": "ᄂ",
|
|
"0x36": "ᄃ",
|
|
"0x37": "ᄄ",
|
|
"0x38": "ᄅ",
|
|
"0x39": "ᄆ",
|
|
"0x3a": "ᄇ",
|
|
"0x3b": "ᄈ",
|
|
"0x3c": "ᄋ",
|
|
"0x3d": "ᄌ",
|
|
"0x3e": "ᄍ",
|
|
"0x3f": "ᄏ",
|
|
"0x40": "ᄐ",
|
|
"0x41": "ᄑ",
|
|
"0x42": "ᅢ",
|
|
"0x43": "ᅤ",
|
|
"0x44": "ᅦ",
|
|
"0x45": "ᅨ",
|
|
"0x46": "ᅢ",
|
|
"0x47": "ᅤ",
|
|
"0x48": "ᅦ",
|
|
"0x49": "ᅨ",
|
|
"0x4a": "ᄂ",
|
|
"0x4b": "ᄉ",
|
|
"0x4c": "ᄊ",
|
|
"0x4d": "ᅦ",
|
|
"0x4e": "ᅨ",
|
|
"0x4f": "ᅦ",
|
|
"0x50": "ᅨ",
|
|
"0x51": "ᄃ",
|
|
"0x52": "ᄄ",
|
|
"0x53": "ᄅ",
|
|
"0x54": "ᄐ",
|
|
"0x55": "ᄑ",
|
|
"0x56": "ᅨ",
|
|
"0x57": "ᅨ",
|
|
"0x58": "ᄎ",
|
|
"0x59": "ᄒ",
|
|
"0x5a": "ᅦ",
|
|
"0x5b": "ᅨ",
|
|
"0x5c": "ᅦ",
|
|
"0x5d": "ᅨ",
|
|
"0x5e": "ᄀ",
|
|
"0x5f": "ᄏ",
|
|
"0x60": "ᅩ",
|
|
"0x61": "ᅭ",
|
|
"0x62": "ᅩ",
|
|
"0x63": "ᅭ",
|
|
"0x64": ["ᄁ", "ᅫ"],
|
|
"0x65": ["ᄁ", "ᅩ"],
|
|
"0x66": ["ᄁ", "ᅩ"],
|
|
"0x67": ["ᄁ", "ᅭ"],
|
|
"0x68": "ᄁ",
|
|
"0x69": "ᄂ",
|
|
"0x6a": "ᅳ",
|
|
"0x6b": "ᅮ",
|
|
"0x6c": "ᅲ",
|
|
"0x6d": "ᅳ",
|
|
"0x6e": "ᅮ",
|
|
"0x6f": "ᅲ",
|
|
"0x70": "ᄃ",
|
|
"0x71": "ᄄ",
|
|
"0x72": "ᄅ",
|
|
"0x73": "ᄆ",
|
|
"0x74": "ᄇ",
|
|
"0x75": "ᄈ",
|
|
"0x76": "ᄉ",
|
|
"0x77": "ᄊ",
|
|
"0x78": "ᄋ",
|
|
"0x79": "ᄌ",
|
|
"0x7a": "ᄍ",
|
|
"0x7b": "ᄎ",
|
|
"0x7c": "ᄐ",
|
|
"0x7d": "ᄑ",
|
|
"0x7e": "ᄒ",
|
|
"0x7f": "ᅩ",
|
|
"0x80": "ᅭ",
|
|
"0x81": "ᅳ",
|
|
"0x82": "ᅩ",
|
|
"0x83": "ᅭ",
|
|
"0x84": "ᅳ",
|
|
"0x85": "ᅮ",
|
|
"0x86": "ᅲ",
|
|
"0x87": "ᅮ",
|
|
"0x88": "ᅲ",
|
|
"0x89": ["ᅮ", "ᆫ"],
|
|
"0x8a": ["ᅲ", "ᆫ"],
|
|
"0x8b": "ᄀ",
|
|
"0x8c": "ᄏ",
|
|
"0x8d": "ᅩ",
|
|
"0x8e": "ᅩ",
|
|
"0x8f": ["ᄁ", "ᅩ"],
|
|
"0x90": ["ᄁ", "ᅩ"],
|
|
"0x91": "ᄁ",
|
|
"0x92": "ᄂ",
|
|
"0x93": "ᄃ",
|
|
"0x94": "ᄄ",
|
|
"0x95": "ᄅ",
|
|
"0x96": "ᄆ",
|
|
"0x97": "ᄇ",
|
|
"0x98": "ᄈ",
|
|
"0x99": "ᄉ",
|
|
"0x9a": "ᄊ",
|
|
"0x9b": "ᄋ",
|
|
"0x9c": "ᄌ",
|
|
"0x9d": "ᄍ",
|
|
"0x9e": "ᄎ",
|
|
"0x9f": "ᄐ",
|
|
"0xa0": "ᄑ",
|
|
"0xa1": "ᄒ",
|
|
"0xa2": "ᅩ",
|
|
"0xa3": "ᅳ",
|
|
"0xa4": "ᅩ",
|
|
"0xa5": "ᅳ",
|
|
"0xa6": "ᅡ",
|
|
"0xa7": "ᅵ",
|
|
"0xa8": "ᅡ",
|
|
"0xa9": "ᅵ",
|
|
"0xaa": "ᅯ",
|
|
"0xab": "ᅱ",
|
|
"0xac": "ᅯ",
|
|
"0xad": "ᅱ",
|
|
"0xae": "ᄀ",
|
|
"0xaf": "ᄏ",
|
|
"0xb0": "ᅫ",
|
|
"0xb1": "ᅫ",
|
|
"0xb2": "ᄆ",
|
|
"0xb3": "ᄁ",
|
|
"0xb4": "ᄂ",
|
|
"0xb5": "ᄃ",
|
|
"0xb6": "ᄄ",
|
|
"0xb7": "ᄅ",
|
|
"0xb8": "ᄇ",
|
|
"0xb9": "ᄉ",
|
|
"0xba": "ᄊ",
|
|
"0xbb": "ᄋ",
|
|
"0xbc": "ᄌ",
|
|
"0xbd": "ᄍ",
|
|
"0xbe": "ᄎ",
|
|
"0xbf": "ᄐ",
|
|
"0xc0": "ᄒ",
|
|
"0xc1": "ᅫ",
|
|
"0xc2": "ᅫ",
|
|
"0xc3": "ᅰ",
|
|
"0xc4": "ᅰ",
|
|
"0xc5": "ᆨ",
|
|
"0xc6": "ᆩ",
|
|
"0xc7": "ᆪ",
|
|
"0xc8": "ᆫ",
|
|
"0xc9": "ᆬ",
|
|
"0xca": "ᆭ",
|
|
"0xcb": "ᆮ",
|
|
"0xcc": "ᆯ",
|
|
"0xcd": "ᆰ",
|
|
"0xce": "ᆱ",
|
|
"0xcf": "ᆲ",
|
|
"0xd0": "ᆴ",
|
|
"0xd1": "ᆶ",
|
|
"0xd2": "ᆷ",
|
|
"0xd3": "ᆸ",
|
|
"0xd4": "ᆹ",
|
|
"0xd5": "ᆺ",
|
|
"0xd6": "ᆻ",
|
|
"0xd7": "ᆼ",
|
|
"0xd8": "ᆽ",
|
|
"0xd9": "ᆾ",
|
|
"0xda": "ᆿ",
|
|
"0xdb": "ᇀ",
|
|
"0xdc": "ᇁ",
|
|
"0xdd": "ᇂ",
|
|
"0xde": "ᆨ",
|
|
"0xdf": "ᆩ",
|
|
"0xe0": "ᆪ",
|
|
"0xe1": "ᆫ",
|
|
"0xe2": "ᆭ",
|
|
"0xe3": "ᆮ",
|
|
"0xe4": "ᆯ",
|
|
"0xe5": "ᆰ",
|
|
"0xe6": "ᆱ",
|
|
"0xe7": "ᆳ",
|
|
"0xe8": "ᆴ",
|
|
"0xe9": "ᆵ",
|
|
"0xea": "ᆶ",
|
|
"0xeb": "ᆷ",
|
|
"0xec": "ᆸ",
|
|
"0xed": "ᆺ",
|
|
"0xee": "ᆼ",
|
|
"0xef": "ᆽ",
|
|
"0xf0": "ᆾ",
|
|
"0xf1": "ᆿ",
|
|
"0xf2": "ᇀ",
|
|
"0xf3": "ᇁ",
|
|
"0xf4": "ᇂ",
|
|
"0xf5": "ᆨ",
|
|
"0xf6": "ᆫ",
|
|
"0xf7": "ᆯ",
|
|
"0xf8": "ᆱ",
|
|
"0xf9": "ᆷ",
|
|
"0xfa": "ᆸ",
|
|
"0xfb": "ᆺ",
|
|
"0xfc": "ᆻ",
|
|
"0xfd": "ᆼ",
|
|
"0xfe": "ᆨ",
|
|
"0xff": "ᆫ",
|
|
"extra_0x86": "ᆯ",
|
|
"extra_0x87": "ᆷ",
|
|
"extra_0x88": "ᆸ",
|
|
"extra_0x89": "ᆺ",
|
|
"extra_0x8a": "ᆻ",
|
|
"extra_0x8b": "ᆼ",
|
|
}
|
|
# Which position every jamo can take inside a syllable block.
# The three lists are the modern conjoining jamos in Unicode order, which are
# contiguous code point ranges of the Hangul Jamo block.
jamo_groupings = {
    # U+1100 (ᄀ) .. U+1112 (ᄒ), 19 jamos
    "initial": [chr(code_point) for code_point in range(0x1100, 0x1113)],
    # U+1161 (ᅡ) .. U+1175 (ᅵ), 21 jamos
    "median": [chr(code_point) for code_point in range(0x1161, 0x1176)],
    # U+11A8 (ᆨ) .. U+11C2 (ᇂ), 27 jamos
    "final": [chr(code_point) for code_point in range(0x11A8, 0x11C3)],
}

# combined median vowel -> the two simple vowels it is listed as made of
median_combos = {
    "ᅪ": ["ᅩ", "ᅡ"],
    "ᅫ": ["ᅩ", "ᅢ"],
    "ᅬ": ["ᅩ", "ᅵ"],
    "ᅯ": ["ᅮ", "ᅥ"],
    "ᅰ": ["ᅮ", "ᅦ"],
    "ᅱ": ["ᅮ", "ᅵ"],
    "ᅴ": ["ᅳ", "ᅵ"],
}

# median vowels grouped by where they get drawn relative to the initial jamo
median_jamo_groupings = {
    "right": ["ᅡ", "ᅢ", "ᅣ", "ᅤ", "ᅥ", "ᅦ", "ᅧ", "ᅨ", "ᅵ"],
    "bottom": ["ᅩ", "ᅭ", "ᅮ", "ᅲ", "ᅳ"],
    # exactly the vowels that have an entry in median_combos, in the same order
    "combined": list(median_combos),
}
|
|
|
|
|
|
def _group_containing(jamo, groupings):
    """Return the name of the first group in `groupings` listing `jamo`, or None."""
    for group_name, members in groupings.items():
        if jamo in members:
            return group_name
    return None


def derive_syllable_block_info(glyph_list):
    """Work out the jamos and the writing orientation of one syllable block.

    `glyph_list` is the list of glyph keys (as found in `jamo_glyph_mappings`)
    that make up a single syllable block.

    Returns a dict with:
    - "writingOrientation": 0-2 for a block of 2 jamos and 3-5 for a block of
      3 jamos, picked by the group of the second jamo in `median_jamo_groupings`
      (right, bottom, combined -- in that order)
    - "jamos": one `{"jamo", "grouping", "glyph"}` dict per jamo. When two median
      vowels were merged into a combined one, its "glyph" is the list of both
      glyphs instead of a single string.

    Any inconsistency (unknown glyph, jamo missing from the groupings, medians
    that can not be combined, unexpected amount of jamos) prints a message and
    exits the script with status 1.
    """
    # iterate the glyphs, convert them into their mappings
    jamos = []
    for glyph in glyph_list:
        if glyph not in jamo_glyph_mappings:
            print(f"{glyph} not in mapping dictionary, fix it")
            exit(1)
        mapping = jamo_glyph_mappings[glyph]
        # there are a few select glyphs that are multiple jamos
        mapped_jamos = mapping if isinstance(mapping, list) else [mapping]
        jamos.extend([jamo, glyph] for jamo in mapped_jamos)

    # Associate each jamo with its initial/median/final grouping
    jamo_info = []
    found_medians = []
    for jamo, glyph in jamos:
        jamo_grouping = _group_containing(jamo, jamo_groupings)
        if jamo_grouping is None:
            # without this check the grouping of the previous jamo would be
            # silently reused (or the name would not exist at all)
            print(f"{jamo} (from glyph {glyph}) is not in any jamo grouping, fix it")
            exit(1)
        if jamo_grouping == "median":
            found_medians.append([jamo, glyph])
        jamo_info.append({"jamo": jamo, "grouping": jamo_grouping, "glyph": glyph})

    if len(found_medians) > 2:
        print(f"found more than 2 median vowels in {jamo_info}")
        exit(1)

    # Consolidate median vowels, as jak typically draws them as a combination
    # of two glyphs
    if len(found_medians) > 1:
        first_median, second_median = found_medians
        combined_median = None
        for vowel, vowel_parts in median_combos.items():
            if first_median[0] in vowel_parts and second_median[0] in vowel_parts:
                combined_median = vowel
                break
        if combined_median is None:
            print(f"unable to combine median in {jamo_info}")
            exit(1)
        combined_entry = {
            "jamo": combined_median,
            "grouping": "median",
            "glyph": [first_median[1], second_median[1]],
        }
        # the combined vowel takes the place of the first median, the second
        # median is dropped
        merged_info = []
        for info in jamo_info:
            if info["grouping"] != "median":
                merged_info.append(info)
            elif combined_entry is not None:
                merged_info.append(combined_entry)
                combined_entry = None
        jamo_info = merged_info

    # Now that the medians are consolidated we can determine the orientation
    if len(jamo_info) not in (2, 3):
        print(f"unhandled jamo configuration {jamo_info}")
        exit(1)
    median_group = _group_containing(jamo_info[1]["jamo"], median_jamo_groupings)
    base_orientation = {"right": 0, "bottom": 1, "combined": 2}.get(median_group)
    if base_orientation is None:
        print(f"couldnt figure out median group for {jamo_info}")
        exit(1)
    # blocks with a third jamo use the second set of three orientations
    writing_orientation = base_orientation + (3 if len(jamo_info) == 3 else 0)
    return {"writingOrientation": writing_orientation, "jamos": jamo_info}
|
|
|
|
|
|
def _split_syllable_blocks(game_text_line):
    """Split one comma separated line of "0x.." bytes into its syllable blocks.

    A block starts with a `0x04` byte, followed by the amount of glyphs (hex)
    and then that many glyphs. Everything outside of such blocks is skipped.
    Returns a list of `{"numGlyphs", "rawGlyphs"}` dicts in reading order.
    Prints a message and exits with status 1 when the line ends right after a
    `0x04` marker.
    """
    # a `0x05` byte is merged with the byte after it into a single
    # "extra_0x.." glyph key
    # NOTE - hopefully this is not a hack (seems like the font textures dont start until 0x6...how conveniant!)
    text_bytes = game_text_line.replace("0x05,", "extra_").split(",")
    blocks = []
    position = 0
    while position < len(text_bytes):
        if text_bytes[position] != "0x04":
            position += 1
            continue
        if position + 1 >= len(text_bytes):
            print(f"syllable block without a glyph count at the end of {game_text_line}")
            exit(1)
        expected_num_glyphs = int(text_bytes[position + 1], 16)
        first_glyph = position + 2
        blocks.append(
            {
                "numGlyphs": expected_num_glyphs,
                "rawGlyphs": text_bytes[first_glyph : first_glyph + expected_num_glyphs],
            }
        )
        position = first_glyph + expected_num_glyphs
    return blocks


# finally start going through the real text to figure out the mappings
total_syllable_blocks = 0
for game_text_line in korean_lines.values():
    syllable_blocks = _split_syllable_blocks(game_text_line)
    total_syllable_blocks = total_syllable_blocks + len(syllable_blocks)

    for block in syllable_blocks:
        # inspect the choice of glyphs (which are individual jamo or jamo combinations)
        # to determine the jamo and the writing orientation
        jamo_info = derive_syllable_block_info(block["rawGlyphs"])
        block["jamos"] = jamo_info["jamos"]
        block["writingOrientation"] = jamo_info["writingOrientation"]

        # The (almost) final step, store this information in our big jamo combination
        # "database"
        #
        # We now effectively have an encoding, and we can process that to further refine it and
        # see what we have to do manually
        writing_orientation = block["writingOrientation"]
        for jamo in block["jamos"]:
            jamo_entry = jamo_combinations[jamo["jamo"]]
            if jamo_entry[writing_orientation] is None:
                # the jamo showed up in an orientation the table marks as irrelevant
                print(f"something is very wrong with {block}")
                exit(1)
            new_entry = {"glyph": jamo["glyph"], "context": block["jamos"]}
            # only remember every glyph + context pair once
            if new_entry not in jamo_entry[writing_orientation]:
                jamo_entry[writing_orientation].append(new_entry)
|
|
|
|
# Print some stats before finalizing the result
# - empty_cells counts the relevant (non None) cells that never got an entry
# - glyph_list ends up holding every mapped glyph that was never seen in the text
empty_cells = 0
glyph_list = set(jamo_glyph_mappings)
for orientations in jamo_combinations.values():
    for cell in orientations:
        if cell is None:
            continue
        if not cell:
            empty_cells += 1
        for entry in cell:
            seen = entry["glyph"]
            # combined medians carry a list of glyphs instead of a single one
            glyph_list.difference_update(seen if isinstance(seen, list) else [seen])

print()
print(f"Analyzed {total_syllable_blocks} syllable blocks")
print(f"{empty_cells} empty jamo cells\n")
print(f"Did not see {len(glyph_list)} out of {len(jamo_glyph_mappings)} glyphs:")
|
|
|
|
# with open("./jamo-db-before.json", mode="w", encoding="utf-8") as f:
|
|
# f.write(json.dumps(jamo_combinations, indent=2))
|
|
|
|
def format_alternative(curr_jamo, curr_glyph, full_glyph_context):
    """Describe in which jamo context `curr_glyph` is used to draw `curr_jamo`.

    Returns a `[key, value]` pair:
    - key: the jamos of `full_glyph_context` joined with commas, where every
      jamo equal to `curr_jamo` is replaced by the `<G>` placeholder
    - value: the glyph that draws this combination; a list of glyphs is joined
      with commas into one string
    """
    pattern = ",".join(
        "<G>" if curr_jamo == part["jamo"] else part["jamo"]
        for part in full_glyph_context
    )
    if isinstance(curr_glyph, list):
        return [pattern, ",".join(curr_glyph)]
    return [pattern, curr_glyph]
|
|
|
|
|
|
# Enumerate through the db, and consolidate duplicates / find the most common
# glyph for each position
#
# Every filled cell (a list of glyph + context entries) is replaced with
#   {"defaultGlyph": <most used glyph>, "alternatives": {<jamo pattern>: <glyph>}}
# Cells without any entry stay an empty list.
for jamo, orientations in jamo_combinations.items():
    for index, orientation in enumerate(orientations):
        if orientation is None:
            continue
        if len(orientation) == 0:
            # NOTE(review): these cells were already counted for the stats
            # printed earlier, so empty_cells ends up counting them twice
            empty_cells = empty_cells + 1
            continue

        # group the formatted [pattern, glyph] pairs by the glyph they use
        patterns_by_glyph = {}
        for entry in orientation:
            glyph_key = entry["glyph"]
            if isinstance(glyph_key, list):
                glyph_key = ",".join(glyph_key)
            patterns_by_glyph.setdefault(glyph_key, []).append(
                format_alternative(jamo, entry["glyph"], entry["context"])
            )

        # the glyph used in the most contexts becomes the default, the first
        # one seen wins a tie
        # TODO - handle if this is multiple glyphs
        default_glyph = max(
            patterns_by_glyph, key=lambda glyph: len(patterns_by_glyph[glyph])
        )

        # Flatten the alternatives of every other glyph, a later pattern
        # overwrites an earlier identical one
        flattened = {}
        for glyph_key, patterns in patterns_by_glyph.items():
            if glyph_key == default_glyph:
                continue
            for pattern, glyph_value in patterns:
                flattened[pattern] = glyph_value

        # Overwrite the db value
        jamo_combinations[jamo][index] = {
            "defaultGlyph": default_glyph,
            "alternatives": flattened,
        }
|
|
|
|
# These are found MANUALLY by iterating through all combinations and finding alternatives
|
|
# for jamo combinations to be legible
|
|
manual_encoding_additions = {
|
|
# Choseong (initial)
|
|
"ᄀ": [
|
|
[],
|
|
[],
|
|
["0xb3:<G>,ᅫ"],
|
|
["0x33:<G>,ᅤ,*", "0x33:<G>,ᅦ,*", "0x33:<G>,ᅨ,*"],
|
|
[],
|
|
["0xae:<G>,ᅴ,*"],
|
|
],
|
|
"ᄁ": [
|
|
["!0x34", "0x07:<G>,ᅵ"],
|
|
["0x67:<G>,ᅭ"],
|
|
[],
|
|
["!0x34"],
|
|
["!0x65", "0x68:<G>,ᅭ,*", "0x68:<G>,ᅮ,*", "0x68:<G>,ᅲ,*", "0x68:<G>,ᅳ,*"],
|
|
[
|
|
"!0x91",
|
|
"0x8f:<G>,ᅪ,*",
|
|
"0x64:<G>,ᅫ,*",
|
|
"0xb3:<G>,ᅰ,*",
|
|
"0x8f:<G>,ᅬ,*",
|
|
],
|
|
],
|
|
"ᄂ": [
|
|
["0x1f:<G>,ᅡ", "0x1f:<G>,ᅣ", "0x4a:<G>,ᅨ", "0x35:<G>,ᅤ"],
|
|
[],
|
|
["0xb4:<G>,ᅫ", "0xb4:<G>,ᅰ"],
|
|
[
|
|
"0x35:<G>,ᅢ,*",
|
|
"0x35:<G>,ᅤ,*",
|
|
"0x35:<G>,ᅦ,*",
|
|
"0x1f:<G>,ᅧ,*",
|
|
"0x1f:<G>,ᅥ,*",
|
|
"0x4a:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0xb4:<G>,ᅪ,*", "0xb4:<G>,ᅫ,*", "0xb4:<G>,ᅰ,*"],
|
|
],
|
|
"ᄃ": [
|
|
["0x26:<G>,ᅡ", "0x36:<G>,ᅤ", "0x26:<G>,ᅣ", "0x36:<G>,ᅧ", "0x51:<G>,ᅨ"],
|
|
[],
|
|
["0xb5:<G>,ᅪ", "0xb5:<G>,ᅰ"],
|
|
[
|
|
"0x36:<G>,ᅢ,*",
|
|
"0x36:<G>,ᅤ,*",
|
|
"0x51:<G>,ᅦ,*",
|
|
"0x26:<G>,ᅧ,*",
|
|
"0x51:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0xb5:<G>,ᅪ,*", "0xb5:<G>,ᅫ,*", "0xb5:<G>,ᅰ,*"],
|
|
],
|
|
"ᄄ": [
|
|
[
|
|
"0x27:<G>,ᅡ",
|
|
"0x52:<G>,ᅢ",
|
|
"0x52:<G>,ᅤ",
|
|
"0x27:<G>,ᅣ",
|
|
"0x27:<G>,ᅧ",
|
|
"0x52:<G>,ᅨ",
|
|
"0x27:<G>,ᅵ",
|
|
],
|
|
[],
|
|
["0xb6:<G>,ᅪ", "0xb6:<G>,ᅫ", "0xb6:<G>,ᅰ"],
|
|
[
|
|
"!0x0a",
|
|
"0x52:<G>,ᅢ,*",
|
|
"0x52:<G>,ᅣ,*",
|
|
"0x52:<G>,ᅤ,*",
|
|
"0x52:<G>,ᅦ,*",
|
|
"0x52:<G>,ᅧ,*",
|
|
"0x52:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0xb6:<G>,ᅪ,*", "0xb6:<G>,ᅫ,*", "0xb6:<G>,ᅰ,*"],
|
|
],
|
|
"ᄅ": [
|
|
["0x0b:<G>,ᅣ", "0x38:<G>,ᅤ"],
|
|
[],
|
|
["0xb7:<G>,ᅪ", "0xb7:<G>,ᅫ", "0xb7:<G>,ᅰ"],
|
|
[
|
|
"0x0b:<G>,ᅵ,*",
|
|
"0x0b:<G>,ᅡ,*",
|
|
"0x38:<G>,ᅢ,*",
|
|
"0x38:<G>,ᅤ,*",
|
|
"0x53:<G>,ᅦ,*",
|
|
"0x53:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0x95", "0xb7:<G>,ᅪ,*", "0xb7:<G>,ᅫ,*", "0xb7:<G>,ᅰ,*"],
|
|
],
|
|
"ᄆ": [
|
|
["0x39:<G>,ᅤ", "0x39:<G>,ᅨ"],
|
|
[],
|
|
["0xb2:<G>,ᅪ", "0xb2:<G>,ᅫ", "0xb2:<G>,ᅰ"],
|
|
["0x39:<G>,ᅢ,*", "0x39:<G>,ᅤ,*", "0x39:<G>,ᅦ,*", "0x39:<G>,ᅨ,*"],
|
|
[],
|
|
["0xb2:<G>,ᅪ,*", "0xb2:<G>,ᅫ,*", "0xb2:<G>,ᅰ,*"],
|
|
],
|
|
"ᄇ": [
|
|
["0x3a:<G>,ᅤ", "0x3a:<G>,ᅨ"],
|
|
[],
|
|
["0xb8:<G>,ᅪ", "0xb8:<G>,ᅫ", "0xb8:<G>,ᅰ"],
|
|
["0x3a:<G>,ᅢ,*", "0x3a:<G>,ᅤ,*", "0x3a:<G>,ᅦ,*", "0x3a:<G>,ᅨ,*"],
|
|
[],
|
|
["0xb8:<G>,ᅪ,*", "0xb8:<G>,ᅫ,*", "0xb8:<G>,ᅰ,*"],
|
|
],
|
|
"ᄈ": [
|
|
["!0x3b", "0x0e:<G>,ᅥ", "0x0e:<G>,ᅵ"],
|
|
[],
|
|
["0x98"],
|
|
[
|
|
"0x3b:<G>,ᅢ,*",
|
|
"0x3b:<G>,ᅣ,*",
|
|
"0x3b:<G>,ᅤ,*",
|
|
"0x3b:<G>,ᅦ,*",
|
|
"0x3b:<G>,ᅧ,*",
|
|
"0x3b:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0x98"],
|
|
],
|
|
"ᄉ": [
|
|
["0x4b:<G>,ᅤ", "0x4b:<G>,ᅨ"],
|
|
[],
|
|
["0xb9:<G>,ᅪ", "0xb9:<G>,ᅰ"],
|
|
["0x4b:<G>,ᅢ,*", "0x4b:<G>,ᅤ,*", "0x4b:<G>,ᅦ,*", "0x4b:<G>,ᅨ,*"],
|
|
[],
|
|
["0xb9:<G>,ᅪ,*", "0xb9:<G>,ᅫ,*", "0xb9:<G>,ᅰ,*"],
|
|
],
|
|
"ᄊ": [
|
|
[
|
|
"0x4c:<G>,ᅢ",
|
|
"0x4c:<G>,ᅤ",
|
|
"0x4c:<G>,ᅥ",
|
|
"0x4c:<G>,ᅧ",
|
|
"0x4c:<G>,ᅨ",
|
|
],
|
|
[],
|
|
["0xba:<G>,ᅪ", "0xba:<G>,ᅫ", "0xba:<G>,ᅰ"],
|
|
[
|
|
"0x4c:<G>,ᅢ,*",
|
|
"0x4c:<G>,ᅣ,*",
|
|
"0x4c:<G>,ᅤ,*",
|
|
"0x4c:<G>,ᅦ,*",
|
|
"0x4c:<G>,ᅧ,*",
|
|
"0x4c:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0x9a", "0xba:<G>,ᅪ,*", "0xba:<G>,ᅫ,*", "0xba:<G>,ᅰ,*"],
|
|
],
|
|
"ᄋ": [
|
|
[],
|
|
[],
|
|
["0xbb:<G>,ᅪ"],
|
|
["0x3c:<G>,ᅢ,*", "0x3c:<G>,ᅤ,*", "0x3c:<G>,ᅦ,*", "0x3c:<G>,ᅨ,*"],
|
|
[],
|
|
["0xbb:<G>,ᅪ,*", "0xbb:<G>,ᅫ,*", "0xbb:<G>,ᅰ,*"],
|
|
],
|
|
"ᄌ": [
|
|
["0x3d:<G>,ᅨ"],
|
|
[],
|
|
["0xbc:<G>,ᅪ", "0xbc:<G>,ᅫ", "0xbc:<G>,ᅰ"],
|
|
["0x3d:<G>,ᅢ,*", "0x3d:<G>,ᅤ,*", "0x3d:<G>,ᅦ,*", "0x3d:<G>,ᅨ,*"],
|
|
[],
|
|
["0xbc:<G>,ᅪ,*", "0xbc:<G>,ᅫ,*", "0xbc:<G>,ᅰ,*"],
|
|
],
|
|
"ᄍ": [
|
|
["!0x3e", "0x11:<G>,ᅵ"],
|
|
[],
|
|
["0xbd:<G>,ᅪ", "0xbd:<G>,ᅫ", "0xbd:<G>,ᅰ"],
|
|
[
|
|
"0x3e:<G>,ᅢ,*",
|
|
"0x3e:<G>,ᅣ,*",
|
|
"0x3e:<G>,ᅤ,*",
|
|
"0x3e:<G>,ᅦ,*",
|
|
"0x3e:<G>,ᅧ,*",
|
|
"0x3e:<G>,ᅨ,*",
|
|
"0x3e:<G>,ᅵ,*",
|
|
],
|
|
[],
|
|
["0xbd:<G>,ᅪ,*", "0xbd:<G>,ᅫ,*", "0xbd:<G>,ᅰ,*"],
|
|
],
|
|
"ᄎ": [
|
|
["0x58:<G>,ᅤ", "0x58:<G>,ᅨ"],
|
|
[],
|
|
["0xbe:<G>,ᅪ", "0xbe:<G>,ᅫ", "0xbe:<G>,ᅰ"],
|
|
["0x58:<G>,ᅢ,*", "0x58:<G>,ᅤ,*", "0x58:<G>,ᅦ,*", "0x58:<G>,ᅨ,*"],
|
|
[],
|
|
["0x9e", "0xbe:<G>,ᅪ,*", "0xbe:<G>,ᅫ,*", "0xbe:<G>,ᅰ,*"],
|
|
],
|
|
"ᄏ": [
|
|
["0x3f:<G>,ᅤ", "0x3f:<G>,ᅨ"],
|
|
[],
|
|
["0xaf:<G>,ᅪ", "0xaf:<G>,ᅫ", "0xaf:<G>,ᅰ"],
|
|
["0x3f:<G>,ᅢ,*", "0x3f:<G>,ᅤ,*", "0x3f:<G>,ᅦ,*", "0x3f:<G>,ᅨ,*"],
|
|
[],
|
|
["0xaf:<G>,ᅪ,*", "0xaf:<G>,ᅫ,*", "0xaf:<G>,ᅰ,*"],
|
|
],
|
|
"ᄐ": [
|
|
["0x13:<G>,ᅣ", "0x40:<G>,ᅤ", "0x54:<G>,ᅨ"],
|
|
[],
|
|
["0xbf:<G>,ᅪ", "0xbf:<G>,ᅫ", "0xbf:<G>,ᅰ"],
|
|
[
|
|
"0x40:<G>,ᅢ,*",
|
|
"0x40:<G>,ᅤ,*",
|
|
"0x54:<G>,ᅦ,*",
|
|
"0x40:<G>,ᅧ,*",
|
|
"0x54:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0x9f", "0xbf:<G>,ᅪ,*", "0xbf:<G>,ᅫ,*", "0xbf:<G>,ᅰ,*"],
|
|
],
|
|
"ᄑ": [
|
|
[
|
|
"0x2a:<G>,ᅡ",
|
|
"0x41:<G>,ᅤ",
|
|
"0x2a:<G>,ᅣ",
|
|
"0x2a:<G>,ᅧ",
|
|
],
|
|
[],
|
|
["0xa0"],
|
|
[
|
|
"0x41:<G>,ᅢ,*",
|
|
"0x41:<G>,ᅣ,*",
|
|
"0x41:<G>,ᅤ,*",
|
|
"0x2a:<G>,ᅥ,*",
|
|
"0x55:<G>,ᅦ,*",
|
|
"0x41:<G>,ᅧ,*",
|
|
"0x55:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0xa0"],
|
|
],
|
|
"ᄒ": [
|
|
["0x59:<G>,ᅤ"],
|
|
[],
|
|
["0xc0:<G>,ᅪ", "0xc0:<G>,ᅫ", "0xc0:<G>,ᅰ"],
|
|
[
|
|
"0x59:<G>,ᅡ,*",
|
|
"0x59:<G>,ᅢ,*",
|
|
"0x59:<G>,ᅣ,*",
|
|
"0x59:<G>,ᅤ,*",
|
|
"0x59:<G>,ᅦ,*",
|
|
"0x59:<G>,ᅧ,*",
|
|
"0x59:<G>,ᅨ,*",
|
|
],
|
|
[],
|
|
["0xc0:<G>,ᅪ,*", "0xc0:<G>,ᅫ,*", "0xc0:<G>,ᅰ,*"],
|
|
],
|
|
# Jungseong (middle)
|
|
"ᅡ": [[], None, None, ["0x1a:*,<G>,ᆫ"], None, None],
|
|
"ᅢ": [[], None, None, ["0x46:*,<G>,ᆫ"], None, None],
|
|
"ᅣ": [
|
|
[],
|
|
None,
|
|
None,
|
|
[],
|
|
None,
|
|
None,
|
|
],
|
|
"ᅤ": [[], None, None, ["0x43", "0x47:*,<G>,ᆫ"], None, None],
|
|
"ᅥ": [
|
|
[],
|
|
None,
|
|
None,
|
|
["0x2f:ᄎ,<G>,*", "0x2f:ᄒ,<G>,*", "0x1d:*[^ᄎ;ᄐ;ᄒ],<G>,ᆫ"],
|
|
None,
|
|
None,
|
|
],
|
|
"ᅦ": [[], None, None, ["0x5a:ᄒ,<G>,*", "0x48:*,<G>,ᆫ"], None, None],
|
|
"ᅧ": [
|
|
[],
|
|
None,
|
|
None,
|
|
["0x25:ᄂ,<G>,*", "0x1e:ᄎ,<G>,*", "0x30:ᄒ,<G>,*", "0x1e:*,<G>,ᆫ"],
|
|
None,
|
|
None,
|
|
],
|
|
"ᅨ": [
|
|
[
|
|
"0x50:ᄂ,<G>",
|
|
"0x50:ᄃ,<G>",
|
|
"0x5d:ᄄ,<G>",
|
|
"0x50:ᄅ,<G>",
|
|
"0x50:ᄆ,<G>",
|
|
"0x50:ᄈ,<G>",
|
|
],
|
|
None,
|
|
None,
|
|
["0x49:*,<G>,ᆫ"],
|
|
None,
|
|
None,
|
|
],
|
|
"ᅩ": [
|
|
None,
|
|
[
|
|
"0x62:ᄂ,<G>",
|
|
"0x62:ᄄ,<G>",
|
|
"0x62:ᄅ,<G>",
|
|
"0x62:ᄋ,<G>",
|
|
"0x62:ᄐ,<G>",
|
|
"0x62:ᄒ,<G>",
|
|
],
|
|
None,
|
|
None,
|
|
["!0x82", "0x82:*,<G>,*"],
|
|
None,
|
|
],
|
|
"ᅪ": [
|
|
None,
|
|
None,
|
|
["0x8e,0xa8:ᄁ,<G>"],
|
|
None,
|
|
None,
|
|
[
|
|
"0x8e,0xa6:ᄀ,<G>,*",
|
|
"0x8e,0xa8:ᄀ,<G>,ᆫ",
|
|
"0x8f,0xa6:ᄁ,<G>,*",
|
|
"0x8f,0xa8:ᄁ,<G>,ᆫ",
|
|
],
|
|
],
|
|
"ᅫ": [
|
|
None,
|
|
None,
|
|
[],
|
|
None,
|
|
None,
|
|
[
|
|
"!0xc1",
|
|
"0xc2:*,<G>,ᆫ",
|
|
"0x8e,0x42:ᄀ,<G>,*",
|
|
"0x8e,0x46:ᄀ,<G>,ᆫ",
|
|
"0x64:ᄁ,<G>,*",
|
|
"0x64:ᄁ,<G>,ᆫ",
|
|
],
|
|
],
|
|
"ᅬ": [
|
|
None,
|
|
None,
|
|
["0x8e,0xa9:ᄁ,<G>"],
|
|
None,
|
|
None,
|
|
[
|
|
"0xa2,0xa9:*,<G>,ᆫ",
|
|
"0x8e,0xa7:ᄀ,<G>,*",
|
|
"0x8e,0xa9:ᄀ,<G>,ᆫ",
|
|
"0x8f,0xa7:ᄁ,<G>,*",
|
|
"0x8f,0xa9:ᄁ,<G>,ᆫ",
|
|
],
|
|
],
|
|
"ᅭ": [
|
|
None,
|
|
[
|
|
"0x61:ᄀ,<G>",
|
|
"0x67:ᄁ,<G>",
|
|
"0x63:ᄂ,<G>",
|
|
"0x63:ᄃ,<G>",
|
|
"0x63:ᄄ,<G>",
|
|
"0x63:ᄅ,<G>",
|
|
"0x63:ᄈ,<G>",
|
|
"0x63:ᄊ,<G>",
|
|
"0x63:ᄋ,<G>",
|
|
"0x63:ᄏ,<G>",
|
|
"0x63:ᄐ,<G>",
|
|
"0x63:ᄒ,<G>",
|
|
],
|
|
None,
|
|
None,
|
|
["!0x83"],
|
|
None,
|
|
],
|
|
"ᅮ": [None, ["0x6e:ᄒ,<G>"], None, None, ["0x85:*,<G>,*", "0x89:*,<G>,ᆫ"], None],
|
|
"ᅯ": [None, None, [], None, None, ["0xaa:*,<G>,*", "0xac:*,<G>,ᆫ"]],
|
|
"ᅰ": [None, None, [], None, None, ["0xc4:*,<G>,ᆫ"]],
|
|
"ᅱ": [None, None, [], None, None, ["0xad:*,<G>,ᆫ"]],
|
|
"ᅲ": [
|
|
None,
|
|
[
|
|
"0x6f:ᄂ,<G>",
|
|
"0x6f:ᄄ,<G>",
|
|
"0x6f:ᄅ,<G>",
|
|
"0x6f:ᄆ,<G>",
|
|
"0x6f:ᄇ,<G>",
|
|
"0x6f:ᄈ,<G>",
|
|
"0x6f:ᄉ,<G>",
|
|
"0x6f:ᄊ,<G>",
|
|
"0x6f:ᄋ,<G>",
|
|
"0x6f:ᄌ,<G>",
|
|
"0x6f:ᄍ,<G>",
|
|
"0x6f:ᄎ,<G>",
|
|
"0x6f:ᄏ,<G>",
|
|
"0x6f:ᄐ,<G>",
|
|
"0x6f:ᄑ,<G>",
|
|
"0x6f:ᄒ,<G>",
|
|
],
|
|
None,
|
|
None,
|
|
["0x8a:*,<G>,ᆫ"],
|
|
None,
|
|
],
|
|
"ᅳ": [None, ["0x6d:ᄐ,<G>", "0x6d:ᄒ,<G>"], None, None, ["0x84:*,<G>,*"], None],
|
|
"ᅴ": [None, None, [], None, None, ["!0xa3,0xa7", "0xa3,0xa9:*,<G>,ᆫ"]],
|
|
"ᅵ": [[], None, None, ["0x1c:*,<G>,ᆫ"], None, None],
|
|
# Jongseong (final)
|
|
"ᆨ": [None, None, None, [], [], ["!0xfe"]],
|
|
"ᆩ": [None, None, None, [], [], ["0xdf"]],
|
|
"ᆪ": [None, None, None, ["0xc7"], [], ["0xe0"]],
|
|
"ᆫ": [None, None, None, [], ["0xe1:*,*,<G>"], ["!0xff"]],
|
|
"ᆬ": [None, None, None, [], ["0xc9"], ["0xc9"]],
|
|
"ᆭ": [None, None, None, [], ["0xe2"], ["0xe2"]],
|
|
"ᆮ": [None, None, None, [], [], ["0xe3"]],
|
|
"ᆯ": [None, None, None, [], [], ["extra_0x86:*,*,<G>"]],
|
|
"ᆰ": [None, None, None, [], [], ["0xe5"]],
|
|
"ᆱ": [None, None, None, [], [], ["0xf8"]],
|
|
"ᆲ": [None, None, None, [], ["0xcf"], ["0xcf"]],
|
|
"ᆳ": [None, None, None, ["0xe7"], ["0xe7"], ["0xe7"]],
|
|
"ᆴ": [None, None, None, ["0xd0"], ["0xe8"], ["0xe8"]],
|
|
"ᆵ": [None, None, None, ["0xe9"], ["0xe9"], ["0xe9"]],
|
|
"ᆶ": [None, None, None, [], [], ["0xea"]],
|
|
"ᆷ": [None, None, None, [], [], ["extra_0x87", "extra_0x87:*,*,<G>"]],
|
|
"ᆸ": [None, None, None, [], [], ["extra_0x88:*,*,<G>"]],
|
|
"ᆹ": [None, None, None, [], ["0xd4"], ["0xd4"]],
|
|
"ᆺ": [None, None, None, [], [], ["extra_0x89:*,*,<G>"]],
|
|
"ᆻ": [None, None, None, [], ["0xfc"], ["extra_0x8a:*,*,<G>"]],
|
|
"ᆼ": [None, None, None, [], [], ["extra_0x8b:*,*,<G>"]],
|
|
"ᆽ": [None, None, None, [], [], ["0xef"]],
|
|
"ᆾ": [None, None, None, [], ["0xf0"], ["0xf0"]],
|
|
"ᆿ": [None, None, None, ["0xda"], ["0xf1"], ["0xf1"]],
|
|
"ᇀ": [None, None, None, [], [], ["0xf2"]],
|
|
"ᇁ": [None, None, None, [], [], ["0xf3"]],
|
|
"ᇂ": [None, None, None, [], [], ["0xf4"]],
|
|
}
|
|
|
|
# Write the consolidated db (before the manual additions below are applied)
# to disk as indented JSON
with open("./jamo-db.json", mode="w", encoding="utf-8") as f:
    json.dump(jamo_combinations, f, indent=2)
|
|
|
|
# Fill in the rest of the encoding table with manually identified alternatives / additions
# Most of these are additions, but some override the original encoding (because it looked terrible)
#
# These are provided in specific formats:
# - 0x01:<G>,ᅤ -- means use 0x01 glyph for drawing the jamo (in position shown, in this case, only before ᅤ)
# - 0x01:<G>,* -- means use 0x01 no matter what comes after
# - 0x01:<G>,*[^ᄎ;ᄐ] -- means use 0x01 no matter what comes after (except ᄐ and ᄎ), exclusions are separated by `;`
# - 0x01,0x02:<G>,* -- sometimes a jamo requires multiple glyphs
# - !0x01 -- means to override the default glyph, this is used very rarely
# - 0x01 -- means to set the default -- some jamos never were used at all. If a default is already set, this should throw an error
#
# Note that all of these changes will override any existing alternatives, so order matters even in these lists
# - 0x01:<G>,* and
# - 0x02:<G>,ᄎ
# will replace the ᄎ involving entry that uses 0x01 because of ordering!

# A wildcard is expanded based on its position in the pattern:
# first token = initial, second = median, third = final
jamo_replacements = {
    0: jamo_groupings["initial"],
    1: jamo_groupings["median"],
    2: jamo_groupings["final"],
}


def _expand_jamo_pattern(pattern):
    """Return every concrete jamo combination covered by `pattern`, in order.

    A pattern without `*` is returned as is. Otherwise every `*` token is
    replaced by each jamo of the group matching its position, and a
    `*[^a;b]` token by each of those jamos except the listed ones. The
    combinations are generated left-to-right, the last wildcard varies fastest.
    """
    if "*" not in pattern:
        return [pattern]
    choices_per_token = []
    for position, token in enumerate(pattern.split(",")):
        if token == "*":
            choices_per_token.append(jamo_replacements[position])
        elif token.startswith("*[^"):
            # the pattern is split on commas, so the excluded jamos inside the
            # brackets are separated by `;`
            excluded = set(token[3:-1].split(";"))
            choices_per_token.append(
                [jamo for jamo in jamo_replacements[position] if jamo not in excluded]
            )
        else:
            # <G> and concrete jamos are kept untouched
            choices_per_token.append([token])
    return [",".join(combo) for combo in product(*choices_per_token)]


added_defaults = 0
replaced_defaults = 0
for jamo, orientations in jamo_combinations.items():
    for index, orientation in enumerate(orientations):
        if orientation is None:
            continue
        new_alternative_list = manual_encoding_additions[jamo][index]
        if not new_alternative_list:
            continue

        # enumerate new list and collect info / generate list of new alternatives
        new_default = None
        force_default = False
        alternative_list = []
        for item in new_alternative_list:
            # - !0x01 -- means to override the default glyph, this is used very rarely
            # - 0x01 -- means to set the default -- some jamos never were used at all
            if ":" not in item:
                if item.startswith("!"):
                    force_default = True
                    new_default = item[1:]
                else:
                    new_default = item
                continue
            glyph, pattern = item.split(":")[:2]
            # order matters, so we have to append to a list for now
            for jamo_combination in _expand_jamo_pattern(pattern):
                alternative_list.append(
                    {"glyph": glyph, "jamo_combination": jamo_combination}
                )

        # Ok, now we have our big list of new alternatives
        # we go through them one by one, adding them to the existing list
        # since alternatives is a map we don't have to concern ourselves with worrying about duplicates
        # the last one wins
        #
        # We also must set the new default if applicable
        new_orientation = orientation
        # a cell that never got any entry is still an empty list at this point
        never_seen = isinstance(new_orientation, list) and len(new_orientation) == 0
        if new_default is not None:
            if never_seen:
                new_orientation = {"defaultGlyph": new_default, "alternatives": {}}
                added_defaults = added_defaults + 1
            elif force_default:
                new_orientation["defaultGlyph"] = new_default
                replaced_defaults = replaced_defaults + 1
            else:
                # single quotes inside the f-string keep this valid before python 3.12
                print(
                    f"Trying to replace the default {new_orientation['defaultGlyph']} with {new_default} improperly"
                )
                exit(1)
        elif never_seen:
            # alternatives can not be stored on a cell that has no default glyph
            print(
                f"{jamo} has manual alternatives for orientation {index} but no default glyph, add one"
            )
            exit(1)

        # Alternatives
        for new_alt in alternative_list:
            new_orientation["alternatives"][new_alt["jamo_combination"]] = new_alt["glyph"]

        # Finally, update the DB
        jamo_combinations[jamo][index] = new_orientation
|
|
|
|
# Print some stats again, now that the manual additions have been merged in.
# Start from every known glyph and strike out each one the DB references;
# whatever is left has never been used.
empty_cells = 0
new_glyph_list = set(jamo_glyph_mappings)
for jamo_key, slots in jamo_combinations.items():
    for slot in slots:
        if slot is None:
            continue
        if not isinstance(slot, dict):
            # Still an unfilled cell: report it and count it
            print(f"{jamo_key} - {slot}")
            empty_cells += 1
            continue
        new_glyph_list.discard(slot["defaultGlyph"])
        new_glyph_list.difference_update(slot["alternatives"].values())

total_glyph_count = len(jamo_glyph_mappings.keys())
# NOTE(review): `glyph_list` is defined earlier in the file (not visible here).
# If it holds the glyphs unseen before the manual additions, this difference
# would always be empty -- confirm the intended direction.
additional_glyph_count = len(new_glyph_list.difference(glyph_list))

print()
print(f"Added {added_defaults} defaults")
print(f"Replaced {replaced_defaults} defaults")
print(f"{empty_cells} empty jamo cells\n")
print(f"Still did not see {len(new_glyph_list)} out of {total_glyph_count} glyphs:")
print(
    f"Used an additional {additional_glyph_count} glyphs, never seen in the original game!"
)
print("Never Used Glyphs:")
print(new_glyph_list)
|
|
|
|
# Persist the merged jamo DB as compact, single-line JSON
with open("./jamo-db.json", mode="w", encoding="utf-8") as db_file:
    json.dump(jamo_combinations, db_file, indent=None)
|
|
|
|
# pprint(jamo_combinations)
|
|
|
|
# Export some CSV results so that we can fill in the rest of the encoding using
# Excel (easier to keep track of what's missing).
# Each cell holds the default glyph followed by one line per known alternative:
# - we already have the alternatives, we aren't going to check those
# - we will add a new alternative only if the common glyphs don't match
#   (and we don't already have one, which can be checked manually)
# Use the grouping lists so we have a consistent row ordering.
def _jamo_csv_cell(orientation):
    """Render one orientation slot of a jamo as a single CSV cell.

    ``None`` (orientation not applicable) becomes ``N/A`` and an empty list
    (nothing known yet) becomes an empty cell. Otherwise the cell is a quoted
    string: the default glyph, then one ``- <glyph> for <context>`` line per
    alternative. Commas are replaced (by a space in the default glyph, by ``;``
    in the alternatives) so they cannot be mistaken for column separators.
    """
    if orientation is None:
        return "N/A"
    if isinstance(orientation, list) and len(orientation) == 0:
        return ""
    default_glyph = orientation["defaultGlyph"].replace(",", " ")
    alternatives = "\n".join(
        f"- {alternative_glyph} for {context.replace('<GLYPH>', '<G>')}"
        for context, alternative_glyph in orientation["alternatives"].items()
    )
    if len(alternatives) > 0:
        return f'"{default_glyph}\n{alternatives.replace(",", ";")}"'
    return f'"{default_glyph}"'


# One row per jamo: all initials first, then medians, then finals
csv_lines = [
    ",".join(_jamo_csv_cell(orientation) for orientation in jamo_combinations[jamo])
    + "\n"
    for grouping in ("initial", "median", "final")
    for jamo in jamo_groupings[grouping]
]
|
|
# with open("./jamo-db.csv", mode="w", encoding="utf-8") as f:
|
|
# f.writelines(csv_lines)
|
|
|
|
# game -> UTF-8
|
|
# - convert glyphs into individual jamo (and sometimes ascii) (lookup table)
|
|
# - compose jamo into syllable blocks (python lib)
|
|
# UTF-8 -> game
|
|
# - decompose syllable blocks into jamo (python lib)
|
|
# - convert jamo into glyphs using our lookup DB |