From 4018d15fde6b282c5dc4c1964c49eaf8e495340e Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Sun, 25 Jun 2023 14:13:32 -0500 Subject: [PATCH] ci/translations: Add a linter to check for invalid characters, fix current issues (#2774) --- .github/workflows/linting.yaml | 3 + .vscode/launch.json | 4 +- common/util/FontUtils.cpp | 1 + .../jak1/subtitle/subtitle_lines_sv-SE.json | 6 +- .../jak1/text/game_base_text_hu-HU.json | 18 +-- .../jak1/text/game_custom_text_da-DK.json | 2 +- .../jak1/text/game_custom_text_hu-HU.json | 2 +- .../jak1/text/game_custom_text_ja-JP.json | 8 +- .../jak1/text/game_custom_text_no-NO.json | 2 +- .../jak1/text/game_custom_text_sv-SE.json | 8 +- scripts/ci/lint-characters.py | 109 ++++++++++++++++++ 11 files changed, 138 insertions(+), 25 deletions(-) create mode 100644 scripts/ci/lint-characters.py diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml index 1ea2f4d614..c4466643ee 100644 --- a/.github/workflows/linting.yaml +++ b/.github/workflows/linting.yaml @@ -38,3 +38,6 @@ jobs: - name: Check for Incorrect Asserts run: python ./scripts/ci/check-for-asserts.py + + - name: Check for Invalid Translation Characters + run: python ./scripts/ci/lint-characters.py diff --git a/.vscode/launch.json b/.vscode/launch.json index 386fbb96f7..13f3c82f1c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,9 +8,9 @@ "name": "Append File Docs", "type": "python", "request": "launch", - "program": "${workspaceFolder}/temp/convert_subs.py", + "program": "${workspaceFolder}/scripts/ci/lint-characters.py", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/temp", + "cwd": "${workspaceFolder}", "args": [] }, ] diff --git a/common/util/FontUtils.cpp b/common/util/FontUtils.cpp index b2e8448707..ae59e954fd 100644 --- a/common/util/FontUtils.cpp +++ b/common/util/FontUtils.cpp @@ -551,6 +551,7 @@ static std::vector s_replace_info_jak1 = { // other {"A~Y~-21H~-5Vº~Z", "Å"}, {"N~Y~-6Hº~Z~+10H", "Nº"}, + 
{"O~Y~-16H~-1V/~Z", "Ø"}, // tildes {"N~Y~-22H~-4V~Z", "Ñ"}, diff --git a/game/assets/jak1/subtitle/subtitle_lines_sv-SE.json b/game/assets/jak1/subtitle/subtitle_lines_sv-SE.json index 1a1b5fe276..8de369c8a6 100644 --- a/game/assets/jak1/subtitle/subtitle_lines_sv-SE.json +++ b/game/assets/jak1/subtitle/subtitle_lines_sv-SE.json @@ -456,7 +456,7 @@ ], "geologist-reminder-moles": [ "HAR NI LYCKATS FÖSA DE BLIXTANDE MULLVADARNA TILLBAKA NER UNDER MARKEN?", - "NI BORDE SKYNDA PÅ – DIREKT SOLLJUS ÄR INTE RIKTIGT BRA FÖR DEM!" + "NI BORDE SKYNDA PÅ - DIREKT SOLLJUS ÄR INTE RIKTIGT BRA FÖR DEM!" ], "geologist-reminder-money": [ "JAG BEHÖVER DE DÄR KULORNA OM JAG SKA KUNNA FORSÄTTA MED MIN FORSKNING!" @@ -798,7 +798,7 @@ "ELLER SKA DU KUNNA HJÄLPA MIG UT UR DEN HÄR RÖRAN!?", "JAG KOMMER FORTSÄTTA BABBLA! FÖR ENLIGT MIN PROFESSIONELLA ÅSIKT", "ÄNDRINGEN ÄR EN FÖRBÄTTRING.", - "OCH DESSUTOM… JAK SKULLE INTE KUNNA HJÄLPA DIG ÄVEN OM JAG KUNDE DET.", + "OCH DESSUTOM... JAK SKULLE INTE KUNNA HJÄLPA DIG ÄVEN OM JAG KUNDE DET.", "VA!?", "DET FINNS BARA EN PERSON SOM HAR STUDERAT MÖRK ECO TILLRÄCKLIGT LÄNGE", "FÖR ATT KUNNA HA EN CHANS ATT ÄNDRA DIG TILLBAKA TILL DIN FÖRRA FORM:", @@ -2474,7 +2474,7 @@ "VI SKULLE KANSKE KUNNA DUCKA FRÅN FLADDERMÖSSEN!" ], "sksp0157": [ - "SÅG DU DET DÄR`? BARA TRE ANKARE KVAR!" + "SÅG DU DET DÄR? BARA TRE ANKARE KVAR!" ], "sksp0158": [ "WOW! BARA TVÅ ANKARE TILL.." 
diff --git a/game/assets/jak1/text/game_base_text_hu-HU.json b/game/assets/jak1/text/game_base_text_hu-HU.json index ebfdeca441..9930ee8a02 100644 --- a/game/assets/jak1/text/game_base_text_hu-HU.json +++ b/game/assets/jak1/text/game_base_text_hu-HU.json @@ -34,11 +34,11 @@ "012d": "50HZ", "012e": "JAK AND DAXTER", "012f": "TALÁLD MEG A REJTETT ENERGIACELLÁT", - "0130": "NINCS ELEGENDŐ HELY A MEMORY_CARD_SLOT_~D-BE ILLESZTETT MEMORY_CARD_(PS2)-N", - "0131": "NINCS MEMORY_CARD_(PS2) A MEMORY_CARD_SLOT_~D-BEN", - "0132": "MEMORY_CARD_(PS2) A MEMORY_CARD_SLOT_~D-BEN NINCS MEGFORMÁZVA", + "0130": "NINCS ELEGENDŐ HELY A MEMORY CARD SLOT ~D-BE ILLESZTETT MEMORY CARD (PS2)-N", + "0131": "NINCS MEMORY CARD (PS2) A MEMORY CARD SLOT ~D-BEN", + "0132": "MEMORY CARD (PS2) A MEMORY CARD SLOT ~D-BEN NINCS MEGFORMÁZVA", "0133": "JAK AND DAXTER ~DKB SZABAD HELYET IGÉNYEL", - "0134": "HELYEZZ BE EGY ELEGENDŐ SZABAD HELLYEL RENDELKEZŐ MEMORY_CARD_(PS2)-T, VAGY NEM FOGOD TUDNI ELMENTENI A JÁTÉKOT", + "0134": "HELYEZZ BE EGY ELEGENDŐ SZABAD HELLYEL RENDELKEZŐ MEMORY CARD (PS2)-T, VAGY NEM FOGOD TUDNI ELMENTENI A JÁTÉKOT", "0135": "HA NEM FORMÁZOD MEG, AKKOR NEM FOGOD TUDNI ELMENTENI A JÁTÉKOT", "0136": "ADATOK MENTÉSE", "0137": "ADATOK BETÖLTÉSE", @@ -51,7 +51,7 @@ "0140": "VÁLASSZ FÁJLT A MENTÉSHEZ", "0141": "VÁLASSZ FÁJLT A BETÖLTÉSHEZ", "0142": "EGY JAK AND DAXTER MENTÉS MÁR LÉTEZIK AZ ÁLTALAD KIVÁLASZTOTT FÁJLON", - "0143": "HELYEZZ BE EGY MEMORY_CARD_(PS2)-T A MEMORY_CARD_SLOT_~D-BE AMI TARTALMAZ JAK AND DAXTER MENTÉST", + "0143": "HELYEZZ BE EGY MEMORY CARD (PS2)-T A MEMORY CARD SLOT ~D-BE AMI TARTALMAZ JAK AND DAXTER MENTÉST", "0144": "FOLYTATOD?", "014b": "JÁTÉK BETÖLTÉSE", "014c": "JÁTÉK MENTÉSE", @@ -63,19 +63,19 @@ "0152": "HIBA A MENTÉS KÖZBEN", "0153": "HIBA A FORMÁZÁS KÖZBEN", "0154": "HIBA A MENTÉSFÁJL LÉTREHOZÁSA KÖZBEN", - "0156": "MEMORY_CARD_(PS2) A MEMORY_CARD_SLOT_~D-BEN EL LETT TÁVOLÍTVA", + "0156": "MEMORY CARD (PS2) A MEMORY CARD SLOT ~D-BEN EL LETT TÁVOLÍTVA", 
"0157": "AZ AUTOMATIKUS MENTÉS KI LETT KAPCSOLVA", "0158": "HASZNÁLD A JÁTÉK MENTÉSE OPCIÓT HOGY MANUÁLISAN ELMENTSD A JÁTÉKOT ÉS VISSZAKAPCSOLD AZ AUTOMATIKUS MENTÉST", - "0159": "NINCS JAK AND DAXTER JÁTÉK ADAT A MEMORY_CARD_SLOT_~D-BEN LÉVŐ MEMORY_CARD_(PS2)-N", + "0159": "NINCS JAK AND DAXTER JÁTÉK ADAT A MEMORY CARD SLOT ~D-BEN LÉVŐ MEMORY CARD (PS2)-N", "015a": "SZERETNÉL LÉTREHOZNI EGY ÚJ JAK AND DAXTER MENTÉSFÁJLT?", - "015b": "ELLENŐRIZD A MEMORY_CARD_SLOT_~D-BEN LÉVŐ MEMORY_CARD_(PS2)-T", + "015b": "ELLENŐRIZD A MEMORY CARD SLOT ~D-BEN LÉVŐ MEMORY CARD (PS2)-T", "015c": "ÚJ JÁTÉK", "015d": "VISSZAMÉSZ?", "015e": "OKÉ", "015f": "KILÉPÉS A DEMÓBÓL", "0160": "AMIKOR A KÖVETKEZŐ IKON MEGJELENIK, AKKOR A HALADÁSOD MENTÉS ALATT VAN", "0162": "FELADAT TELJESÍTVE", - "0163": "ELLENŐRIZD A MEMORY_CARD_SLOT_~D-BEN LÉVŐ MEMORY_CARD_(PS2)-T ÉS PRÓBÁLD ÚJRA", + "0163": "ELLENŐRIZD A MEMORY CARD SLOT ~D-BEN LÉVŐ MEMORY CARD (PS2)-T ÉS PRÓBÁLD ÚJRA", "0164": "A KÉPERNYŐ MOST 60HZ-RE FOG VÁLTANI", "0165": "HA NEM MEGFELELŐEN JELENIK MEG A KÉP, AKKOR A KIJELZŐD NEM TÁMOGATJA EZT A MÓDOT", "0166": "HA ÍGY VAN, AKKOR VÁRJ 10 MÁSODPERCET ÉS A KÉP VISSZA FOG VÁLTANI", diff --git a/game/assets/jak1/text/game_custom_text_da-DK.json b/game/assets/jak1/text/game_custom_text_da-DK.json index b4e8f52b01..8845bc8ef1 100644 --- a/game/assets/jak1/text/game_custom_text_da-DK.json +++ b/game/assets/jak1/text/game_custom_text_da-DK.json @@ -103,7 +103,7 @@ "103c": "4X3 (PS2)", "103d": "16X9 (PS2)", "103e": "~DX~D", - "103f": "TRYK på FOR AT SLÅ UNDERTEKTER TIL ELLER FRA", + "103f": "TRYK PÅ FOR AT SLÅ UNDERTEKTER TIL ELLER FRA", "107a": "UNDERTEKSTSPROG", "107b": "VIS TALER MED UNDERTEKSTER", "107c": "ALTID", diff --git a/game/assets/jak1/text/game_custom_text_hu-HU.json b/game/assets/jak1/text/game_custom_text_hu-HU.json index 722003cf81..f068bc7f42 100644 --- a/game/assets/jak1/text/game_custom_text_hu-HU.json +++ b/game/assets/jak1/text/game_custom_text_hu-HU.json @@ -80,7 +80,7 @@ 
"1105": "A HARC VÉGE", "1106": "VÁLTOZAT 1", "1107": "VÁLTOZAT 2", - "1110": "ENGLISH (UK},", + "1110": "ENGLISH (UK)", "1111": "PORTUGUÊS", "1112": "PORTUGUÊS (BRASIL)", "1113": "SUOMI", diff --git a/game/assets/jak1/text/game_custom_text_ja-JP.json b/game/assets/jak1/text/game_custom_text_ja-JP.json index a71dbac08d..834370be04 100644 --- a/game/assets/jak1/text/game_custom_text_ja-JP.json +++ b/game/assets/jak1/text/game_custom_text_ja-JP.json @@ -16,7 +16,7 @@ "1024": "ミュージック ゲイン", "1025": "アクター カリング", "1026": "バックグラウンド カリング", - "1027": "環境マッピングを強制する", + "1027": "??マッピングを??する", "1030": "DISCORDリッチプレゼンス", "1031": "ディスプレー モード", "1032": "ウィンドウ", @@ -25,11 +25,11 @@ "1035": "ゲーム クォリティ", "1036": "~D X ~D", "1037": "PS2 アスペクト レーショ", - "1038": "PS2アスペクト比が有効になると、4×3と16×9しかのアスペクト比が選択できません。続行しますか?", + "1038": "PS2アスペクト?が??になると、4x3と16x9しかのアスペクト?が??できません??行しますか?", "1039": "アスペクト レーショ (PS2)", "1040": "サブタイトル オン", "1041": "サブタイトル オフ", - "1042": "テキストの言語", + "1042": "テキストの??", "1043": "ディスプレー", "1044": "ディスプレー ~D", "1050": "MSAA", @@ -158,7 +158,7 @@ "10f2": "END OF THE PASS", "10f3": "ヤミノ洞くつへ", "10f4": "ガイセツ山へ", - "10f5": "宝石堀りたち", + "10f5": "宝石?りたち", "10f6": "ROBOT CAVE SCAFFOLDING", "10f7": "PRECURSOR ROBOT TOP", "10f8": "メイン洞くつ", diff --git a/game/assets/jak1/text/game_custom_text_no-NO.json b/game/assets/jak1/text/game_custom_text_no-NO.json index 8474983f3f..2709a45d11 100644 --- a/game/assets/jak1/text/game_custom_text_no-NO.json +++ b/game/assets/jak1/text/game_custom_text_no-NO.json @@ -15,7 +15,7 @@ "1023": "MUSIKK TONE-UT", "1024": "MUSIKK TONE-INN", "1025": "SKUESPILLINGUTSLUTNING", - "1026": "\nBAKGRUNNSUTSLUTNING", + "1026": "BAKGRUNNSUTSLUTNING", "1027": "FORCE MILJØKARTLEGGING", "1030": "DISCORD RICH-PRESENCE", "1031": "SKJERMINNSTILLINGER", diff --git a/game/assets/jak1/text/game_custom_text_sv-SE.json b/game/assets/jak1/text/game_custom_text_sv-SE.json index 0bcb4a4af5..058b7f9e58 100644 --- a/game/assets/jak1/text/game_custom_text_sv-SE.json +++ 
b/game/assets/jak1/text/game_custom_text_sv-SE.json @@ -68,7 +68,7 @@ "1093": "OÄNDLIG GUL ECO", "1094": "BLÅ DAXTER", "1095": "ODÖDLIGHET", - "1096": "ALLA MUSIK LÅTAR\n", + "1096": "ALLA MUSIK LÅTAR", "1097": "ANVÄND SYSTEM TID", "1098": "NÅ 100% KLART", "1099": "KLARA SPELET", @@ -124,13 +124,13 @@ "10d0": "STANDARD", "10d1": "OANVÄND", "10d2": "GRÖN VIS MAN", - "10d3": "GRÖN VIS MAN’S HYDDA", + "10d3": "GRÖN VIS MAN'S HYDDA", "10d4": "FÅGELSKÅDARE", "10d5": "BONDE", "10d6": "KEIRA", "10d7": "BORGMÄSTARE", "10d8": "SKULPTÖR", - "10d9": "JAK’S FARBROR", + "10d9": "JAK'S FARBROR", "10da": "HAMN", "10db": "UTGÅNG TILL FÖRBJUDNA TEMPLET", "10dc": "LURKER MASKIN", @@ -153,7 +153,7 @@ "10ed": "OANVÄND 1", "10ee": "AVFYRNINGSFÄLLOR", "10ef": "OANVÄND 2", - "10f0": "DÖD MAN’S DAL", + "10f0": "DÖD MAN'S DAL", "10f1": "MITTEN AV PASSET", "10f2": "SLUET AV PASSET", "10f3": "TILL SPINDEL GROTTAN",
diff --git a/scripts/ci/lint-characters.py b/scripts/ci/lint-characters.py
new file mode 100644
index 0000000000..ae886c25e5
--- /dev/null
+++ b/scripts/ci/lint-characters.py
@@ -0,0 +1,144 @@
+import glob
+import json
+import re
+import sys
+
+# TODO - add a way to make this auto replace bad characters with `?`
+
+# fmt: off
+JAK1_ALLOWED_CHARACTERS = [
+    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
+    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
+    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
+    "'", "!", "(", ")", "+", "-", ",", ".", "/", ":", "=", "<", ">", "*", "%", "?", "\"",
+    "`", "ˇ", "¨", "º", "¡", "¿", "Æ", "Ç", "ß", "™", "、", " ", "Å", "Ø",
+    "Ñ", "Ã", "Õ", "Á", "É", "Í", "Ó", "Ú", "Ő", "Ű", "Â", "Ê", "Î", "Ô", "Û", "À", "È", "Ì", "Ò", "Ù", "Ä", "Ë", "Ï", "Ö", "ö", "Ü",
+    "海", "界", "学", "ワ", "ヲ", "ン", "岩", "旧", "空", "ヮ", "撃", "賢", "湖", "口", "行", "合", "士", "寺", "山", "者", "所", "書", "小", "沼", "上", "城", "場", "出", "闇", "遺", "黄", "屋", "下", "家", "火", "花", "レ", "Œ", "ロ", "青", "・", "゛", "゜", "ー", "『", "』", "宝", "石", "赤", "跡", "川", "戦", "村", "隊", "台", "長", "鳥", "艇", "洞", "道", "発", "飛", "噴", "池", "中", "塔", "島", "部", "砲", "産", "眷", "力", "緑", "岸", "像", "谷", "心", "森", "水", "船", "世",
+    "ぁ", "あ", "ぃ", "い", "ぅ", "う", "ぇ", "え", "ぉ", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "っ", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "ゃ", "や", "ゅ", "ゆ", "ょ", "よ", "ら", "り", "る", "れ", "ろ", "ゎ", "わ", "を", "ん",
+    "が", "ぎ", "ぐ", "げ", "ご", "ざ", "じ", "ず", "ぜ", "ぞ", "だ", "ぢ", "づ", "で", "ど", "ば", "び", "ぶ", "べ", "ぼ",
+    "ぱ", "ぴ", "ぷ", "ぺ", "ぽ",
+    "ァ", "ア", "ィ", "イ", "ゥ", "ウ", "ェ", "エ", "ォ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ッ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ャ", "ヤ", "ュ", "ユ", "ョ", "ヨ", "ラ", "リ", "ル",
+    "ヴ", "ガ", "ギ", "グ", "ゲ", "ゴ", "ザ", "ジ", "ズ", "ゼ", "ゾ", "ダ", "ヂ", "ヅ", "デ", "ド", "バ", "ビ", "ブ", "ベ", "ボ",
+    "パ", "ピ", "プ", "ペ", "ポ",
+    "~"
+]
+
+# NOTE(review): every code below is an empty string in this copy of the script -
+# presumably the real codes were lost in extraction; confirm against upstream.
+# Empty entries are skipped by `is_allowed_code`.
+JAK1_ALLOWED_CODES = [
+    "",
+    "", "", "", ""
+]
+# fmt: on
+
+# Set copy of the allowed characters so each membership test is O(1)
+_JAK1_ALLOWED_CHARACTER_SET = frozenset(JAK1_ALLOWED_CHARACTERS)
+
+
+def is_allowed_code(pos, text):
+    """Return the end index of the allowed code covering `pos`, or -1.
+
+    The end index is exclusive and always greater than `pos`, so a caller
+    that jumps to it is guaranteed to make progress.
+    """
+    for code in JAK1_ALLOWED_CODES:
+        if not code:
+            # An empty pattern matches with zero width at every position,
+            # which would treat every character as part of a code
+            continue
+        # Codes are literal text, so escape any regex metacharacters
+        for match in re.finditer(re.escape(code), text):
+            if match.start() > pos:
+                # Matches are in order, none of the remaining ones cover `pos`
+                break
+            # match.end() is exclusive: the first index after the code
+            if pos < match.end():
+                return match.end()
+    return -1
+
+
+def char_allowed(char):
+    """Return True if `char` is in the allowed character set."""
+    return char in _JAK1_ALLOWED_CHARACTER_SET
+
+
+def lint_jak1_characters(text):
+    """Print each disallowed character in `text`, skipping allowed codes.
+
+    Returns True if at least one disallowed character was found.
+    """
+    invalid_characters_found = False
+    pos = 0
+    while pos < len(text):
+        character = text[pos]
+        if char_allowed(character):
+            pos += 1
+            continue
+        # Not an allowed character, but it may be part of an allowed code
+        code_end_pos = is_allowed_code(pos, text)
+        if code_end_pos != -1:
+            # Advance to the end of the code and continue checking
+            pos = code_end_pos
+            continue
+        print("Character '{}' not allowed - Found in {}".format(character, text))
+        invalid_characters_found = True
+        pos += 1
+    return invalid_characters_found
+
+
+def lint_all(texts):
+    """Lint every string in `texts`, returning True if any was invalid."""
+    found = False
+    for text in texts:
+        # No short-circuiting, every offending string must be reported
+        if lint_jak1_characters(text):
+            found = True
+    return found
+
+
+def load_json(path):
+    """Parse the UTF-8 encoded JSON file at `path`."""
+    with open(path, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def lint_text_file(path):
+    """Lint every value of a text file (a flat id to string mapping)."""
+    print("Checking {}...".format(path))
+    return lint_all(load_json(path).values())
+
+
+def lint_subtitle_file(path):
+    """Lint the speakers, cutscene lines and hint lines of a subtitle file."""
+    print("Checking {}...".format(path))
+    file_data = load_json(path)
+    found = lint_all(file_data["speakers"].values())
+    for section in ("cutscenes", "hints"):
+        for lines in file_data[section].values():
+            if lint_all(lines):
+                found = True
+    return found
+
+
+def main():
+    """Lint all Jak 1 translation files and return the process exit code."""
+    invalid_characters_found = False
+    # The globs are relative, so this is expected to run from the repo root
+    for text_file in sorted(glob.glob("./game/assets/jak1/text/*.json")):
+        if lint_text_file(text_file):
+            invalid_characters_found = True
+    subtitle_files = glob.glob("./game/assets/jak1/subtitle/*lines*.json")
+    for subtitle_file in sorted(subtitle_files):
+        if lint_subtitle_file(subtitle_file):
+            invalid_characters_found = True
+
+    if invalid_characters_found:
+        print("Invalid characters were found, see above")
+        return 1
+    print("No invalid characters found!")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())