def format_number(number: int) -> str:
    """Render ``number`` as a Rust ``u32`` literal for the generated file.

    Numbers greater than 100,000 are written with underscore digit separators
    and a ``_u32`` suffix (e.g. ``119_060_u32``). All other numbers are written
    without separators and with a plain ``u32`` suffix (e.g. ``8232u32``).
    """
    # For unknown historical reasons, numbers greater than 100,000 were
    # underscore-delimited in the generated file, so we preserve that property to
    # avoid unnecessary churn when the script is re-run.
    if number > 100000:
        # The `_` format option groups digits in threes counting from the right,
        # so values whose digit count is not a multiple of three are still split
        # on thousands boundaries (1234567 -> 1_234_567, not 123_456_7).
        return f"{number:_}_u32"

    return f"{number}u32"