formatter: add tree-sitter dependency and commit early draft work on a proper code formatter (#2536)

Author: Tyler Wilding, 2023-04-24 22:46:55 -05:00, committed by GitHub
parent 83f43b7153
commit 0ffb912a04
385 changed files with 71427 additions and 28 deletions

View File

@ -242,6 +242,20 @@
"projectTarget" : "type_searcher.exe (bin\\type_searcher.exe)",
"name" : "Tools - Type Searcher",
"args" : ["--game", "jak2", "--output-path", "./search-results.json", "--size", 255, "--fields", "[{\\\"type\\\":\\\"quaternion\\\",\\\"offset\\\":48}]"]
},
{
"type" : "default",
"project" : "CMakeLists.txt",
"projectTarget" : "formatter.exe (bin\\formatter.exe)",
"name" : "Tools - Formatter",
"args" : ["--new", "--file", "C:\\Users\\xtvas\\Repos\\opengoal\\jak-project\\test-formatter.gc"]
},
{
"type": "default",
"project": "CMakeLists.txt",
"projectTarget": "goalc-test.exe (bin\\goalc-test.exe)",
"name": "Tests - Formatter",
"args": ["--gtest_brief=0", "--gtest_filter=\"*FormatterTests*\""]
}
]
}

View File

@ -182,6 +182,11 @@ include_directories(third-party/SQLiteCpp/include)
add_subdirectory(third-party/SQLiteCpp)
string(REPLACE " ${THIRDPARTY_IGNORED_WARNINGS} " "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
# build tree-sitter parser
include_directories(third-party/tree-sitter/tree-sitter/lib/include)
include_directories(third-party/tree-sitter/tree-sitter-opengoal/include)
add_subdirectory(third-party/tree-sitter EXCLUDE_FROM_ALL)
# build common library
add_subdirectory(common)

View File

@ -75,6 +75,7 @@ tasks:
format:
desc: "Format code"
cmds:
- cmd: python ./scripts/cpp/format-includes.py
- cmd: python ./third-party/run-clang-format/run-clang-format.py -r common decompiler game goalc test tools lsp -i
# npm install -g prettier
- cmd: npx prettier --write ./decompiler/config/jak1/**/*.jsonc

View File

@ -22,32 +22,34 @@ endfunction()
write_revision_h()
add_library(common
versions/versions.cpp
audio/audio_formats.cpp
cross_os_debug/xdbg.cpp
cross_sockets/XSocket.cpp
cross_sockets/XSocketServer.cpp
cross_sockets/XSocketClient.cpp
cross_sockets/XSocketServer.cpp
custom_data/pack_helpers.cpp
custom_data/TFrag3Data.cpp
dma/dma.cpp
dma/dma_copy.cpp
dma/dma.cpp
dma/gs.cpp
formatter/formatter.cpp
global_profiler/GlobalProfiler.cpp
goos/Interpreter.cpp
goos/Object.cpp
goos/ParseHelpers.cpp
goos/Printer.cpp
goos/PrettyPrinter.cpp
goos/PrettyPrinter2.cpp
goos/Printer.cpp
goos/Reader.cpp
goos/TextDB.cpp
repl/config.cpp
repl/util.cpp
log/log.cpp
math/geometry.cpp
repl/config.cpp
repl/nrepl/ReplClient.cpp
repl/nrepl/ReplServer.cpp
repl/util.cpp
serialization/subtitles/subtitles_deser.cpp
serialization/subtitles/subtitles_ser.cpp
type_system/defenum.cpp
type_system/deftype.cpp
type_system/state.cpp
@ -55,8 +57,6 @@ add_library(common
type_system/TypeFieldLookup.cpp
type_system/TypeSpec.cpp
type_system/TypeSystem.cpp
serialization/subtitles/subtitles_ser.cpp
serialization/subtitles/subtitles_deser.cpp
util/Assert.cpp
util/BitUtils.cpp
util/compress.cpp
@ -67,18 +67,20 @@ add_library(common
util/diff.cpp
util/FileUtil.cpp
util/FontUtils.cpp
util/FrameLimiter.cpp
util/json_util.cpp
util/os.cpp
util/print_float.cpp
util/read_iso_file.cpp
util/SimpleThreadGroup.cpp
util/string_util.cpp
util/term_util.cpp
util/Timer.cpp
util/os.cpp
util/print_float.cpp
util/FrameLimiter.cpp
util/unicode_util.cpp
util/term_util.cpp )
versions/versions.cpp
)
target_link_libraries(common fmt lzokay replxx libzstd_static)
target_link_libraries(common fmt lzokay replxx libzstd_static tree-sitter)
if(WIN32)
target_link_libraries(common wsock32 ws2_32 windowsapp)

View File

@ -0,0 +1,161 @@
#include "formatter.h"
#include "common/util/FileUtil.h"
#include "common/util/string_util.h"
#include "tree_sitter/api.h"
#include "third-party/fmt/core.h"
// Declare the `tree_sitter_opengoal` function, which is
// implemented by the `tree-sitter-opengoal` library.
extern "C" {
extern const TSLanguage* tree_sitter_opengoal();
}
void walk_tree(TSTreeCursor* cursor, std::string& output, const std::string& source_code) {
// an imperative depth-first traversal of the syntax tree
while (true) {
// Process the node
const auto curr_node = ts_tree_cursor_current_node(cursor);
const std::string curr_node_type = ts_node_type(curr_node);
std::string curr_node_field_name;
if (ts_tree_cursor_current_field_name(cursor)) {
curr_node_field_name = ts_tree_cursor_current_field_name(cursor);
}
if (curr_node_field_name == "open") {
output += "(";
} else if (curr_node_field_name == "close") {
output.pop_back();
output += ") ";
}
if (curr_node_type == "sym_name" || curr_node_type == "num_lit" ||
curr_node_type == "str_lit") {
uint32_t start = ts_node_start_byte(curr_node);
uint32_t end = ts_node_end_byte(curr_node);
const char* type = ts_node_type(curr_node);
// TODO - if it's a string literal, take out any newlines and reflow the string to the
// line-length
const auto contents = source_code.substr(start, end - start);
output += contents + " ";
}
if (ts_tree_cursor_goto_first_child(cursor)) {
continue;
}
if (ts_tree_cursor_goto_next_sibling(cursor)) {
continue;
}
while (true) {
if (!ts_tree_cursor_goto_parent(cursor)) {
if (!output.empty() && output.back() == ' ') {
output.pop_back();
}
return;
}
if (ts_tree_cursor_goto_next_sibling(cursor)) {
break;
}
}
}
}
// TODO - move this to str_util
std::string repeat(size_t n, const std::string& str) {
if (n == 0 || str.empty())
return {};
if (n == 1)
return str;
const auto period = str.size();
if (period == 1)
return std::string(n, str.front());
std::string ret(str);
ret.reserve(period * n);
std::size_t m{2};
for (; m < n; m *= 2)
ret += ret;
ret.append(ret.c_str(), (n - (m / 2)) * period);
return ret;
}
// It's possible to walk a tree-sitter tree imperatively with a cursor,
// but the code for that is more verbose and less intuitive, and I'm not sure how much
// of a benefit I'd get out of it since for formatting I basically have to convert every
// cursor to its fat node anyway.
//
// In any case, do it the easy way first and refactor later
void format_code(const std::string& source,
TSNode curr_node,
std::string& output,
std::string curr_form_head = "",
int indent = 0) {
if (ts_node_child_count(curr_node) == 0) {
uint32_t start = ts_node_start_byte(curr_node);
uint32_t end = ts_node_end_byte(curr_node);
// TODO - if it's a string literal, take out any newlines and reflow the string to the
// line-length
const auto contents = source.substr(start, end - start);
if (contents == ")") {
output.pop_back();
output += ") ";
} else if (contents == "(") {
output += "(";
} else {
output += contents + " ";
}
return;
}
const std::string curr_node_type = ts_node_type(curr_node);
for (int i = 0; i < ts_node_child_count(curr_node); i++) {
auto child_node = ts_node_child(curr_node, i);
// If we are opening a list, peek at the first element in the list
// this is so we can properly handle indentation based on different forms
if (curr_node_type == "list_lit" && i == 1) {
uint32_t start = ts_node_start_byte(child_node);
uint32_t end = ts_node_end_byte(child_node);
// TODO - if it's a string literal, take out any newlines and reflow the string to the
// line-length
curr_form_head = source.substr(start, end - start);
}
std::string curr_node_field_name;
auto curr_field_name_raw = ts_node_field_name_for_child(
curr_node, i); // TODO - why is this always returning `close` for the opening paren..
if (curr_field_name_raw) {
curr_node_field_name = curr_field_name_raw;
}
if (curr_form_head == "defun" && i == 4) {
indent += 2;
output += "\n" + repeat(indent, " ");
} else if (curr_form_head == "defun" && i == 5) {
output += "\n" + repeat(indent, " ");
}
format_code(source, child_node, output, curr_form_head, indent);
if (curr_node_type == "source") {
output += "\n\n";
}
}
}
std::string formatter::format_code(const std::string& source) {
// Create a parser.
std::shared_ptr<TSParser> parser(ts_parser_new(), TreeSitterParserDeleter());
// Set the parser's language (JSON in this case).
ts_parser_set_language(parser.get(), tree_sitter_opengoal());
// Build a syntax tree based on source code stored in a string.
std::shared_ptr<TSTree> tree(
ts_parser_parse_string(parser.get(), NULL, source.c_str(), source.length()),
TreeSitterTreeDeleter());
// Get the root node of the syntax tree.
TSNode root_node = ts_tree_root_node(tree.get());
std::string output = "";
format_code(source, root_node, output, "", 0);
return str_util::trim(output);
}
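
Not part of this commit, but for context: a minimal sketch of how the new formatter::format_code entry point could be driven from a small standalone tool such as the formatter.exe target referenced in the launch configuration above. The command-line handling below is hypothetical; only formatter::format_code comes from this commit.

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

#include "common/formatter/formatter.h"

int main(int argc, char** argv) {
  if (argc < 2) {
    std::cerr << "usage: formatter <path-to-.gc-file>\n";
    return 1;
  }
  // Read the whole source file into a string (plain ifstream keeps the sketch self-contained).
  std::ifstream in(argv[1]);
  if (!in) {
    std::cerr << "could not open " << argv[1] << "\n";
    return 1;
  }
  std::stringstream buffer;
  buffer << in.rdbuf();
  // Run the tree-sitter backed formatter and print the result.
  std::cout << formatter::format_code(buffer.str()) << "\n";
  return 0;
}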

View File

@ -0,0 +1,17 @@
#pragma once
#include <string>
#include "tree_sitter/api.h"
namespace formatter {
struct TreeSitterParserDeleter {
void operator()(TSParser* ptr) const { ts_parser_delete(ptr); }
};
struct TreeSitterTreeDeleter {
void operator()(TSTree* ptr) const { ts_tree_delete(ptr); }
};
std::string format_code(const std::string& source);
} // namespace formatter
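
A side note on the deleter structs above: they let any standard smart pointer own the raw tree-sitter handles. formatter.cpp uses std::shared_ptr; a minimal sketch with std::unique_ptr (hypothetical, not in the commit) would look like this:

#include <memory>

#include "common/formatter/formatter.h"
#include "tree_sitter/api.h"

// The OpenGOAL grammar entry point, as declared in formatter.cpp.
extern "C" const TSLanguage* tree_sitter_opengoal();

using ParserPtr = std::unique_ptr<TSParser, formatter::TreeSitterParserDeleter>;

int main() {
  ParserPtr parser(ts_parser_new());
  ts_parser_set_language(parser.get(), tree_sitter_opengoal());
  // ... parse and inspect trees here ...
  return 0;  // ts_parser_delete runs automatically when `parser` goes out of scope
}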

View File

@ -93,4 +93,12 @@ std::vector<std::string> regex_get_capture_groups(const std::string& str,
}
return groups;
}
bool replace(std::string& str, const std::string& from, const std::string& to) {
size_t start_pos = str.find(from);
if (start_pos == std::string::npos)
return false;
str.replace(start_pos, from.length(), to);
return true;
}
} // namespace str_util

View File

@ -20,4 +20,5 @@ std::string diff(const std::string& lhs, const std::string& rhs);
std::vector<std::string> split(const ::std::string& str, char delimiter = '\n');
std::string join(const std::vector<std::string>& strs, const std::string& join_with);
std::vector<std::string> regex_get_capture_groups(const std::string& str, const std::string& regex);
bool replace(std::string& str, const std::string& from, const std::string& to);
} // namespace str_util
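
For clarity, the new str_util::replace helper only rewrites the first occurrence of `from` and reports whether anything changed; a minimal usage sketch (hypothetical values) is:

#include <cassert>
#include <string>

#include "common/util/string_util.h"

int main() {
  std::string path = "a/b/a";
  bool changed = str_util::replace(path, "a", "x");
  assert(changed);
  assert(path == "x/b/a");                        // only the first "a" is replaced
  assert(!str_util::replace(path, "zzz", "y"));   // no match, string left untouched
  return 0;
}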

View File

@ -47,11 +47,11 @@ std::vector<std::string> valid_game_version_names() {
}
std::string build_revision() {
if (BUILT_TAG != "") {
return BUILT_TAG;
if (std::string(BUILT_TAG) != "") {
return std::string(BUILT_TAG);
}
if (BUILT_SHA != "") {
return BUILT_SHA;
if (std::string(BUILT_SHA) != "") {
return std::string(BUILT_SHA);
}
return "Unknown Revision";
}
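
The change above wraps BUILT_TAG / BUILT_SHA in std::string before comparing. Assuming those macros expand to string literals (as the surrounding code suggests), comparing them directly against "" compares pointers rather than contents and most compilers warn about it; the wrapper forces a content comparison. A small illustration with a hypothetical macro:

#include <cassert>
#include <string>

#define HYPOTHETICAL_TAG ""  // stand-in for BUILT_TAG

int main() {
  // (HYPOTHETICAL_TAG != "") would compare the addresses of two string
  // literals, which is not a content comparison and triggers a warning.
  // Wrapping one side in std::string compares the actual characters:
  assert(std::string(HYPOTHETICAL_TAG) == "");
  return 0;
}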

View File

@ -3,8 +3,8 @@
#include <string>
#include <unordered_map>
#include <decompiler/ObjectFile/ObjectFileDB.h>
#include "decompiler/Function/Function.h"
#include "decompiler/ObjectFile/ObjectFileDB.h"
#include "decompiler/util/DecompilerTypeSystem.h"
namespace decompiler {

View File

@ -1,5 +1,3 @@
{
"hud": [
[14, "(function object :behavior hud)"]
]
}
"hud": [[14, "(function object :behavior hud)"]]
}

View File

@ -9,6 +9,7 @@
#include "runtime.h"
#include "common/global_profiler/GlobalProfiler.h"
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/util/os.h"

View File

@ -11,7 +11,8 @@
#include "common/log/log.h"
#include "sfxblock2.h"
#include <third-party/fmt/core.h>
#include "third-party/fmt/core.h"
namespace snd {
enum chunk : u32 { bank, samples, midi };

View File

@ -7,7 +7,8 @@
#include "common/log/log.h"
#include "game/sound/989snd/util.h"
#include <third-party/fmt/core.h>
#include "third-party/fmt/core.h"
namespace snd {
/*

View File

@ -4,7 +4,7 @@
#include <fstream>
#include <third-party/fmt/core.h>
#include "third-party/fmt/core.h"
#ifdef _WIN32
#include <combaseapi.h>

View File

@ -11,5 +11,5 @@ add_executable(lsp
protocol/hover.cpp
state/data/mips_instruction.cpp)
target_link_libraries(lsp common decomp)
target_link_libraries(lsp common decomp tree-sitter)

View File

@ -0,0 +1,34 @@
# Visual Studio is dumb and doesn't let you customize the automatic include formats
# so I'll do it myself.
import glob
import re
folders_to_check = ["common", "decompiler", "game", "goalc", "test", "tools", "lsp"]
for folder in folders_to_check:
files_to_check = glob.glob("./{}/**/*.cpp".format(folder), recursive=True)
files_to_check += glob.glob("./{}/**/*.h".format(folder), recursive=True)
for filename in files_to_check:
# Get the file contents
with open(filename, "r", encoding="utf-8") as f:
lines = f.readlines()
new_lines = []
need_to_write = False
for i, line in enumerate(lines):
include_match = re.search(r"#include <(.*)>", line)
if include_match:
include = include_match.groups()[0]
if include.startswith("sys/") or include.startswith("netinet/") or include.startswith("arpa/"):
new_lines.append(line)
elif "/" in include:
new_lines.append(line.replace("<", "\"").replace(">", "\""))
need_to_write = True
else:
new_lines.append(line)
else:
new_lines.append(line)
if need_to_write:
print("Fixing includes in {}".format(filename))
with open(filename, "w", encoding="utf-8") as f:
f.writelines(new_lines)

View File

@ -35,10 +35,12 @@ add_executable(goalc-test
${CMAKE_CURRENT_LIST_DIR}/decompiler/test_DisasmVifDecompile.cpp
${CMAKE_CURRENT_LIST_DIR}/decompiler/test_VuDisasm.cpp
${CMAKE_CURRENT_LIST_DIR}/game/test_newpad.cpp
${CMAKE_CURRENT_LIST_DIR}/common/formatter/test_formatter.cpp
${GOALC_TEST_FRAMEWORK_SOURCES}
${GOALC_TEST_CASES})
${GOALC_TEST_CASES}
)
target_link_libraries(goalc-test common runtime compiler gtest decomp Zydis libzstd_static)
target_link_libraries(goalc-test common runtime compiler gtest decomp Zydis libzstd_static tree-sitter)
if(WIN32)
target_link_libraries(goalc-test mman)

View File

@ -0,0 +1,34 @@
===
Basic Function
===
(defun test-function ((hello string))
"world hello"
(+ 1 1))
---
(defun test-function ((hello string))
"world hello"
(+ 1 1))
===
Two Functions
===
(defun test-function ((hello string))
"world hello"
(+ 1 1))
(defun test-function ((hello string))
"world hello"
(+ 1 1))
---
(defun test-function ((hello string))
"world hello"
(+ 1 1))
(defun test-function ((hello string))
"world hello"
(+ 1 1))

View File

@ -0,0 +1,105 @@
// TODO - eventually replace our `goalc` tests with this setup
// A simple test runner framework for debugging / iterating on the formatter
// Tests are defined in files as such:
/*
===
TEST NAME
===
INPUT
---
EXPECTED OUTPUT
*/
// Test files can contain multiple tests; when run, we recurse through a directory
// looking for any `.test` files and run each of them through the framework
//
// Any differences will be diff'd and displayed
#include "common/formatter/formatter.h"
#include "common/util/FileUtil.h"
#include "common/util/string_util.h"
#include "gtest/gtest.h"
#include "third-party/fmt/core.h"
struct TestDefinition {
std::string name;
std::string input;
std::string output;
};
bool run_tests(fs::path file_path) {
// Read in the file, and run the test
const auto contents = str_util::split(file_util::read_text_file(file_path));
std::vector<TestDefinition> tests;
TestDefinition curr_test;
int i = 0;
while (i < contents.size()) {
const auto& line = contents.at(i);
if (line == "===") {
curr_test = TestDefinition();
curr_test.name = contents.at(i + 1);
i += 3;
continue;
}
// Parse the input and output
if (!curr_test.name.empty() && line.empty()) {
i++;
while (true) {
if (contents.at(i) == "---") {
i++;
curr_test.input = str_util::trim(curr_test.input);
break;
}
curr_test.input += contents.at(i) + "\n";
i++;
}
i++;
while (true) {
if (i == contents.size() || contents.at(i) == "===") {
curr_test.output = str_util::trim(curr_test.output);
tests.push_back(curr_test);
break;
}
curr_test.output += contents.at(i) + "\n";
i++;
}
continue;
}
}
// Run the tests, report successes and failures
bool test_failed = false;
fmt::print("{}:\n", file_util::base_name(file_path.string()));
for (const auto& test : tests) {
const auto formatted_result = formatter::format_code(test.input);
if (formatted_result != test.output) {
fmt::print(" ❌ - {}\n", test.name);
fmt::print("{}\n", str_util::diff(test.output, formatted_result));
test_failed = true;
} else {
fmt::print(" ✅ - {}\n", test.name);
}
}
return test_failed;
}
bool find_and_run_tests() {
// Enumerate test files
const auto test_files = file_util::find_files_recursively(
file_util::get_file_path({"test/common/formatter/corpus"}), std::regex("^.*\\.test$"));
bool failed = false;
for (const auto& file : test_files) {
failed = run_tests(file) || failed;
}
return !failed;
}
TEST(Formatter, FormatterTests) {
EXPECT_TRUE(find_and_run_tests());
}

4
third-party/tree-sitter/CMakeLists.txt generated vendored Normal file
View File

@ -0,0 +1,4 @@
add_library(tree-sitter
tree-sitter/lib/src/lib.c
tree-sitter-opengoal/parser.c)

View File

@ -0,0 +1,669 @@
{
"name": "opengoal",
"rules": {
"source": {
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_form"
},
{
"type": "SYMBOL",
"name": "_gap"
}
]
}
},
"_gap": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_ws"
},
{
"type": "SYMBOL",
"name": "comment"
},
{
"type": "SYMBOL",
"name": "comment_multiline"
}
]
},
"_ws": {
"type": "TOKEN",
"content": {
"type": "REPEAT1",
"content": {
"type": "PATTERN",
"value": "[\\f\\n\\r\\t \\u000B\\u001C\\u001D\\u001E\\u001F\\u2028\\u2029\\u1680\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2008\\u2009\\u200a\\u205f\\u3000]"
}
}
},
"comment": {
"type": "TOKEN",
"content": {
"type": "PATTERN",
"value": "(;).*\\n?"
}
},
"comment_multiline": {
"type": "SEQ",
"members": [
{
"type": "TOKEN",
"content": {
"type": "STRING",
"value": "#|"
}
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^|#]+"
},
{
"type": "PATTERN",
"value": "#[^|]"
},
{
"type": "PATTERN",
"value": "[^#]\\|"
},
{
"type": "PATTERN",
"value": "[\\n\\r]+"
}
]
}
},
{
"type": "TOKEN",
"content": {
"type": "STRING",
"value": "|#"
}
}
]
},
"_form": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "num_lit"
},
{
"type": "SYMBOL",
"name": "kwd_lit"
},
{
"type": "SYMBOL",
"name": "str_lit"
},
{
"type": "SYMBOL",
"name": "char_lit"
},
{
"type": "SYMBOL",
"name": "null_lit"
},
{
"type": "SYMBOL",
"name": "bool_lit"
},
{
"type": "SYMBOL",
"name": "sym_lit"
},
{
"type": "SYMBOL",
"name": "list_lit"
},
{
"type": "SYMBOL",
"name": "quoting_lit"
},
{
"type": "SYMBOL",
"name": "quasi_quoting_lit"
},
{
"type": "SYMBOL",
"name": "unquote_splicing_lit"
},
{
"type": "SYMBOL",
"name": "unquoting_lit"
}
]
},
"num_lit": {
"type": "TOKEN",
"content": {
"type": "PREC",
"value": 10,
"content": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[+-]"
},
{
"type": "BLANK"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "#x"
},
{
"type": "REPEAT1",
"content": {
"type": "PATTERN",
"value": "[0-9a-fA-F]"
}
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "#b"
},
{
"type": "REPEAT1",
"content": {
"type": "PATTERN",
"value": "[0-1]"
}
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "REPEAT1",
"content": {
"type": "PATTERN",
"value": "[0-9]"
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "."
},
{
"type": "REPEAT",
"content": {
"type": "PATTERN",
"value": "[0-9]"
}
}
]
},
{
"type": "BLANK"
}
]
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "REPEAT1",
"content": {
"type": "PATTERN",
"value": "[0-9]"
}
}
]
}
]
}
]
}
}
},
"kwd_lit": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_kwd_unqualified"
}
]
},
"_kwd_unqualified": {
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "marker",
"content": {
"type": "SYMBOL",
"name": "_kwd_marker"
}
},
{
"type": "FIELD",
"name": "name",
"content": {
"type": "ALIAS",
"content": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "PATTERN",
"value": "[^\\f\\n\\r\\t ()\\[\\]{}\"@~^;`\\\\,:/\\u000B\\u001C\\u001D\\u001E\\u001F\\u2028\\u2029\\u1680\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2008\\u2009\\u200a\\u205f\\u3000]"
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[:']"
},
{
"type": "PATTERN",
"value": "[^\\f\\n\\r\\t ()\\[\\]{}\"@~^;`\\\\,:/\\u000B\\u001C\\u001D\\u001E\\u001F\\u2028\\u2029\\u1680\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2008\\u2009\\u200a\\u205f\\u3000]"
}
]
}
}
]
}
},
"named": true,
"value": "kwd_name"
}
}
]
}
},
"_kwd_marker": {
"type": "CHOICE",
"members": [
{
"type": "TOKEN",
"content": {
"type": "STRING",
"value": ":"
}
}
]
},
"str_lit": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\""
},
{
"type": "REPEAT",
"content": {
"type": "PATTERN",
"value": "[^\"\\\\]"
}
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "PATTERN",
"value": "."
},
{
"type": "REPEAT",
"content": {
"type": "PATTERN",
"value": "[^\"\\\\]"
}
}
]
}
},
{
"type": "STRING",
"value": "\""
}
]
}
},
"char_lit": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "#\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": ".|\\n"
},
{
"type": "STRING",
"value": "\\s"
},
{
"type": "STRING",
"value": "\\n"
},
{
"type": "STRING",
"value": "\\t"
}
]
}
]
}
},
"null_lit": {
"type": "TOKEN",
"content": {
"type": "STRING",
"value": "none"
}
},
"bool_lit": {
"type": "TOKEN",
"content": {
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "#f"
},
{
"type": "STRING",
"value": "#t"
}
]
}
},
"sym_lit": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_sym_unqualified"
}
]
}
]
},
"_sym_unqualified": {
"type": "FIELD",
"name": "name",
"content": {
"type": "ALIAS",
"content": {
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "/"
},
{
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "PATTERN",
"value": "[^\\f\\n\\r\\t \\/()\\[\\]{}\"@~^;`\\\\,:#'0-9\\u000B\\u001C\\u001D\\u001E\\u001F\\u2028\\u2029\\u1680\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2008\\u2009\\u200a\\u205f\\u3000]"
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^\\f\\n\\r\\t \\/()\\[\\]{}\"@~^;`\\\\,:#'0-9\\u000B\\u001C\\u001D\\u001E\\u001F\\u2028\\u2029\\u1680\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2008\\u2009\\u200a\\u205f\\u3000]"
},
{
"type": "PATTERN",
"value": "[:#'0-9]"
}
]
}
}
]
}
}
]
},
"named": true,
"value": "sym_name"
}
},
"list_lit": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_bare_list_lit"
}
]
},
"_bare_list_lit": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "open",
"content": {
"type": "STRING",
"value": "("
}
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_form"
}
},
{
"type": "SYMBOL",
"name": "_gap"
}
]
}
},
{
"type": "FIELD",
"name": "close",
"content": {
"type": "STRING",
"value": ")"
}
}
]
},
"quoting_lit": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "marker",
"content": {
"type": "STRING",
"value": "'"
}
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_gap"
}
},
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_form"
}
}
]
},
"quasi_quoting_lit": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "marker",
"content": {
"type": "STRING",
"value": "`"
}
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_gap"
}
},
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_form"
}
}
]
},
"unquote_splicing_lit": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "marker",
"content": {
"type": "STRING",
"value": ",@"
}
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_gap"
}
},
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_form"
}
}
]
},
"unquoting_lit": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "marker",
"content": {
"type": "STRING",
"value": ","
}
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_gap"
}
},
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_form"
}
}
]
}
},
"extras": [],
"conflicts": [],
"precedences": [],
"externals": [],
"inline": [
"_kwd_unqualified",
"_sym_unqualified"
],
"supertypes": []
}
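
The "open", "close", "value" and "marker" fields declared in the grammar above are what the formatter keys on (e.g. the curr_node_field_name == "open" check in formatter.cpp). As a sketch of an alternative to walking children by index, the tree-sitter C API can also look fields up by name directly; the helper below is hypothetical and not part of the commit:

#include <cstdint>
#include <string>

#include "tree_sitter/api.h"

// Given a list_lit node, return the text of its first "value" child
// (the head symbol of the form, e.g. `defun`).
std::string form_head(TSNode list_lit, const std::string& source) {
  TSNode head = ts_node_child_by_field_name(list_lit, "value", 5);
  if (ts_node_is_null(head)) {
    return "";  // empty list: only the "open"/"close" parens are present
  }
  uint32_t start = ts_node_start_byte(head);
  uint32_t end = ts_node_end_byte(head);
  return source.substr(start, end - start);
}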

View File

@ -0,0 +1,614 @@
[
{
"type": "comment_multiline",
"named": true,
"fields": {}
},
{
"type": "kwd_lit",
"named": true,
"fields": {
"marker": {
"multiple": false,
"required": true,
"types": [
{
"type": ":",
"named": false
}
]
},
"name": {
"multiple": false,
"required": true,
"types": [
{
"type": "kwd_name",
"named": true
}
]
}
}
},
{
"type": "list_lit",
"named": true,
"fields": {
"close": {
"multiple": false,
"required": true,
"types": [
{
"type": ")",
"named": false
}
]
},
"open": {
"multiple": false,
"required": true,
"types": [
{
"type": "(",
"named": false
}
]
},
"value": {
"multiple": true,
"required": false,
"types": [
{
"type": "bool_lit",
"named": true
},
{
"type": "char_lit",
"named": true
},
{
"type": "kwd_lit",
"named": true
},
{
"type": "list_lit",
"named": true
},
{
"type": "null_lit",
"named": true
},
{
"type": "num_lit",
"named": true
},
{
"type": "quasi_quoting_lit",
"named": true
},
{
"type": "quoting_lit",
"named": true
},
{
"type": "str_lit",
"named": true
},
{
"type": "sym_lit",
"named": true
},
{
"type": "unquote_splicing_lit",
"named": true
},
{
"type": "unquoting_lit",
"named": true
}
]
}
},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "comment",
"named": true
},
{
"type": "comment_multiline",
"named": true
}
]
}
},
{
"type": "quasi_quoting_lit",
"named": true,
"fields": {
"marker": {
"multiple": false,
"required": true,
"types": [
{
"type": "`",
"named": false
}
]
},
"value": {
"multiple": false,
"required": true,
"types": [
{
"type": "bool_lit",
"named": true
},
{
"type": "char_lit",
"named": true
},
{
"type": "kwd_lit",
"named": true
},
{
"type": "list_lit",
"named": true
},
{
"type": "null_lit",
"named": true
},
{
"type": "num_lit",
"named": true
},
{
"type": "quasi_quoting_lit",
"named": true
},
{
"type": "quoting_lit",
"named": true
},
{
"type": "str_lit",
"named": true
},
{
"type": "sym_lit",
"named": true
},
{
"type": "unquote_splicing_lit",
"named": true
},
{
"type": "unquoting_lit",
"named": true
}
]
}
},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "comment",
"named": true
},
{
"type": "comment_multiline",
"named": true
}
]
}
},
{
"type": "quoting_lit",
"named": true,
"fields": {
"marker": {
"multiple": false,
"required": true,
"types": [
{
"type": "'",
"named": false
}
]
},
"value": {
"multiple": false,
"required": true,
"types": [
{
"type": "bool_lit",
"named": true
},
{
"type": "char_lit",
"named": true
},
{
"type": "kwd_lit",
"named": true
},
{
"type": "list_lit",
"named": true
},
{
"type": "null_lit",
"named": true
},
{
"type": "num_lit",
"named": true
},
{
"type": "quasi_quoting_lit",
"named": true
},
{
"type": "quoting_lit",
"named": true
},
{
"type": "str_lit",
"named": true
},
{
"type": "sym_lit",
"named": true
},
{
"type": "unquote_splicing_lit",
"named": true
},
{
"type": "unquoting_lit",
"named": true
}
]
}
},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "comment",
"named": true
},
{
"type": "comment_multiline",
"named": true
}
]
}
},
{
"type": "source",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "bool_lit",
"named": true
},
{
"type": "char_lit",
"named": true
},
{
"type": "comment",
"named": true
},
{
"type": "comment_multiline",
"named": true
},
{
"type": "kwd_lit",
"named": true
},
{
"type": "list_lit",
"named": true
},
{
"type": "null_lit",
"named": true
},
{
"type": "num_lit",
"named": true
},
{
"type": "quasi_quoting_lit",
"named": true
},
{
"type": "quoting_lit",
"named": true
},
{
"type": "str_lit",
"named": true
},
{
"type": "sym_lit",
"named": true
},
{
"type": "unquote_splicing_lit",
"named": true
},
{
"type": "unquoting_lit",
"named": true
}
]
}
},
{
"type": "sym_lit",
"named": true,
"fields": {
"name": {
"multiple": false,
"required": true,
"types": [
{
"type": "sym_name",
"named": true
}
]
}
}
},
{
"type": "unquote_splicing_lit",
"named": true,
"fields": {
"marker": {
"multiple": false,
"required": true,
"types": [
{
"type": ",@",
"named": false
}
]
},
"value": {
"multiple": false,
"required": true,
"types": [
{
"type": "bool_lit",
"named": true
},
{
"type": "char_lit",
"named": true
},
{
"type": "kwd_lit",
"named": true
},
{
"type": "list_lit",
"named": true
},
{
"type": "null_lit",
"named": true
},
{
"type": "num_lit",
"named": true
},
{
"type": "quasi_quoting_lit",
"named": true
},
{
"type": "quoting_lit",
"named": true
},
{
"type": "str_lit",
"named": true
},
{
"type": "sym_lit",
"named": true
},
{
"type": "unquote_splicing_lit",
"named": true
},
{
"type": "unquoting_lit",
"named": true
}
]
}
},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "comment",
"named": true
},
{
"type": "comment_multiline",
"named": true
}
]
}
},
{
"type": "unquoting_lit",
"named": true,
"fields": {
"marker": {
"multiple": false,
"required": true,
"types": [
{
"type": ",",
"named": false
}
]
},
"value": {
"multiple": false,
"required": true,
"types": [
{
"type": "bool_lit",
"named": true
},
{
"type": "char_lit",
"named": true
},
{
"type": "kwd_lit",
"named": true
},
{
"type": "list_lit",
"named": true
},
{
"type": "null_lit",
"named": true
},
{
"type": "num_lit",
"named": true
},
{
"type": "quasi_quoting_lit",
"named": true
},
{
"type": "quoting_lit",
"named": true
},
{
"type": "str_lit",
"named": true
},
{
"type": "sym_lit",
"named": true
},
{
"type": "unquote_splicing_lit",
"named": true
},
{
"type": "unquoting_lit",
"named": true
}
]
}
},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "comment",
"named": true
},
{
"type": "comment_multiline",
"named": true
}
]
}
},
{
"type": "#|",
"named": false
},
{
"type": "'",
"named": false
},
{
"type": "(",
"named": false
},
{
"type": ")",
"named": false
},
{
"type": ",",
"named": false
},
{
"type": ",@",
"named": false
},
{
"type": ":",
"named": false
},
{
"type": "`",
"named": false
},
{
"type": "bool_lit",
"named": true
},
{
"type": "char_lit",
"named": true
},
{
"type": "comment",
"named": true
},
{
"type": "kwd_name",
"named": true
},
{
"type": "null_lit",
"named": true
},
{
"type": "num_lit",
"named": true
},
{
"type": "str_lit",
"named": true
},
{
"type": "sym_name",
"named": true
},
{
"type": "|#",
"named": false
}
]

1947
third-party/tree-sitter/tree-sitter-opengoal/parser.c generated vendored Normal file

File diff suppressed because it is too large.

View File

@ -0,0 +1,224 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

5
third-party/tree-sitter/tree-sitter/.gitattributes generated vendored Normal file
View File

@ -0,0 +1,5 @@
/lib/src/unicode/*.h linguist-vendored
/lib/src/unicode/LICENSE linguist-vendored
/cli/src/generate/prepare_grammar/*.json -diff
Cargo.lock -diff

View File

@ -0,0 +1,10 @@
#!/bin/bash
set -x
set -e
if [ "$CROSS" != 1 ]; then
exit 111
fi
docker run --rm -v /home/runner:/home/runner -w "$PWD" "$CROSS_IMAGE" "$@"

View File

@ -0,0 +1,19 @@
#!/bin/bash
set -x
set -e
if [ "$CROSS" = 1 ]; then
if [ -z "$CC" ]; then
echo "make.sh: CC is not set" >&2
exit 111
fi
if [ -z "$AR" ]; then
echo "make.sh: AR is not set" >&2
exit 111
fi
cross.sh make CC=$CC AR=$AR "$@"
else
make "$@"
fi

View File

@ -0,0 +1,12 @@
#!/bin/bash
set -x
set -e
tree_sitter="$ROOT"/target/"$TARGET"/release/tree-sitter
if [ "$CROSS" = 1 ]; then
cross.sh $CROSS_RUNNER "$tree_sitter" "$@"
else
"$tree_sitter" "$@"
fi

View File

@ -0,0 +1,69 @@
name: CICD
on:
workflow_dispatch:
pull_request:
push:
branches:
- master
- check/*
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
cancel-in-progress: true
jobs:
init:
name: Init
runs-on: ubuntu-latest
steps:
- name: Get PR head ref
if: ${{ github.event_name == 'pull_request' }}
id: ref
run: |
echo "ref=refs/pull/${{ github.event.pull_request.number }}/head" >> $GITHUB_OUTPUT
outputs:
ref: >-
${{
(github.event_name == 'pull_request' && startsWith(github.head_ref, 'release/v'))
&& steps.ref.outputs.ref
|| github.ref
}}
fast_checks:
name: Fast checks
uses: ./.github/workflows/fast_checks.yml
full_checks:
name: Full Rust checks
needs: fast_checks
uses: ./.github/workflows/full_rust_checks.yml
min_version:
name: Minimum supported rust version
needs: fast_checks
uses: ./.github/workflows/msrv.yml
with:
package: tree-sitter-cli
build:
name: Build & Test
needs: [init, fast_checks]
uses: ./.github/workflows/build.yml
with:
ref: ${{ needs.init.outputs.ref }}
release:
name: Release
needs: [init, fast_checks, full_checks, min_version, build]
if: >
github.event.pull_request.head.repo.full_name == github.repository &&
startsWith(github.head_ref, 'release/v')
uses: ./.github/workflows/release.yml
with:
ref: ${{ needs.init.outputs.ref }}
publish:
name: Publish
needs: release
uses: ./.github/workflows/publish.yml

View File

@ -0,0 +1,171 @@
name: Build & Test
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
CROSS_DEBUG: 1
on:
workflow_call:
inputs:
ref:
default: ${{ github.ref }}
type: string
jobs:
build:
name: ${{ matrix.job.name }} (${{ matrix.job.target }}) (${{ matrix.job.os }})
runs-on: ${{ matrix.job.os }}
strategy:
fail-fast: false
matrix:
job:
- { name: linux-aarch64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { name: linux-arm , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true }
- { name: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-latest }
- { name: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { name: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest }
- { name: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest }
- { name: macos-x64 , target: x86_64-apple-darwin , os: macos-latest }
env:
BUILD_CMD: cargo
defaults:
run:
shell: bash
steps:
- name: Checkout source code
uses: actions/checkout@v3
with:
ref: ${{ inputs.ref }}
- name: Read Emscripten version
run: |
echo "EMSCRIPTEN_VERSION=$(cat cli/emscripten-version)" >> $GITHUB_ENV
- name: Install Emscripten
uses: mymindstorm/setup-emsdk@v12
with:
version: ${{ env.EMSCRIPTEN_VERSION }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
targets: ${{ matrix.job.target }}
- name: Install cross
if: matrix.job.use-cross
uses: taiki-e/install-action@v2
with:
tool: cross
- name: Build custom cross image
if: ${{ matrix.job.use-cross && matrix.job.os == 'ubuntu-latest' }}
run: |
cd ..
target="${{ matrix.job.target }}"
image=ghcr.io/cross-rs/$target:custom
echo "CROSS_IMAGE=$image" >> $GITHUB_ENV
echo "[target.$target]" >> Cross.toml
echo "image = \"$image\"" >> Cross.toml
echo "CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV
echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile
echo "ENV DEBIAN_FRONTEND=noninteractive" >> Dockerfile
echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile
docker build -t $image .
docker images
docker run --rm $image env
cd -
- name: Setup extra env
run: |
PATH="$PWD/.github/scripts:$PATH"
echo "PATH=$PATH" >> $GITHUB_ENV
echo "ROOT=$PWD" >> $GITHUB_ENV
echo "TREE_SITTER=tree-sitter.sh" >> $GITHUB_ENV
export TARGET=${{ matrix.job.target }}
echo "TARGET=$TARGET" >> $GITHUB_ENV
USE_CROSS="${{ matrix.job.use-cross }}"
if [ "$USE_CROSS" == "true" ]; then
echo "BUILD_CMD=cross" >> $GITHUB_ENV
export CROSS=1; echo "CROSS=$CROSS" >> $GITHUB_ENV
runner=$(cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'")
[ -n "$runner" ] && echo "CROSS_RUNNER=$runner" >> $GITHUB_ENV
echo "runner: $runner"
case "$TARGET" in
i686-unknown-linux-gnu) CC=i686-linux-gnu-gcc AR=i686-linux-gnu-ar ;;
aarch64-unknown-linux-gnu) CC=aarch64-linux-gnu-gcc AR=aarch64-linux-gnu-ar ;;
arm-unknown-linux-gnueabihf) CC=arm-unknown-linux-gnueabihf-gcc AR=arm-unknown-linux-gnueabihf-gcc-ar ;;
esac
[ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV
[ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV
fi
case "$TARGET" in
*-windows-*)
echo "RUST_TEST_THREADS=1" >> $GITHUB_ENV # See #2041 tree-sitter issue
;;
esac
- name: Build C library
if: "!contains(matrix.job.os, 'windows')" # Requires an additional adapted Makefile for `cl.exe` compiler
run: make.sh CFLAGS="-Werror" -j
- name: Build wasm library
run: script/build-wasm
- name: Build CLI
run: $BUILD_CMD build --release --target=${{ matrix.job.target }}
- name: Fetch fixtures
run: script/fetch-fixtures
- name: Generate fixtures
run: script/generate-fixtures
- name: Generate WASM fixtures
if: "!matrix.job.use-cross"
run: script/generate-fixtures-wasm
- name: Run main tests
run: $BUILD_CMD test --target=${{ matrix.job.target }}
- name: Run wasm tests
if: "!matrix.job.use-cross" # TODO: Install Emscripten into custom cross images
run: script/test-wasm
- name: Run benchmarks
if: "!matrix.job.use-cross" # It doesn't make sense to benchmark something in an emulator
run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.job.target }}
- name: Upload CLI artifact
uses: actions/upload-artifact@v3
with:
name: tree-sitter.${{ matrix.job.name }}
path: target/${{ matrix.job.target }}/release/tree-sitter${{ contains(matrix.job.target, 'windows') && '.exe' || '' }}
if-no-files-found: error
retention-days: 7
- name: Upload WASM artifacts
if: ${{ matrix.job.name == 'linux-x64' }}
uses: actions/upload-artifact@v3
with:
name: tree-sitter.wasm
path: |
lib/binding_web/tree-sitter.js
lib/binding_web/tree-sitter.wasm
if-no-files-found: error
retention-days: 7

View File

@ -0,0 +1,31 @@
name: Fast checks to fail fast on any simple code issues
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
on:
workflow_call:
jobs:
check_rust_formatting:
name: Check Rust formatting
runs-on: ubuntu-latest
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Run cargo fmt
run: cargo fmt -- --check
check_c_warnings:
name: Check C warnings
runs-on: ubuntu-latest
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Make C library to check that it's able to compile without warnings
run: make -j CFLAGS="-Werror"

View File

@ -0,0 +1,32 @@
name: Full Rust codebase checks
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
on:
workflow_call:
jobs:
run:
name: Run checks
runs-on: ubuntu-latest
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
components: clippy, rustfmt
- name: Run cargo fmt
run: cargo fmt -- --check
# - name: Run clippy
# run: cargo clippy --all-targets
- name: Run cargo check
run: cargo check --workspace --examples --tests --benches --bins

View File

@ -0,0 +1,42 @@
name: Minimum supported rust version
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
on:
workflow_call:
inputs:
package:
description: Target cargo package name
required: true
type: string
jobs:
run:
name: Run checks
runs-on: ubuntu-latest
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Get the MSRV from the package metadata
id: msrv
run: cargo metadata --no-deps --format-version 1 | jq -r '"version=" + (.packages[] | select(.name == "${{ inputs.package }}").rust_version)' >> $GITHUB_OUTPUT
- name: Install rust toolchain (v${{ steps.msrv.outputs.version }})
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ steps.msrv.outputs.version }}
components: clippy, rustfmt
- name: Run cargo fmt
run: cargo fmt -- --check
# - name: Run clippy (on minimum supported rust version to prevent warnings we can't fix)
# run: cargo clippy --all-targets
# - name: Run main tests
# run: cargo test

View File

@ -0,0 +1,21 @@
name: Publish to registries
on:
workflow_call:
jobs:
crates_io:
name: Publish to Crates.io
runs-on: ubuntu-latest
steps:
- name: Publish packages
run: |
echo "::warning::TODO: add a Crates.io publish logic"
npm:
name: Publish to npmjs.com
runs-on: ubuntu-latest
steps:
- name: Publish packages
run: |
echo "::warning::TODO: add a npmjs.com publish logic"

View File

@ -0,0 +1,101 @@
name: Release
on:
workflow_call:
inputs:
ref:
default: ${{ github.ref }}
type: string
jobs:
permissions:
name: Check permissions
runs-on: ubuntu-latest
outputs:
release_allowed: ${{ steps.maintainer.outputs.is_maintainer == 'true' }}
steps:
- name: Is maintainer
id: maintainer
env:
GH_TOKEN: ${{ github.token }}
repo: ${{ github.repository }}
actor: ${{ github.actor }}
run: |
maintainer=$(
gh api "/repos/${repo}/collaborators" |
jq ".[] | {login, maintainer: .permissions | .maintain} | select(.login == \"${actor}\") | .maintainer"
);
if [ "$maintainer" == "true" ]; then
echo "@${actor} has maintainer level permissions :rocket:" >> $GITHUB_STEP_SUMMARY;
echo "is_maintainer=true" >> $GITHUB_OUTPUT
fi
release:
name: Release
needs: permissions
if: needs.permissions.outputs.release_allowed
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout source code
uses: actions/checkout@v3
with:
ref: ${{ inputs.ref }}
- name: Download build artifacts
uses: actions/download-artifact@v3
with:
path: artifacts
- name: Display structure of downloaded files
run: ls -lR
working-directory: artifacts
- name: Prepare release artifacts
run: |
mkdir -p target
mv artifacts/tree-sitter.wasm/* target/
rm -r artifacts/tree-sitter.wasm
for platform in $(cd artifacts; ls | sed 's/^tree-sitter\.//'); do
exe=$(ls artifacts/tree-sitter.$platform/tree-sitter*)
gzip --stdout --name $exe > target/tree-sitter-$platform.gz
done
rm -rf artifacts
ls -l target/
- name: Get tag name from a release/v* branch name
id: tag_name
env:
tag: ${{ github.head_ref }}
run: echo "tag=${tag#release/}" >> $GITHUB_OUTPUT
- name: Add a release tag
env:
ref: ${{ inputs.ref }}
tag: ${{ steps.tag_name.outputs.tag }}
message: "Release ${{ steps.tag_name.outputs.tag }}"
run: |
git config user.name "${GITHUB_ACTOR}"
git config user.email "${GITHUB_ACTOR}@users.noreply.github.com"
git tag -a "$tag" HEAD -m "$message"
git push origin "$tag"
- name: Create release
uses: softprops/action-gh-release@v1
with:
name: ${{ steps.tag_name.outputs.tag }}
tag_name: ${{ steps.tag_name.outputs.tag }}
fail_on_unmatched_files: true
files: |
target/tree-sitter-*.gz
target/tree-sitter.wasm
target/tree-sitter.js
- name: Merge release PR
env:
GH_TOKEN: ${{ github.token }}
run: |
gh pr merge ${{ github.event.pull_request.html_url }} --match-head-commit $(git rev-parse HEAD) --merge --delete-branch

27
third-party/tree-sitter/tree-sitter/.gitignore generated vendored Normal file
View File

@ -0,0 +1,27 @@
log*.html
.idea
*.xcodeproj
.vscode
.cache
fuzz-results
test/fixtures/grammars/*
!test/fixtures/grammars/.gitkeep
package-lock.json
node_modules
docs/assets/js/tree-sitter.js
/target
*.rs.bk
*.a
*.dylib
*.so
*.so.[0-9]*
*.o
*.obj
*.exp
*.lib
*.wasm

1
third-party/tree-sitter/tree-sitter/CONTRIBUTING.md generated vendored Normal file
View File

@ -0,0 +1 @@
docs/section-6-contributing.md

1116
third-party/tree-sitter/tree-sitter/Cargo.lock generated vendored Normal file

File diff suppressed because it is too large.

10
third-party/tree-sitter/tree-sitter/Cargo.toml generated vendored Normal file
View File

@ -0,0 +1,10 @@
[workspace]
default-members = ["cli"]
members = ["cli", "lib"]
resolver = "2"
[workspace.package]
rust-version = "1.65"
[profile.release]
strip = true

21
third-party/tree-sitter/tree-sitter/LICENSE generated vendored Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2018-2021 Max Brunsfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

71
third-party/tree-sitter/tree-sitter/Makefile generated vendored Normal file
View File

@ -0,0 +1,71 @@
VERSION := 0.20.9
# install directory layout
PREFIX ?= /usr/local
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# collect sources
ifneq ($(AMALGAMATED),1)
SRC := $(wildcard lib/src/*.c)
# do not double-include amalgamation
SRC := $(filter-out lib/src/lib.c,$(SRC))
else
# use amalgamated build
SRC := lib/src/lib.c
endif
OBJ := $(SRC:.c=.o)
# define default flags, and override to append mandatory flags
CFLAGS ?= -O3 -Wall -Wextra -Werror
override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include
# ABI versioning
SONAME_MAJOR := 0
SONAME_MINOR := 0
# OS-specific bits
ifeq ($(shell uname),Darwin)
SOEXT = dylib
SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib
else
SOEXT = so
SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR)
endif
ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
all: libtree-sitter.a libtree-sitter.$(SOEXTVER)
libtree-sitter.a: $(OBJ)
$(AR) rcs $@ $^
libtree-sitter.$(SOEXTVER): $(OBJ)
$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ln -sf $@ libtree-sitter.$(SOEXT)
ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR)
install: all
install -d '$(DESTDIR)$(LIBDIR)'
install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a
install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER)
ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR)
ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT)
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/
install -d '$(DESTDIR)$(PCLIBDIR)'
sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
-e 's|=$(PREFIX)|=$${prefix}|' \
-e 's|@PREFIX@|$(PREFIX)|' \
tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
clean:
rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER)
.PHONY: all install clean

18
third-party/tree-sitter/tree-sitter/README.md generated vendored Normal file
View File

@ -0,0 +1,18 @@
# tree-sitter
[![CICD](https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml/badge.svg)](https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml)
[![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618)
Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be:
- **General** enough to parse any programming language
- **Fast** enough to parse on every keystroke in a text editor
- **Robust** enough to provide useful results even in the presence of syntax errors
- **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application
## Links
- [Documentation](https://tree-sitter.github.io)
- [Rust binding](lib/binding_rust/README.md)
- [WASM binding](lib/binding_web/README.md)
- [Command-line interface](cli/README.md)

82
third-party/tree-sitter/tree-sitter/cli/Cargo.toml generated vendored Normal file
View File

@ -0,0 +1,82 @@
[package]
name = "tree-sitter-cli"
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
version = "0.20.8"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
edition = "2021"
license = "MIT"
readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[[bin]]
name = "tree-sitter"
path = "src/main.rs"
[[bench]]
name = "benchmark"
harness = false
[dependencies]
ansi_term = "0.12"
anyhow = "1.0"
atty = "0.2"
clap = "2.32"
difference = "2.0"
dirs = "3.0"
glob = "0.3.0"
html-escape = "0.2.6"
indexmap = "1"
lazy_static = "1.2.0"
regex = "1"
regex-syntax = "0.6.4"
rustc-hash = "1"
semver = "1.0"
serde = { version = "1.0.130", features = ["derive"] }
smallbitvec = "2.5.1"
tiny_http = "0.12.0"
walkdir = "2.3"
webbrowser = "0.8.3"
which = "4.1.0"
[dependencies.tree-sitter]
version = "0.20.10"
path = "../lib"
[dependencies.tree-sitter-config]
version = "0.19.0"
path = "config"
[dependencies.tree-sitter-highlight]
version = "0.20"
path = "../highlight"
[dependencies.tree-sitter-loader]
version = "0.20"
path = "loader"
[dependencies.tree-sitter-tags]
version = "0.20"
path = "../tags"
[dependencies.serde_json]
version = "1.0"
features = ["preserve_order"]
[dependencies.log]
version = "0.4.6"
features = ["std"]
[dev-dependencies]
proc_macro = { path = "src/tests/proc_macro" }
rand = "0.8"
tempfile = "3"
pretty_assertions = "0.7.2"
ctor = "0.1"
unindent = "0.2"
[build-dependencies]
toml = "0.5"

37
third-party/tree-sitter/tree-sitter/cli/README.md generated vendored Normal file
View File

@ -0,0 +1,37 @@
Tree-sitter CLI
===============
[![Crates.io](https://img.shields.io/crates/v/tree-sitter-cli.svg)](https://crates.io/crates/tree-sitter-cli)
The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on MacOS, Linux, and Windows.
### Installation
You can install the `tree-sitter-cli` with `cargo`:
```sh
cargo install tree-sitter-cli
```
or with `npm`:
```sh
npm install tree-sitter-cli
```
You can also download a pre-built binary for your platform from [the releases page](https://github.com/tree-sitter/tree-sitter/releases/latest).
### Dependencies
The `tree-sitter` binary itself has no dependencies, but specific commands have dependencies that must be present at runtime:
* To generate a parser from a grammar, you must have [`node`](https://nodejs.org) on your PATH.
* To run and test parsers, you must have a C and C++ compiler on your system.
### Commands
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.

View File

@ -0,0 +1,214 @@
use anyhow::Context;
use lazy_static::lazy_static;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::{env, fs, str, usize};
use tree_sitter::{Language, Parser, Query};
use tree_sitter_loader::Loader;
include!("../src/tests/helpers/dirs.rs");
lazy_static! {
static ref LANGUAGE_FILTER: Option<String> =
env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok();
static ref EXAMPLE_FILTER: Option<String> =
env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok();
static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT")
.map(|s| usize::from_str_radix(&s, 10).unwrap())
.unwrap_or(5);
static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)> = {
fn process_dir(result: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) {
if dir.join("grammar.js").exists() {
let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap();
let (example_paths, query_paths) =
result.entry(relative_path.to_owned()).or_default();
if let Ok(example_files) = fs::read_dir(&dir.join("examples")) {
example_paths.extend(example_files.filter_map(|p| {
let p = p.unwrap().path();
if p.is_file() {
Some(p.to_owned())
} else {
None
}
}));
}
if let Ok(query_files) = fs::read_dir(&dir.join("queries")) {
query_paths.extend(query_files.filter_map(|p| {
let p = p.unwrap().path();
if p.is_file() {
Some(p.to_owned())
} else {
None
}
}));
}
} else {
for entry in fs::read_dir(&dir).unwrap() {
let entry = entry.unwrap().path();
if entry.is_dir() {
process_dir(result, &entry);
}
}
}
}
let mut result = BTreeMap::new();
process_dir(&mut result, &GRAMMARS_DIR);
result
};
}
fn main() {
let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR
.values()
.flat_map(|(e, q)| {
e.iter()
.chain(q.iter())
.map(|s| s.file_name().unwrap().to_str().unwrap().len())
})
.max()
.unwrap_or(0);
eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT);
let mut parser = Parser::new();
let mut all_normal_speeds = Vec::new();
let mut all_error_speeds = Vec::new();
for (language_path, (example_paths, query_paths)) in
EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter()
{
let language_name = language_path.file_name().unwrap().to_str().unwrap();
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
if language_name != filter.as_str() {
continue;
}
}
eprintln!("\nLanguage: {}", language_name);
let language = get_language(language_path);
parser.set_language(language).unwrap();
eprintln!(" Constructing Queries");
for path in query_paths {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
if !path.to_str().unwrap().contains(filter.as_str()) {
continue;
}
}
parse(&path, max_path_length, |source| {
Query::new(language, str::from_utf8(source).unwrap())
.expect("Failed to parse query");
});
}
eprintln!(" Parsing Valid Code:");
let mut normal_speeds = Vec::new();
for example_path in example_paths {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
if !example_path.to_str().unwrap().contains(filter.as_str()) {
continue;
}
}
normal_speeds.push(parse(example_path, max_path_length, |code| {
parser.parse(code, None).expect("Failed to parse");
}));
}
eprintln!(" Parsing Invalid Code (mismatched languages):");
let mut error_speeds = Vec::new();
for (other_language_path, (example_paths, _)) in
EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter()
{
if other_language_path != language_path {
for example_path in example_paths {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
if !example_path.to_str().unwrap().contains(filter.as_str()) {
continue;
}
}
error_speeds.push(parse(example_path, max_path_length, |code| {
parser.parse(code, None).expect("Failed to parse");
}));
}
}
}
if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) {
eprintln!(" Average Speed (normal): {} bytes/ms", average_normal);
eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal);
}
if let Some((average_error, worst_error)) = aggregate(&error_speeds) {
eprintln!(" Average Speed (errors): {} bytes/ms", average_error);
eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error);
}
all_normal_speeds.extend(normal_speeds);
all_error_speeds.extend(error_speeds);
}
eprintln!("\n Overall");
if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) {
eprintln!(" Average Speed (normal): {} bytes/ms", average_normal);
eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal);
}
if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) {
eprintln!(" Average Speed (errors): {} bytes/ms", average_error);
eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error);
}
eprintln!("");
}
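// Returns the (average, worst) speed in bytes/ms for a set of runs; the "worst"
// value is the minimum speed observed.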
fn aggregate(speeds: &Vec<usize>) -> Option<(usize, usize)> {
if speeds.is_empty() {
return None;
}
let mut total = 0;
let mut max = usize::MAX;
for speed in speeds.iter().cloned() {
total += speed;
if speed < max {
max = speed;
}
}
Some((total / speeds.len(), max))
}
fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize {
eprint!(
" {:width$}\t",
path.file_name().unwrap().to_str().unwrap(),
width = max_path_length
);
let source_code = fs::read(path)
.with_context(|| format!("Failed to read {:?}", path))
.unwrap();
let time = Instant::now();
for _ in 0..*REPETITION_COUNT {
action(&source_code);
}
let duration = time.elapsed() / (*REPETITION_COUNT as u32);
let duration_ms = duration.as_millis();
let speed = source_code.len() as u128 / (duration_ms + 1);
eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed);
speed as usize
}
fn get_language(path: &Path) -> Language {
let src_dir = GRAMMARS_DIR.join(path).join("src");
TEST_LOADER
.load_language_at_path(&src_dir, &src_dir)
.with_context(|| format!("Failed to load language at path {:?}", src_dir))
.unwrap()
}

126
third-party/tree-sitter/tree-sitter/cli/build.rs generated vendored Normal file
View File

@ -0,0 +1,126 @@
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::{env, fs};
fn main() {
if let Some(git_sha) = read_git_sha() {
println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha);
}
if web_playground_files_present() {
println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING");
}
let rust_binding_version = read_rust_binding_version();
println!(
"cargo:rustc-env={}={}",
"RUST_BINDING_VERSION", rust_binding_version,
);
let emscripten_version = fs::read_to_string("emscripten-version").unwrap();
println!(
"cargo:rustc-env={}={}",
"EMSCRIPTEN_VERSION", emscripten_version,
);
}
fn web_playground_files_present() -> bool {
let paths = [
"../docs/assets/js/playground.js",
"../lib/binding_web/tree-sitter.js",
"../lib/binding_web/tree-sitter.wasm",
];
paths.iter().all(|p| Path::new(p).exists())
}
fn read_git_sha() -> Option<String> {
let mut repo_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
let mut git_path;
loop {
git_path = repo_path.join(".git");
if git_path.exists() {
break;
} else if !repo_path.pop() {
return None;
}
}
let git_dir_path;
if git_path.is_dir() {
git_dir_path = git_path;
} else if let Ok(git_path_content) = fs::read_to_string(&git_path) {
git_dir_path = repo_path.join(git_path_content.get("gitdir: ".len()..).unwrap().trim_end());
} else {
return None;
}
let git_head_path = git_dir_path.join("HEAD");
if let Some(path) = git_head_path.to_str() {
println!("cargo:rerun-if-changed={}", path);
}
if let Ok(mut head_content) = fs::read_to_string(&git_head_path) {
if head_content.ends_with("\n") {
head_content.pop();
}
// If we're on a branch, read the SHA from the ref file.
if head_content.starts_with("ref: ") {
head_content.replace_range(0.."ref: ".len(), "");
let ref_filename = {
// Go to real non-worktree gitdir
let git_dir_path = git_dir_path
.parent()
.map(|p| {
p.file_name()
.map(|n| n == OsStr::new("worktrees"))
.and_then(|x| x.then(|| p.parent()))
})
.flatten()
.flatten()
.unwrap_or(&git_dir_path);
let file = git_dir_path.join(&head_content);
if file.is_file() {
file
} else {
let packed_refs = git_dir_path.join("packed-refs");
if let Ok(packed_refs_content) = fs::read_to_string(&packed_refs) {
for line in packed_refs_content.lines() {
if let Some((hash, r#ref)) = line.split_once(' ') {
if r#ref == head_content {
if let Some(path) = packed_refs.to_str() {
println!("cargo:rerun-if-changed={}", path);
}
return Some(hash.to_string());
}
}
}
}
return None;
}
};
if let Some(path) = ref_filename.to_str() {
println!("cargo:rerun-if-changed={}", path);
}
return fs::read_to_string(&ref_filename).ok();
}
// If we're on a detached commit, then the `HEAD` file itself contains the sha.
else if head_content.len() == 40 {
return Some(head_content);
}
}
None
}
fn read_rust_binding_version() -> String {
let path = "Cargo.toml";
let text = fs::read_to_string(path).unwrap();
let cargo_toml = toml::from_str::<toml::Value>(text.as_ref()).unwrap();
cargo_toml["dependencies"]["tree-sitter"]["version"]
.as_str()
.unwrap()
.trim_matches('"')
.to_string()
}

View File

@ -0,0 +1,21 @@
[package]
name = "tree-sitter-config"
description = "User configuration of tree-sitter's command line programs"
version = "0.19.0"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
edition = "2018"
license = "MIT"
readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[dependencies]
anyhow = "1.0"
dirs = "3.0"
serde = { version = "1.0.130", features = ["derive"] }
[dependencies.serde_json]
version = "1.0.45"
features = ["preserve_order"]

View File

@ -0,0 +1,5 @@
# `tree-sitter-config`
You can use a configuration file to control the behavior of the `tree-sitter`
command-line program. This crate implements the logic for finding and parsing
the contents of the configuration file.

View File

@ -0,0 +1,131 @@
//! Manages tree-sitter's configuration file.
use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::path::PathBuf;
use std::{env, fs};
/// Holds the contents of tree-sitter's configuration file.
///
/// The file typically lives at `~/.config/tree-sitter/config.json`, but see the [`Config::load`][]
/// method for the full details on where it might be located.
///
/// This type holds the generic JSON content of the configuration file. Individual tree-sitter
/// components will use the [`Config::get`][] method to parse that JSON to extract configuration
/// fields that are specific to that component.
#[derive(Debug)]
pub struct Config {
pub location: PathBuf,
pub config: Value,
}
impl Config {
pub fn find_config_file() -> Result<Option<PathBuf>> {
if let Ok(path) = env::var("TREE_SITTER_DIR") {
let mut path = PathBuf::from(path);
path.push("config.json");
if !path.exists() {
return Ok(None);
}
if path.is_file() {
return Ok(Some(path));
}
}
let xdg_path = Self::xdg_config_file()?;
if xdg_path.is_file() {
return Ok(Some(xdg_path));
}
let legacy_path = dirs::home_dir()
.ok_or(anyhow!("Cannot determine home directory"))?
.join(".tree-sitter")
.join("config.json");
if legacy_path.is_file() {
return Ok(Some(legacy_path));
}
Ok(None)
}
fn xdg_config_file() -> Result<PathBuf> {
let xdg_path = dirs::config_dir()
.ok_or(anyhow!("Cannot determine config directory"))?
.join("tree-sitter")
.join("config.json");
Ok(xdg_path)
}
/// Locates and loads in the user's configuration file. We search for the configuration file
/// in the following locations, in order:
///
/// - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set
/// - `tree-sitter/config.json` in your default user configuration directory, as determined
/// by [`dirs::config_dir`](https://docs.rs/dirs/*/dirs/fn.config_dir.html)
/// - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store
/// its configuration
pub fn load() -> Result<Config> {
let location = match Self::find_config_file()? {
Some(location) => location,
None => return Config::initial(),
};
let content = fs::read_to_string(&location)
.with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?;
let config = serde_json::from_str(&content)
.with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?;
Ok(Config { location, config })
}
/// Creates an empty initial configuration file. You can then use the [`Config::add`][] method
/// to add the component-specific configuration types for any components that want to add
/// content to the default file, and then use [`Config::save`][] to write the configuration to
/// disk.
///
/// (Note that this is typically only done by the `tree-sitter init-config` command.)
pub fn initial() -> Result<Config> {
let location = if let Ok(path) = env::var("TREE_SITTER_DIR") {
let mut path = PathBuf::from(path);
path.push("config.json");
path
} else {
Self::xdg_config_file()?
};
let config = serde_json::json!({});
Ok(Config { location, config })
}
/// Saves this configuration to the file that it was originally loaded from.
pub fn save(&self) -> Result<()> {
let json = serde_json::to_string_pretty(&self.config)?;
fs::create_dir_all(self.location.parent().unwrap())?;
fs::write(&self.location, json)?;
Ok(())
}
/// Parses a component-specific configuration from the configuration file. The type `C` must
/// be [deserializable](https://docs.rs/serde/*/serde/trait.Deserialize.html) from a JSON
/// object, and must only include the fields relevant to that component.
pub fn get<C>(&self) -> Result<C>
where
C: for<'de> Deserialize<'de>,
{
let config = serde_json::from_value(self.config.clone())?;
Ok(config)
}
/// Adds a component-specific configuration to the configuration file. The type `C` must be
/// [serializable](https://docs.rs/serde/*/serde/trait.Serialize.html) into a JSON object, and
/// must only include the fields relevant to that component.
pub fn add<C>(&mut self, config: C) -> Result<()>
where
C: Serialize,
{
let mut config = serde_json::to_value(&config)?;
self.config
.as_object_mut()
.unwrap()
.append(config.as_object_mut().unwrap());
Ok(())
}
}
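// Illustrative usage sketch (editor's addition, not part of the upstream crate):
// loads the shared configuration file and extracts a hypothetical component-specific
// section via `Config::get`. `ExampleToolConfig` and its field are assumed names.
#[allow(dead_code)]
#[derive(Deserialize, Default)]
struct ExampleToolConfig {
    #[serde(default)]
    verbose: bool,
}

#[allow(dead_code)]
fn load_example_tool_config() -> Result<ExampleToolConfig> {
    // Finds config.json via TREE_SITTER_DIR, the XDG config directory, or the legacy
    // ~/.tree-sitter location, falling back to an empty in-memory configuration.
    let config = Config::load()?;
    // Unknown top-level keys in config.json are ignored by serde's default behavior.
    config.get::<ExampleToolConfig>()
}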

View File

@ -0,0 +1 @@
3.1.29

View File

@ -0,0 +1,37 @@
[package]
name = "tree-sitter-loader"
description = "Locates, builds, and loads tree-sitter grammars at runtime"
version = "0.20.0"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
edition = "2018"
license = "MIT"
readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[dependencies]
anyhow = "1.0"
cc = "^1.0.58"
dirs = "3.0"
libloading = "0.7"
once_cell = "1.7"
regex = "1"
serde = { version = "1.0.130", features = ["derive"] }
[dependencies.serde_json]
version = "1.0"
features = ["preserve_order"]
[dependencies.tree-sitter]
version = "0.20"
path = "../../lib"
[dependencies.tree-sitter-highlight]
version = "0.20"
path = "../../highlight"
[dependencies.tree-sitter-tags]
version = "0.20"
path = "../../tags"

View File

@ -0,0 +1,6 @@
# `tree-sitter-loader`
The `tree-sitter` command-line program will dynamically find and build grammars
at runtime, if you have cloned the grammars' repositories to your local
filesystem. This helper crate implements that logic so that you can use it in
your own program analysis tools as well.

View File

@ -0,0 +1,6 @@
fn main() {
println!(
"cargo:rustc-env=BUILD_TARGET={}",
std::env::var("TARGET").unwrap()
);
}

View File

@ -0,0 +1,853 @@
use anyhow::{anyhow, Context, Error, Result};
use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
use regex::{Regex, RegexBuilder};
use serde::{Deserialize, Deserializer, Serialize};
use std::collections::HashMap;
use std::io::BufReader;
use std::ops::Range;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::Mutex;
use std::time::SystemTime;
use std::{env, fs, mem};
use tree_sitter::{Language, QueryError, QueryErrorKind};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
#[serde(default)]
#[serde(
rename = "parser-directories",
deserialize_with = "deserialize_parser_directories"
)]
pub parser_directories: Vec<PathBuf>,
}
// Replace `~` or `$HOME` with home path string.
// (While paths like "~/.tree-sitter/config.json" can be deserialized,
// they're not valid paths for I/O modules.)
fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
where
D: Deserializer<'de>,
{
let paths = Vec::<PathBuf>::deserialize(deserializer)?;
let home = match dirs::home_dir() {
Some(home) => home,
None => return Ok(paths),
};
let standardized = paths
.into_iter()
.map(|path| standardize_path(path, &home))
.collect();
Ok(standardized)
}
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
if let Ok(p) = path.strip_prefix("~") {
return home.join(p);
}
if let Ok(p) = path.strip_prefix("$HOME") {
return home.join(p);
}
path
}
impl Config {
pub fn initial() -> Config {
let home_dir = dirs::home_dir().expect("Cannot determine home directory");
Config {
parser_directories: vec![
home_dir.join("github"),
home_dir.join("src"),
home_dir.join("source"),
],
}
}
}
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";
#[cfg(windows)]
const DYLIB_EXTENSION: &'static str = "dll";
const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
pub struct LanguageConfiguration<'a> {
pub scope: Option<String>,
pub content_regex: Option<Regex>,
pub _first_line_regex: Option<Regex>,
pub injection_regex: Option<Regex>,
pub file_types: Vec<String>,
pub root_path: PathBuf,
pub highlights_filenames: Option<Vec<String>>,
pub injections_filenames: Option<Vec<String>>,
pub locals_filenames: Option<Vec<String>>,
pub tags_filenames: Option<Vec<String>>,
language_id: usize,
highlight_config: OnceCell<Option<HighlightConfiguration>>,
tags_config: OnceCell<Option<TagsConfiguration>>,
highlight_names: &'a Mutex<Vec<String>>,
use_all_highlight_names: bool,
}
pub struct Loader {
parser_lib_path: PathBuf,
languages_by_id: Vec<(PathBuf, OnceCell<Language>)>,
language_configurations: Vec<LanguageConfiguration<'static>>,
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
debug_build: bool,
}
unsafe impl Send for Loader {}
unsafe impl Sync for Loader {}
impl Loader {
pub fn new() -> Result<Self> {
let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") {
Ok(path) => PathBuf::from(path),
_ => dirs::cache_dir()
.ok_or(anyhow!("Cannot determine cache directory"))?
.join("tree-sitter")
.join("lib"),
};
Ok(Self::with_parser_lib_path(parser_lib_path))
}
pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
Loader {
parser_lib_path,
languages_by_id: Vec::new(),
language_configurations: Vec::new(),
language_configuration_ids_by_file_type: HashMap::new(),
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
debug_build: false,
}
}
pub fn configure_highlights(&mut self, names: &Vec<String>) {
self.use_all_highlight_names = false;
let mut highlights = self.highlight_names.lock().unwrap();
highlights.clear();
highlights.extend(names.iter().cloned());
}
pub fn highlight_names(&self) -> Vec<String> {
self.highlight_names.lock().unwrap().clone()
}
pub fn find_all_languages(&mut self, config: &Config) -> Result<()> {
if config.parser_directories.is_empty() {
eprintln!("Warning: You have not configured any parser directories!");
eprintln!("Please run `tree-sitter init-config` and edit the resulting");
eprintln!("configuration file to indicate where we should look for");
eprintln!("language grammars.");
eprintln!("");
}
for parser_container_dir in &config.parser_directories {
if let Ok(entries) = fs::read_dir(parser_container_dir) {
for entry in entries {
let entry = entry?;
if let Some(parser_dir_name) = entry.file_name().to_str() {
if parser_dir_name.starts_with("tree-sitter-") {
self.find_language_configurations_at_path(
&parser_container_dir.join(parser_dir_name),
)
.ok();
}
}
}
}
}
Ok(())
}
pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<Language>> {
if let Ok(configurations) = self.find_language_configurations_at_path(path) {
let mut language_ids = configurations
.iter()
.map(|c| c.language_id)
.collect::<Vec<_>>();
language_ids.sort();
language_ids.dedup();
language_ids
.into_iter()
.map(|id| self.language_for_id(id))
.collect::<Result<Vec<_>>>()
} else {
Ok(Vec::new())
}
}
pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
self.language_configurations
.iter()
.map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
.collect()
}
pub fn language_configuration_for_scope(
&self,
scope: &str,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
for configuration in &self.language_configurations {
if configuration.scope.as_ref().map_or(false, |s| s == scope) {
let language = self.language_for_id(configuration.language_id)?;
return Ok(Some((language, configuration)));
}
}
Ok(None)
}
pub fn language_configuration_for_file_name(
&self,
path: &Path,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
// Find all the language configurations that match this file name
// or a suffix of the file name.
let configuration_ids = path
.file_name()
.and_then(|n| n.to_str())
.and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
.or_else(|| {
path.extension()
.and_then(|extension| extension.to_str())
.and_then(|extension| {
self.language_configuration_ids_by_file_type.get(extension)
})
});
if let Some(configuration_ids) = configuration_ids {
if !configuration_ids.is_empty() {
let configuration;
// If there is only one language configuration, then use it.
if configuration_ids.len() == 1 {
configuration = &self.language_configurations[configuration_ids[0]];
}
// If multiple language configurations match, then determine which
// one to use by applying the configurations' content regexes.
else {
let file_contents = fs::read(path)
.with_context(|| format!("Failed to read path {:?}", path))?;
let file_contents = String::from_utf8_lossy(&file_contents);
let mut best_score = -2isize;
let mut best_configuration_id = None;
for configuration_id in configuration_ids {
let config = &self.language_configurations[*configuration_id];
// If the language configuration has a content regex, assign
// a score based on the length of the first match.
let score;
if let Some(content_regex) = &config.content_regex {
if let Some(mat) = content_regex.find(&file_contents) {
score = (mat.end() - mat.start()) as isize;
}
// If the content regex does not match, then *penalize* this
// language configuration, so that language configurations
// without content regexes are preferred over those with
// non-matching content regexes.
else {
score = -1;
}
} else {
score = 0;
}
if score > best_score {
best_configuration_id = Some(*configuration_id);
best_score = score;
}
}
configuration = &self.language_configurations[best_configuration_id.unwrap()];
}
let language = self.language_for_id(configuration.language_id)?;
return Ok(Some((language, configuration)));
}
}
Ok(None)
}
pub fn language_configuration_for_injection_string(
&self,
string: &str,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
let mut best_match_length = 0;
let mut best_match_position = None;
for (i, configuration) in self.language_configurations.iter().enumerate() {
if let Some(injection_regex) = &configuration.injection_regex {
if let Some(mat) = injection_regex.find(string) {
let length = mat.end() - mat.start();
if length > best_match_length {
best_match_position = Some(i);
best_match_length = length;
}
}
}
}
if let Some(i) = best_match_position {
let configuration = &self.language_configurations[i];
let language = self.language_for_id(configuration.language_id)?;
Ok(Some((language, configuration)))
} else {
Ok(None)
}
}
fn language_for_id(&self, id: usize) -> Result<Language> {
let (path, language) = &self.languages_by_id[id];
language
.get_or_try_init(|| {
let src_path = path.join("src");
self.load_language_at_path(&src_path, &src_path)
})
.map(|l| *l)
}
pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result<Language> {
let grammar_path = src_path.join("grammar.json");
let parser_path = src_path.join("parser.c");
let mut scanner_path = src_path.join("scanner.c");
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
}
let mut grammar_file =
fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
let scanner_path = if scanner_path.exists() {
Some(scanner_path)
} else {
scanner_path.set_extension("cc");
if scanner_path.exists() {
Some(scanner_path)
} else {
None
}
};
self.load_language_from_sources(
&grammar_json.name,
&header_path,
&parser_path,
&scanner_path,
)
}
pub fn load_language_from_sources(
&self,
name: &str,
header_path: &Path,
parser_path: &Path,
scanner_path: &Option<PathBuf>,
) -> Result<Language> {
let mut lib_name = name.to_string();
if self.debug_build {
lib_name.push_str(".debug._");
}
let mut library_path = self.parser_lib_path.join(lib_name);
library_path.set_extension(DYLIB_EXTENSION);
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
.with_context(|| "Failed to compare source and binary timestamps")?;
if recompile {
fs::create_dir_all(&self.parser_lib_path)?;
let mut config = cc::Build::new();
config
.cpp(true)
.opt_level(2)
.cargo_metadata(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET);
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
for (key, value) in compiler.env() {
command.env(key, value);
}
if cfg!(windows) {
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
} else {
command.arg("/O2");
}
command.arg(parser_path);
if let Some(scanner_path) = scanner_path.as_ref() {
command.arg(scanner_path);
}
command
.arg("/link")
.arg(format!("/out:{}", library_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fPIC")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(&library_path);
if self.debug_build {
command.arg("-O0");
} else {
command.arg("-O2");
}
// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other subcommands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
} else {
command.arg(scanner_path);
}
}
command.arg("-xc").arg(parser_path);
}
let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
if !output.status.success() {
return Err(anyhow!(
"Parser compilation failed.\nStdout: {}\nStderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
));
}
}
let library = unsafe { Library::new(&library_path) }
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
let language = unsafe {
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
.get(language_fn_name.as_bytes())
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
language_fn()
};
mem::forget(library);
Ok(language)
}
pub fn highlight_config_for_injection_string<'a>(
&'a self,
string: &str,
) -> Option<&'a HighlightConfiguration> {
match self.language_configuration_for_injection_string(string) {
Err(e) => {
eprintln!(
"Failed to load language for injection string '{}': {}",
string, e
);
None
}
Ok(None) => None,
Ok(Some((language, configuration))) => match configuration.highlight_config(language) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
},
}
}
pub fn find_language_configurations_at_path<'a>(
&'a mut self,
parser_path: &Path,
) -> Result<&[LanguageConfiguration]> {
#[derive(Deserialize)]
#[serde(untagged)]
enum PathsJSON {
Empty,
Single(String),
Multiple(Vec<String>),
}
impl Default for PathsJSON {
fn default() -> Self {
PathsJSON::Empty
}
}
impl PathsJSON {
fn into_vec(self) -> Option<Vec<String>> {
match self {
PathsJSON::Empty => None,
PathsJSON::Single(s) => Some(vec![s]),
PathsJSON::Multiple(s) => Some(s),
}
}
}
#[derive(Deserialize)]
struct LanguageConfigurationJSON {
#[serde(default)]
path: PathBuf,
scope: Option<String>,
#[serde(rename = "file-types")]
file_types: Option<Vec<String>>,
#[serde(rename = "content-regex")]
content_regex: Option<String>,
#[serde(rename = "first-line-regex")]
first_line_regex: Option<String>,
#[serde(rename = "injection-regex")]
injection_regex: Option<String>,
#[serde(default)]
highlights: PathsJSON,
#[serde(default)]
injections: PathsJSON,
#[serde(default)]
locals: PathsJSON,
#[serde(default)]
tags: PathsJSON,
}
#[derive(Deserialize)]
struct PackageJSON {
#[serde(default)]
#[serde(rename = "tree-sitter")]
tree_sitter: Vec<LanguageConfigurationJSON>,
}
let initial_language_configuration_count = self.language_configurations.len();
if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) {
let package_json = serde_json::from_str::<PackageJSON>(&package_json_contents);
if let Ok(package_json) = package_json {
let language_count = self.languages_by_id.len();
for config_json in package_json.tree_sitter {
// Determine the path to the parser directory. This can be specified in
// the package.json, but defaults to the directory containing the package.json.
let language_path = parser_path.join(config_json.path);
// Determine if a previous language configuration in this package.json file
// already uses the same language.
let mut language_id = None;
for (id, (path, _)) in
self.languages_by_id.iter().enumerate().skip(language_count)
{
if language_path == *path {
language_id = Some(id);
}
}
// If not, add a new language path to the list.
let language_id = language_id.unwrap_or_else(|| {
self.languages_by_id.push((language_path, OnceCell::new()));
self.languages_by_id.len() - 1
});
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
scope: config_json.scope,
language_id,
file_types: config_json.file_types.unwrap_or(Vec::new()),
content_regex: Self::regex(config_json.content_regex),
_first_line_regex: Self::regex(config_json.first_line_regex),
injection_regex: Self::regex(config_json.injection_regex),
injections_filenames: config_json.injections.into_vec(),
locals_filenames: config_json.locals.into_vec(),
tags_filenames: config_json.tags.into_vec(),
highlights_filenames: config_json.highlights.into_vec(),
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
for file_type in &configuration.file_types {
self.language_configuration_ids_by_file_type
.entry(file_type.to_string())
.or_insert(Vec::new())
.push(self.language_configurations.len());
}
self.language_configurations
.push(unsafe { mem::transmute(configuration) });
}
}
}
if self.language_configurations.len() == initial_language_configuration_count
&& parser_path.join("src").join("grammar.json").exists()
{
let configuration = LanguageConfiguration {
root_path: parser_path.to_owned(),
language_id: self.languages_by_id.len(),
file_types: Vec::new(),
scope: None,
content_regex: None,
_first_line_regex: None,
injection_regex: None,
injections_filenames: None,
locals_filenames: None,
highlights_filenames: None,
tags_filenames: None,
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
self.language_configurations
.push(unsafe { mem::transmute(configuration) });
self.languages_by_id
.push((parser_path.to_owned(), OnceCell::new()));
}
Ok(&self.language_configurations[initial_language_configuration_count..])
}
fn regex(pattern: Option<String>) -> Option<Regex> {
pattern.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok())
}
pub fn select_language(
&mut self,
path: &Path,
current_dir: &Path,
scope: Option<&str>,
) -> Result<Language> {
if let Some(scope) = scope {
if let Some(config) = self
.language_configuration_for_scope(scope)
.with_context(|| format!("Failed to load language for scope '{}'", scope))?
{
Ok(config.0)
} else {
return Err(anyhow!("Unknown scope '{}'", scope));
}
} else if let Some((lang, _)) = self
.language_configuration_for_file_name(path)
.with_context(|| {
format!(
"Failed to load language for file name {}",
&path.file_name().unwrap().to_string_lossy()
)
})?
{
Ok(lang)
} else if let Some(lang) = self
.languages_at_path(&current_dir)
.with_context(|| "Failed to load language in current directory")?
.first()
.cloned()
{
Ok(lang)
} else {
Err(anyhow!("No language found"))
}
}
pub fn use_debug_build(&mut self, flag: bool) {
self.debug_build = flag;
}
}
impl<'a> LanguageConfiguration<'a> {
pub fn highlight_config(&self, language: Language) -> Result<Option<&HighlightConfiguration>> {
return self
.highlight_config
.get_or_try_init(|| {
let (highlights_query, highlight_ranges) =
self.read_queries(&self.highlights_filenames, "highlights.scm")?;
let (injections_query, injection_ranges) =
self.read_queries(&self.injections_filenames, "injections.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
if highlights_query.is_empty() {
Ok(None)
} else {
let mut result = HighlightConfiguration::new(
language,
&highlights_query,
&injections_query,
&locals_query,
)
.map_err(|error| match error.kind {
QueryErrorKind::Language => Error::from(error),
_ => {
if error.offset < injections_query.len() {
Self::include_path_in_query_error(
error,
&injection_ranges,
&injections_query,
0,
)
} else if error.offset < injections_query.len() + locals_query.len() {
Self::include_path_in_query_error(
error,
&locals_ranges,
&locals_query,
injections_query.len(),
)
} else {
Self::include_path_in_query_error(
error,
&highlight_ranges,
&highlights_query,
injections_query.len() + locals_query.len(),
)
}
}
})?;
let mut all_highlight_names = self.highlight_names.lock().unwrap();
if self.use_all_highlight_names {
for capture_name in result.query.capture_names() {
if !all_highlight_names.contains(capture_name) {
all_highlight_names.push(capture_name.clone());
}
}
}
result.configure(&all_highlight_names.as_slice());
Ok(Some(result))
}
})
.map(Option::as_ref);
}
pub fn tags_config(&self, language: Language) -> Result<Option<&TagsConfiguration>> {
self.tags_config
.get_or_try_init(|| {
let (tags_query, tags_ranges) =
self.read_queries(&self.tags_filenames, "tags.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
if tags_query.is_empty() {
Ok(None)
} else {
TagsConfiguration::new(language, &tags_query, &locals_query)
.map(Some)
.map_err(|error| {
if let TagsError::Query(error) = error {
if error.offset < locals_query.len() {
Self::include_path_in_query_error(
error,
&locals_ranges,
&locals_query,
0,
)
} else {
Self::include_path_in_query_error(
error,
&tags_ranges,
&tags_query,
locals_query.len(),
)
}
.into()
} else {
error.into()
}
})
}
})
.map(Option::as_ref)
}
fn include_path_in_query_error<'b>(
mut error: QueryError,
ranges: &'b Vec<(String, Range<usize>)>,
source: &str,
start_offset: usize,
) -> Error {
let offset_within_section = error.offset - start_offset;
let (path, range) = ranges
.iter()
.find(|(_, range)| range.contains(&offset_within_section))
.unwrap();
error.offset = offset_within_section - range.start;
error.row = source[range.start..offset_within_section]
.chars()
.filter(|c| *c == '\n')
.count();
Error::from(error).context(format!("Error in query file {:?}", path))
}
fn read_queries(
&self,
paths: &Option<Vec<String>>,
default_path: &str,
) -> Result<(String, Vec<(String, Range<usize>)>)> {
let mut query = String::new();
let mut path_ranges = Vec::new();
if let Some(paths) = paths.as_ref() {
for path in paths {
let abs_path = self.root_path.join(path);
let prev_query_len = query.len();
query += &fs::read_to_string(&abs_path)
.with_context(|| format!("Failed to read query file {:?}", path))?;
path_ranges.push((path.clone(), prev_query_len..query.len()));
}
} else {
let queries_path = self.root_path.join("queries");
let path = queries_path.join(default_path);
if path.exists() {
query = fs::read_to_string(&path)
.with_context(|| format!("Failed to read query file {:?}", path))?;
path_ranges.push((default_path.to_string(), 0..query.len()));
}
}
Ok((query, path_ranges))
}
}
fn needs_recompile(
lib_path: &Path,
parser_c_path: &Path,
scanner_path: &Option<PathBuf>,
) -> Result<bool> {
if !lib_path.exists() {
return Ok(true);
}
let lib_mtime = mtime(lib_path)?;
if mtime(parser_c_path)? > lib_mtime {
return Ok(true);
}
if let Some(scanner_path) = scanner_path {
if mtime(scanner_path)? > lib_mtime {
return Ok(true);
}
}
Ok(false)
}
fn mtime(path: &Path) -> Result<SystemTime> {
Ok(fs::metadata(path)?.modified()?)
}
fn replace_dashes_with_underscores(name: &str) -> String {
let mut result = String::with_capacity(name.len());
for c in name.chars() {
if c == '-' {
result.push('_');
} else {
result.push(c);
}
}
result
}
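// Illustrative usage sketch (editor's addition, not part of the upstream crate):
// discovers grammars under the default parser directories and selects a language
// for a given file name. The caller-provided `path` is arbitrary.
#[allow(dead_code)]
fn load_language_for_file(path: &Path) -> Result<Option<Language>> {
    let mut loader = Loader::new()?;
    // Scan ~/github, ~/src, and ~/source for directories named `tree-sitter-*`.
    loader.find_all_languages(&Config::initial())?;
    // Match on file name or extension (and content regexes if several grammars apply),
    // compiling and dynamically loading the chosen parser on demand.
    Ok(loader
        .language_configuration_for_file_name(path)?
        .map(|(language, _configuration)| language))
}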

View File

@ -0,0 +1,5 @@
tree-sitter
tree-sitter.exe
*.gz
*.tgz
LICENSE

12
third-party/tree-sitter/tree-sitter/cli/npm/cli.js generated vendored Normal file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env node
const path = require('path');
const spawn = require("child_process").spawn;
const executable = process.platform === 'win32'
? 'tree-sitter.exe'
: 'tree-sitter';
spawn(
path.join(__dirname, executable),
process.argv.slice(2),
{stdio: 'inherit'}
).on('close', process.exit)

369
third-party/tree-sitter/tree-sitter/cli/npm/dsl.d.ts generated vendored Normal file
View File

@ -0,0 +1,369 @@
type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string};
type BlankRule = {type: 'BLANK'};
type ChoiceRule = {type: 'CHOICE'; members: Rule[]};
type FieldRule = {type: 'FIELD'; name: string; content: Rule};
type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule};
type PatternRule = {type: 'PATTERN'; value: string};
type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number};
type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number};
type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number};
type PrecRule = {type: 'PREC'; content: Rule; value: number};
type Repeat1Rule = {type: 'REPEAT1'; content: Rule};
type RepeatRule = {type: 'REPEAT'; content: Rule};
type SeqRule = {type: 'SEQ'; members: Rule[]};
type StringRule = {type: 'STRING'; value: string};
type SymbolRule<Name extends string> = {type: 'SYMBOL'; name: Name};
type TokenRule = {type: 'TOKEN'; content: Rule};
type Rule =
| AliasRule
| BlankRule
| ChoiceRule
| FieldRule
| ImmediateTokenRule
| PatternRule
| PrecDynamicRule
| PrecLeftRule
| PrecRightRule
| PrecRule
| Repeat1Rule
| RepeatRule
| SeqRule
| StringRule
| SymbolRule<string>
| TokenRule;
type RuleOrLiteral = Rule | RegExp | string;
type GrammarSymbols<RuleName extends string> = {
[name in RuleName]: SymbolRule<name>;
} &
Record<string, SymbolRule<string>>;
type RuleBuilder<RuleName extends string> = (
$: GrammarSymbols<RuleName>,
) => RuleOrLiteral;
type RuleBuilders<
RuleName extends string,
BaseGrammarRuleName extends string
> = {
[name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};
interface Grammar<
RuleName extends string,
BaseGrammarRuleName extends string = never,
Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
RuleName,
BaseGrammarRuleName
>
> {
/**
* Name of the grammar language.
*/
name: string;
/** Mapping of grammar rule names to rule builder functions. */
rules: Rules;
/**
* An array of arrays of precedence names. Each inner array represents
* a *descending* ordering. Names listed earlier in one of these arrays
* have higher precedence than any names listed later in the same array.
*/
precedences?: () => String[][],
/**
* An array of arrays of rule names. Each inner array represents a set of
* rules that's involved in an _LR(1) conflict_ that is _intended to exist_
* in the grammar. When these conflicts occur at runtime, Tree-sitter will
* use the GLR algorithm to explore all of the possible interpretations. If
* _multiple_ parses end up succeeding, Tree-sitter will pick the subtree
* whose corresponding rule has the highest total _dynamic precedence_.
*
* @param $ grammar rules
*/
conflicts?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
) => RuleOrLiteral[][];
/**
* An array of token names which can be returned by an _external scanner_.
* External scanners allow you to write custom C code which runs during the
* lexing process in order to handle lexical rules (e.g. Python's indentation
* tokens) that cannot be described by regular expressions.
*
* @param $ grammar rules
* @param previous array of externals from the base schema, if any
*
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
*/
externals?: (
$: Record<string, SymbolRule<string>>,
previous: Rule[],
) => SymbolRule<string>[];
/**
* An array of tokens that may appear anywhere in the language. This
* is often used for whitespace and comments. The default value of
* extras is to accept whitespace. To control whitespace explicitly,
* specify extras: `$ => []` in your grammar.
*
* @param $ grammar rules
*/
extras?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
) => RuleOrLiteral[];
/**
* An array of rules that should be automatically removed from the
* grammar by replacing all of their usages with a copy of their definition.
* This is useful for rules that are used in multiple places but for which
* you don't want to create syntax tree nodes at runtime.
*
* @param $ grammar rules
*/
inline?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
) => RuleOrLiteral[];
/**
* A list of hidden rule names that should be considered supertypes in the
* generated node types file.
*
* @param $ grammar rules
*
* @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
*/
supertypes?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
) => RuleOrLiteral[];
/**
* The name of a token that will match keywords for the purpose of the
* keyword extraction optimization.
*
* @param $ grammar rules
*
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction
*/
word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
}
type GrammarSchema<RuleName extends string> = {
[K in keyof Grammar<RuleName>]: K extends 'rules'
? Record<RuleName, Rule>
: Grammar<RuleName>[K];
};
/**
* Causes the given rule to appear with an alternative name in the syntax tree.
* For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an
* anonymous node, as if the rule had been written as the simple string.
*
* @param rule rule that will be aliased
* @param name target name for the alias
*/
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;
/**
* Causes the given rule to appear as an alternative named node, for instance
* with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named
* node called `bar`.
*
* @param rule rule that will be aliased
* @param symbol target symbol for the alias
*/
declare function alias(
rule: RuleOrLiteral,
symbol: SymbolRule<string>,
): AliasRule;
/**
* Creates a blank rule, matching nothing.
*/
declare function blank(): BlankRule;
/**
* Assigns a field name to the child node(s) matched by the given rule.
* In the resulting syntax tree, you can then use that field name to
* access specific children.
*
* @param name name of the field
* @param rule rule the field should match
*/
declare function field(name: string, rule: RuleOrLiteral): FieldRule;
/**
* Creates a rule that matches one of a set of possible rules. The order
* of the arguments does not matter. This is analogous to the `|` (pipe)
* operator in EBNF notation.
*
* @param options possible rule choices
*/
declare function choice(...options: RuleOrLiteral[]): ChoiceRule;
/**
* Creates a rule that matches zero or one occurrence of a given rule.
* It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
*
* @param rule rule to be made optional
*/
declare function optional(rule: RuleOrLiteral): ChoiceRule;
/**
* Marks the given rule with a precedence which will be used to resolve LR(1)
* conflicts at parser-generation time. When two rules overlap in a way that
* represents either a true ambiguity or a _local_ ambiguity given one token
* of lookahead, Tree-sitter will try to resolve the conflict by matching the
* rule with the higher precedence.
*
* Precedence values can either be strings or numbers. When comparing rules
* with numerical precedence, higher numbers indicate higher precedences. To
* compare rules with string precedence, Tree-sitter uses the grammar's `precedences`
* field.
*
* By default, the precedence of all rules is zero. This works similarly to the precedence directives in Yacc grammars.
*
* @param value precedence weight
* @param rule rule being weighted
*
* @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
*/
declare const prec: {
(value: String | number, rule: RuleOrLiteral): PrecRule;
/**
* Marks the given rule as left-associative (and optionally applies a
* numerical precedence). When an LR(1) conflict arises in which all of the
* rules have the same numerical precedence, Tree-sitter will consult the
* rules' associativity. If there is a left-associative rule, Tree-sitter
* will prefer matching a rule that ends _earlier_. This works similarly to
* associativity directives in Yacc grammars.
*
* @param value (optional) precedence weight
* @param rule rule to mark as left-associative
*
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
*/
left(rule: RuleOrLiteral): PrecLeftRule;
left(value: String | number, rule: RuleOrLiteral): PrecLeftRule;
/**
* Marks the given rule as right-associative (and optionally applies a
* numerical precedence). When an LR(1) conflict arises in which all of the
* rules have the same numerical precedence, Tree-sitter will consult the
* rules' associativity. If there is a right-associative rule, Tree-sitter
* will prefer matching a rule that ends _later_. This works similarly to
* associativity directives in Yacc grammars.
*
* @param value (optional) precedence weight
* @param rule rule to mark as right-associative
*
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
*/
right(rule: RuleOrLiteral): PrecRightRule;
right(value: String | number, rule: RuleOrLiteral): PrecRightRule;
/**
* Marks the given rule with a numerical precedence which will be used to
* resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
* This is only necessary when handling a conflict dynamically using the
* `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
* multiple rules correctly match a given piece of code. In that event,
* Tree-sitter compares the total dynamic precedence associated with each
* rule, and selects the one with the highest total. This is similar to
* dynamic precedence directives in Bison grammars.
*
* @param value precedence weight
* @param rule rule being weighted
*
* @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
*/
dynamic(value: String | number, rule: RuleOrLiteral): PrecDynamicRule;
};
/**
* Creates a rule that matches _zero-or-more_ occurrences of a given rule.
* It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This
* rule is implemented in terms of `repeat1` but is included because it
* is very commonly used.
*
* @param rule rule to repeat, zero or more times
*/
declare function repeat(rule: RuleOrLiteral): RepeatRule;
/**
* Creates a rule that matches one-or-more occurrences of a given rule.
*
* @param rule rule to repeat, one or more times
*/
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;
/**
* Creates a rule that matches any number of other rules, one after another.
* It is analogous to simply writing multiple symbols next to each other
* in EBNF notation.
*
* @param rules ordered rules that comprise the sequence
*/
declare function seq(...rules: RuleOrLiteral[]): SeqRule;
/**
* Creates a symbol rule, representing another rule in the grammar by name.
*
* @param name name of the target rule
*/
declare function sym<Name extends string>(name: Name): SymbolRule<Name>;
/**
* Marks the given rule as producing only a single token. Tree-sitter's
* default is to treat each String or RegExp literal in the grammar as a
* separate token. Each token is matched separately by the lexer and
* returned as its own leaf node in the tree. The token function allows
* you to express a complex rule using the DSL functions (rather
* than as a single regular expression) but still have Tree-sitter treat
* it as a single token.
*
* @param rule rule to represent as a single token
*/
declare const token: {
(rule: RuleOrLiteral): TokenRule;
/**
* Marks the given rule as producing an immediate token. This allows
* the parser to produce a different token based on whether or not
* there are `extras` preceding the token's main content. When there
* are _no_ leading `extras`, an immediate token is preferred over a
* normal token which would otherwise match.
*
* @param rule rule to represent as an immediate token
*/
immediate(rule: RuleOrLiteral): ImmediateTokenRule;
};
/**
* Creates a new language grammar with the provided schema.
*
* @param options grammar options
*/
declare function grammar<RuleName extends string>(
options: Grammar<RuleName>,
): GrammarSchema<RuleName>;
/**
* Extends an existing language grammar with the provided options,
* creating a new language.
*
* @param baseGrammar base grammar schema to extend from
* @param options grammar options for the new extended language
*/
declare function grammar<
BaseGrammarRuleName extends string,
RuleName extends string
>(
baseGrammar: GrammarSchema<BaseGrammarRuleName>,
options: Grammar<RuleName, BaseGrammarRuleName>,
): GrammarSchema<RuleName | BaseGrammarRuleName>;
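// Illustrative sketch (editor's addition, not part of the upstream file): a minimal
// grammar.js using the DSL declared above. All rule names here are hypothetical.
//
//   module.exports = grammar({
//     name: 'mini_lisp',
//     extras: $ => [/\s/, $.comment],
//     rules: {
//       source_file: $ => repeat($._form),
//       _form: $ => choice($.list, $.symbol, $.number),
//       list: $ => seq('(', field('head', optional($.symbol)), repeat($._form), ')'),
//       symbol: $ => /[a-zA-Z_][a-zA-Z0-9_!?-]*/,
//       number: $ => token(/[0-9]+/),
//       comment: $ => token(seq(';', /.*/)),
//     },
//   });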

93
third-party/tree-sitter/tree-sitter/cli/npm/install.js generated vendored Normal file
View File

@ -0,0 +1,93 @@
#!/usr/bin/env node
const fs = require('fs');
const zlib = require('zlib');
const http = require('http');
const https = require('https');
const packageJSON = require('./package.json');
// Determine the URL of the file.
const platformName = {
'darwin': 'macos',
'linux': 'linux',
'win32': 'windows'
}[process.platform];
let archName = {
'x64': 'x64',
'x86': 'x86',
'ia32': 'x86'
}[process.arch];
// ARM macs can run x64 binaries via Rosetta. Rely on that for now.
if (platformName === 'macos' && process.arch === 'arm64') {
archName = 'x64';
}
if (!platformName || !archName) {
console.error(
`Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}`
);
process.exit(1);
}
const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/v${packageJSON.version}`;
const assetName = `tree-sitter-${platformName}-${archName}.gz`;
const assetURL = `${releaseURL}/${assetName}`;
// Remove previously-downloaded files.
const executableName = process.platform === 'win32' ? 'tree-sitter.exe' : 'tree-sitter';
if (fs.existsSync(executableName)) {
fs.unlinkSync(executableName);
}
// Download the compressed file.
console.log(`Downloading ${assetURL}`);
const file = fs.createWriteStream(executableName);
get(assetURL, response => {
if (response.statusCode > 299) {
console.error([
'Download failed',
'',
`url: ${assetURL}`,
`status: ${response.statusCode}`,
`headers: ${JSON.stringify(response.headers, null, 2)}`,
'',
].join('\n'));
process.exit(1);
}
response.pipe(zlib.createGunzip()).pipe(file);
});
file.on('finish', () => {
fs.chmodSync(executableName, '755');
});
// Follow redirects.
function get(url, callback) {
const requestUrl = new URL(url)
let request = https
let requestConfig = requestUrl
const proxyEnv = process.env['HTTPS_PROXY'] || process.env['https_proxy']
if (proxyEnv) {
const proxyUrl = new URL(proxyEnv)
request = proxyUrl.protocol === 'https:' ? https : http
requestConfig = {
hostname: proxyUrl.hostname,
port: proxyUrl.port,
path: requestUrl.toString(),
headers: {
Host: requestUrl.hostname
}
}
}
request.get(requestConfig, response => {
if (response.statusCode === 301 || response.statusCode === 302) {
get(response.headers.location, callback);
} else {
callback(response);
}
});
}

View File

@ -0,0 +1,23 @@
{
"name": "tree-sitter-cli",
"version": "0.20.8",
"author": "Max Brunsfeld",
"license": "MIT",
"repository": {
"type": "git",
"url": "http://github.com/tree-sitter/tree-sitter.git"
},
"description": "CLI for generating fast incremental parsers",
"keywords": [
"parser",
"lexer"
],
"main": "lib/api/index.js",
"scripts": {
"install": "node install.js",
"prepack": "cp ../../LICENSE ."
},
"bin": {
"tree-sitter": "cli.js"
}
}

View File

@ -0,0 +1,154 @@
use super::write_file;
use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
use std::{fs, str};
const BINDING_CC_TEMPLATE: &'static str = include_str!("./templates/binding.cc");
const BINDING_GYP_TEMPLATE: &'static str = include_str!("./templates/binding.gyp");
const INDEX_JS_TEMPLATE: &'static str = include_str!("./templates/index.js");
const LIB_RS_TEMPLATE: &'static str = include_str!("./templates/lib.rs");
const BUILD_RS_TEMPLATE: &'static str = include_str!("./templates/build.rs");
const CARGO_TOML_TEMPLATE: &'static str = include_str!("./templates/cargo.toml");
const PACKAGE_JSON_TEMPLATE: &'static str = include_str!("./templates/package.json");
const PARSER_NAME_PLACEHOLDER: &'static str = "PARSER_NAME";
const CLI_VERSION_PLACEHOLDER: &'static str = "CLI_VERSION";
const CLI_VERSION: &'static str = env!("CARGO_PKG_VERSION");
const RUST_BINDING_VERSION: &'static str = env!("RUST_BINDING_VERSION");
const RUST_BINDING_VERSION_PLACEHOLDER: &'static str = "RUST_BINDING_VERSION";
pub fn generate_binding_files(repo_path: &Path, language_name: &str) -> Result<()> {
let bindings_dir = repo_path.join("bindings");
let dashed_language_name = language_name.replace("_", "-");
let dashed_language_name = dashed_language_name.as_str();
// Generate rust bindings if needed.
let rust_binding_dir = bindings_dir.join("rust");
create_path(&rust_binding_dir, |path| create_dir(path))?;
create_path(&rust_binding_dir.join("lib.rs").to_owned(), |path| {
generate_file(path, LIB_RS_TEMPLATE, language_name)
})?;
create_path(&rust_binding_dir.join("build.rs").to_owned(), |path| {
generate_file(path, BUILD_RS_TEMPLATE, language_name)
})?;
create_path(&repo_path.join("Cargo.toml").to_owned(), |path| {
generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name)
})?;
// Generate node bindings
let node_binding_dir = bindings_dir.join("node");
create_path(&node_binding_dir, |path| create_dir(path))?;
create_path(&node_binding_dir.join("index.js").to_owned(), |path| {
generate_file(path, INDEX_JS_TEMPLATE, language_name)
})?;
create_path(&node_binding_dir.join("binding.cc").to_owned(), |path| {
generate_file(path, BINDING_CC_TEMPLATE, language_name)
})?;
// Create binding.gyp, or update it with new binding path.
let binding_gyp_path = repo_path.join("binding.gyp");
create_path_else(
&binding_gyp_path,
|path| generate_file(path, BINDING_GYP_TEMPLATE, language_name),
|path| {
let binding_gyp =
fs::read_to_string(path).with_context(|| "Failed to read binding.gyp")?;
let old_path = "\"src/binding.cc\"";
if binding_gyp.contains(old_path) {
eprintln!("Updating binding.gyp with new binding path");
let binding_gyp = binding_gyp.replace(old_path, "\"bindings/node/binding.cc\"");
write_file(path, binding_gyp)?;
}
Ok(())
},
)?;
// Create package.json, or update it with new binding path.
let package_json_path = repo_path.join("package.json");
create_path_else(
&package_json_path,
|path| generate_file(path, PACKAGE_JSON_TEMPLATE, dashed_language_name),
|path| {
let package_json_str =
fs::read_to_string(path).with_context(|| "Failed to read package.json")?;
let mut package_json =
serde_json::from_str::<serde_json::Map<String, serde_json::Value>>(
&package_json_str,
)
.with_context(|| "Failed to parse package.json")?;
let package_json_main = package_json.get("main");
let package_json_needs_update = package_json_main.map_or(true, |v| {
let main_string = v.as_str();
main_string == Some("index.js") || main_string == Some("./index.js")
});
if package_json_needs_update {
eprintln!("Updating package.json with new binding path");
package_json.insert(
"main".to_string(),
serde_json::Value::String("bindings/node".to_string()),
);
let mut package_json_str = serde_json::to_string_pretty(&package_json)?;
package_json_str.push('\n');
write_file(path, package_json_str)?;
}
Ok(())
},
)?;
// Remove files from old node binding paths.
let old_index_js_path = repo_path.join("index.js");
let old_binding_cc_path = repo_path.join("src").join("binding.cc");
if old_index_js_path.exists() {
fs::remove_file(old_index_js_path).ok();
}
if old_binding_cc_path.exists() {
fs::remove_file(old_binding_cc_path).ok();
}
Ok(())
}
fn generate_file(path: &Path, template: &str, language_name: &str) -> Result<()> {
write_file(
path,
template
.replace(PARSER_NAME_PLACEHOLDER, language_name)
.replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION)
.replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION),
)
}
fn create_dir(path: &Path) -> Result<()> {
fs::create_dir_all(&path)
.with_context(|| format!("Failed to create {:?}", path.to_string_lossy()))
}
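// Runs `action` only when `path` does not exist yet; returns whether anything was created.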
fn create_path<F>(path: &PathBuf, action: F) -> Result<bool>
where
F: Fn(&PathBuf) -> Result<()>,
{
if !path.exists() {
action(path)?;
return Ok(true);
}
Ok(false)
}
fn create_path_else<T, F>(path: &PathBuf, action: T, else_action: F) -> Result<bool>
where
T: Fn(&PathBuf) -> Result<()>,
F: Fn(&PathBuf) -> Result<()>,
{
if !path.exists() {
action(path)?;
return Ok(true);
} else {
else_action(path)?;
}
Ok(false)
}

View File

@ -0,0 +1,379 @@
use super::coincident_tokens::CoincidentTokenIndex;
use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::NfaCursor;
use crate::generate::rules::{Symbol, TokenSet};
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
use log::info;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::mem;
pub(crate) fn build_lex_table(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
keywords: &TokenSet,
coincident_token_index: &CoincidentTokenIndex,
token_conflict_map: &TokenConflictMap,
) -> (LexTable, LexTable) {
let keyword_lex_table;
if syntax_grammar.word_token.is_some() {
let mut builder = LexTableBuilder::new(lexical_grammar);
builder.add_state_for_tokens(keywords);
keyword_lex_table = builder.table;
} else {
keyword_lex_table = LexTable::default();
}
let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec<ParseStateId>)> = Vec::new();
for (i, state) in parse_table.states.iter().enumerate() {
let tokens = state
.terminal_entries
.keys()
.filter_map(|token| {
if token.is_terminal() {
if keywords.contains(&token) {
syntax_grammar.word_token
} else {
Some(*token)
}
} else if token.is_eof() {
Some(*token)
} else {
None
}
})
.collect();
let mut did_merge = false;
for entry in parse_state_ids_by_token_set.iter_mut() {
if merge_token_set(
&mut entry.0,
&tokens,
lexical_grammar,
token_conflict_map,
coincident_token_index,
) {
did_merge = true;
entry.1.push(i);
break;
}
}
if !did_merge {
parse_state_ids_by_token_set.push((tokens, vec![i]));
}
}
let mut builder = LexTableBuilder::new(lexical_grammar);
for (tokens, parse_state_ids) in parse_state_ids_by_token_set {
let lex_state_id = builder.add_state_for_tokens(&tokens);
for id in parse_state_ids {
parse_table.states[id].lex_state_id = lex_state_id;
}
}
let mut table = builder.table;
minimize_lex_table(&mut table, parse_table);
sort_states(&mut table, parse_table);
(table, keyword_lex_table)
}
struct QueueEntry {
state_id: usize,
nfa_states: Vec<u32>,
eof_valid: bool,
}
struct LexTableBuilder<'a> {
lexical_grammar: &'a LexicalGrammar,
cursor: NfaCursor<'a>,
table: LexTable,
state_queue: VecDeque<QueueEntry>,
state_ids_by_nfa_state_set: HashMap<(Vec<u32>, bool), usize>,
}
impl<'a> LexTableBuilder<'a> {
fn new(lexical_grammar: &'a LexicalGrammar) -> Self {
Self {
lexical_grammar,
cursor: NfaCursor::new(&lexical_grammar.nfa, vec![]),
table: LexTable::default(),
state_queue: VecDeque::new(),
state_ids_by_nfa_state_set: HashMap::new(),
}
}
fn add_state_for_tokens(&mut self, tokens: &TokenSet) -> usize {
let mut eof_valid = false;
let nfa_states = tokens
.iter()
.filter_map(|token| {
if token.is_terminal() {
Some(self.lexical_grammar.variables[token.index].start_state)
} else {
eof_valid = true;
None
}
})
.collect();
let (state_id, is_new) = self.add_state(nfa_states, eof_valid);
if is_new {
info!(
"entry point state: {}, tokens: {:?}",
state_id,
tokens
.iter()
.map(|t| &self.lexical_grammar.variables[t.index].name)
.collect::<Vec<_>>()
);
}
while let Some(QueueEntry {
state_id,
nfa_states,
eof_valid,
}) = self.state_queue.pop_front()
{
self.populate_state(state_id, nfa_states, eof_valid);
}
state_id
}
fn add_state(&mut self, nfa_states: Vec<u32>, eof_valid: bool) -> (usize, bool) {
self.cursor.reset(nfa_states);
match self
.state_ids_by_nfa_state_set
.entry((self.cursor.state_ids.clone(), eof_valid))
{
Entry::Occupied(o) => (*o.get(), false),
Entry::Vacant(v) => {
let state_id = self.table.states.len();
self.table.states.push(LexState::default());
self.state_queue.push_back(QueueEntry {
state_id,
nfa_states: v.key().0.clone(),
eof_valid,
});
v.insert(state_id);
(state_id, true)
}
}
}
fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>, eof_valid: bool) {
self.cursor.force_reset(nfa_states);
// The EOF state is represented as an empty list of NFA states.
let mut completion = None;
for (id, prec) in self.cursor.completions() {
if let Some((prev_id, prev_precedence)) = completion {
if TokenConflictMap::prefer_token(
self.lexical_grammar,
(prev_precedence, prev_id),
(prec, id),
) {
continue;
}
}
completion = Some((id, prec));
}
let transitions = self.cursor.transitions();
let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep);
// If EOF is a valid lookahead token, add a transition predicated on the null
// character that leads to the empty set of NFA states.
if eof_valid {
let (next_state_id, _) = self.add_state(Vec::new(), false);
self.table.states[state_id].eof_action = Some(AdvanceAction {
state: next_state_id,
in_main_token: true,
});
}
for transition in transitions {
if let Some((completed_id, completed_precedence)) = completion {
if !TokenConflictMap::prefer_transition(
&self.lexical_grammar,
&transition,
completed_id,
completed_precedence,
has_sep,
) {
continue;
}
}
let (next_state_id, _) =
self.add_state(transition.states, eof_valid && transition.is_separator);
self.table.states[state_id].advance_actions.push((
transition.characters,
AdvanceAction {
state: next_state_id,
in_main_token: !transition.is_separator,
},
));
}
if let Some((complete_id, _)) = completion {
self.table.states[state_id].accept_action = Some(Symbol::terminal(complete_id));
} else if self.cursor.state_ids.is_empty() {
self.table.states[state_id].accept_action = Some(Symbol::end());
}
}
}
fn merge_token_set(
tokens: &mut TokenSet,
other: &TokenSet,
lexical_grammar: &LexicalGrammar,
token_conflict_map: &TokenConflictMap,
coincident_token_index: &CoincidentTokenIndex,
) -> bool {
for i in 0..lexical_grammar.variables.len() {
let symbol = Symbol::terminal(i);
let set_without_terminal = match (tokens.contains_terminal(i), other.contains_terminal(i)) {
(true, false) => other,
(false, true) => tokens,
_ => continue,
};
for existing_token in set_without_terminal.terminals() {
if token_conflict_map.does_conflict(i, existing_token.index)
|| token_conflict_map.does_match_prefix(i, existing_token.index)
{
return false;
}
if !coincident_token_index.contains(symbol, existing_token) {
if token_conflict_map.does_overlap(existing_token.index, i)
|| token_conflict_map.does_overlap(i, existing_token.index)
{
return false;
}
}
}
}
tokens.insert_all(other);
true
}
fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
// Initially group the states by their accept action and their
// valid lookahead characters.
let mut state_ids_by_signature = HashMap::new();
for (i, state) in table.states.iter().enumerate() {
let signature = (
i == 0,
state.accept_action,
state.eof_action.is_some(),
state
.advance_actions
.iter()
.map(|(characters, action)| (characters.clone(), action.in_main_token))
.collect::<Vec<_>>(),
);
state_ids_by_signature
.entry(signature)
.or_insert(Vec::new())
.push(i);
}
let mut state_ids_by_group_id = state_ids_by_signature
.into_iter()
.map(|e| e.1)
.collect::<Vec<_>>();
state_ids_by_group_id.sort();
let error_group_index = state_ids_by_group_id
.iter()
.position(|g| g.contains(&0))
.unwrap();
state_ids_by_group_id.swap(error_group_index, 0);
let mut group_ids_by_state_id = vec![0; table.states.len()];
for (group_id, state_ids) in state_ids_by_group_id.iter().enumerate() {
for state_id in state_ids {
group_ids_by_state_id[*state_id] = group_id;
}
}
while split_state_id_groups(
&table.states,
&mut state_ids_by_group_id,
&mut group_ids_by_state_id,
1,
lex_states_differ,
) {
continue;
}
let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
for state_ids in &state_ids_by_group_id {
let mut new_state = LexState::default();
mem::swap(&mut new_state, &mut table.states[state_ids[0]]);
for (_, advance_action) in new_state.advance_actions.iter_mut() {
advance_action.state = group_ids_by_state_id[advance_action.state];
}
if let Some(eof_action) = &mut new_state.eof_action {
eof_action.state = group_ids_by_state_id[eof_action.state];
}
new_states.push(new_state);
}
for state in parse_table.states.iter_mut() {
state.lex_state_id = group_ids_by_state_id[state.lex_state_id];
}
table.states = new_states;
}
fn lex_states_differ(
left: &LexState,
right: &LexState,
group_ids_by_state_id: &Vec<usize>,
) -> bool {
left.advance_actions
.iter()
.zip(right.advance_actions.iter())
.any(|(left, right)| {
group_ids_by_state_id[left.1.state] != group_ids_by_state_id[right.1.state]
})
}
fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
// Get a mapping of old state index -> new_state_index
let mut old_ids_by_new_id = (0..table.states.len()).collect::<Vec<_>>();
old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);
// Get the inverse mapping
let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
for (id, old_id) in old_ids_by_new_id.iter().enumerate() {
new_ids_by_old_id[*old_id] = id;
}
// Reorder the lex states and update their references to reflect
// the new ordering.
table.states = old_ids_by_new_id
.iter()
.map(|old_id| {
let mut state = LexState::default();
mem::swap(&mut state, &mut table.states[*old_id]);
for (_, advance_action) in state.advance_actions.iter_mut() {
advance_action.state = new_ids_by_old_id[advance_action.state];
}
if let Some(eof_action) = &mut state.eof_action {
eof_action.state = new_ids_by_old_id[eof_action.state];
}
state
})
.collect();
// Update the parse table's lex state references
for state in parse_table.states.iter_mut() {
state.lex_state_id = new_ids_by_old_id[state.lex_state_id];
}
}
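// A minimal sketch (as a runnable test) of the signature-based grouping that
// seeds `minimize_lex_table` above: states with identical signatures start in
// the same group and are renumbered to their group id. The signatures used
// here are hypothetical stand-ins, not real lex-state data.
#[test]
fn example_group_states_by_signature() {
    use std::collections::HashMap;
    // Pretend signature per state: (has accept action, lookahead character class).
    let signatures = vec![(false, 'a'), (true, 'b'), (false, 'a'), (true, 'b')];
    // Group state ids by signature, mirroring `state_ids_by_signature`.
    let mut state_ids_by_signature: HashMap<(bool, char), Vec<usize>> = HashMap::new();
    for (i, signature) in signatures.iter().enumerate() {
        state_ids_by_signature
            .entry(*signature)
            .or_insert(Vec::new())
            .push(i);
    }
    // Renumber every state to its group id, mirroring `group_ids_by_state_id`.
    let mut groups: Vec<Vec<usize>> = state_ids_by_signature.into_values().collect();
    groups.sort();
    let mut group_ids_by_state_id = vec![0; signatures.len()];
    for (group_id, state_ids) in groups.iter().enumerate() {
        for state_id in state_ids {
            group_ids_by_state_id[*state_id] = group_id;
        }
    }
    assert_eq!(group_ids_by_state_id, vec![0, 1, 0, 1]);
}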

View File

@ -0,0 +1,997 @@
use super::item::{ParseItem, ParseItemSet, ParseItemSetCore};
use super::item_set_builder::ParseItemSetBuilder;
use crate::generate::grammars::PrecedenceEntry;
use crate::generate::grammars::{
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
};
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet};
use crate::generate::tables::{
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ProductionInfo, ProductionInfoId,
};
use anyhow::{anyhow, Result};
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt::Write;
use std::hash::BuildHasherDefault;
use std::u32;
use indexmap::{map::Entry, IndexMap};
use rustc_hash::FxHasher;
// For conflict reporting, each parse state is associated with an example
// sequence of symbols that could lead to that parse state.
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
#[derive(Clone)]
struct AuxiliarySymbolInfo {
auxiliary_symbol: Symbol,
parent_symbols: Vec<Symbol>,
}
#[derive(Debug, Default)]
struct ReductionInfo {
precedence: Precedence,
symbols: Vec<Symbol>,
has_left_assoc: bool,
has_right_assoc: bool,
has_non_assoc: bool,
}
struct ParseStateQueueEntry {
state_id: ParseStateId,
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
}
struct ParseTableBuilder<'a> {
item_set_builder: ParseItemSetBuilder<'a>,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
variable_info: &'a Vec<VariableInfo>,
core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
state_ids_by_item_set: IndexMap<ParseItemSet<'a>, ParseStateId, BuildHasherDefault<FxHasher>>,
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
parse_table: ParseTable,
}
impl<'a> ParseTableBuilder<'a> {
fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
// Ensure that the empty alias sequence has index 0.
self.parse_table
.production_infos
.push(ProductionInfo::default());
// Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
// Add the starting state at index 1.
self.add_parse_state(
&Vec::new(),
&Vec::new(),
ParseItemSet::with(
[(
ParseItem::start(),
[Symbol::end()].iter().cloned().collect(),
)]
.iter()
.cloned(),
),
);
// Compute the possible item sets for non-terminal extras.
let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
for extra_non_terminal in self
.syntax_grammar
.extra_symbols
.iter()
.filter(|s| s.is_non_terminal())
{
let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
for production in &variable.productions {
non_terminal_extra_item_sets_by_first_terminal
.entry(production.first_symbol().unwrap())
.or_insert(ParseItemSet::default())
.insert(
ParseItem {
variable_index: extra_non_terminal.index as u32,
production,
step_index: 1,
has_preceding_inherited_fields: false,
},
&[Symbol::end_of_nonterminal_extra()]
.iter()
.cloned()
.collect(),
);
}
}
// Add a state for each starting terminal of a non-terminal extra rule.
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
self.non_terminal_extra_states
.push((terminal, self.parse_table.states.len()));
self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
}
while let Some(entry) = self.parse_state_queue.pop_front() {
let item_set = self
.item_set_builder
.transitive_closure(&self.parse_state_info_by_id[entry.state_id].1);
self.add_actions(
self.parse_state_info_by_id[entry.state_id].0.clone(),
entry.preceding_auxiliary_symbols,
entry.state_id,
item_set,
)?;
}
Ok((self.parse_table, self.parse_state_info_by_id))
}
fn add_parse_state(
&mut self,
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
item_set: ParseItemSet<'a>,
) -> ParseStateId {
match self.state_ids_by_item_set.entry(item_set) {
// If an equivalent item set has already been processed, then return
// the existing parse state index.
Entry::Occupied(o) => *o.get(),
// Otherwise, insert a new parse state and add it to the queue of
// parse states to populate.
Entry::Vacant(v) => {
let core = v.key().core();
let core_count = self.core_ids_by_core.len();
let core_id = *self.core_ids_by_core.entry(core).or_insert(core_count);
let state_id = self.parse_table.states.len();
self.parse_state_info_by_id
.push((preceding_symbols.clone(), v.key().clone()));
self.parse_table.states.push(ParseState {
id: state_id,
lex_state_id: 0,
external_lex_state_id: 0,
terminal_entries: IndexMap::default(),
nonterminal_entries: IndexMap::default(),
core_id,
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
state_id,
preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
});
v.insert(state_id);
state_id
}
}
}
fn add_actions(
&mut self,
mut preceding_symbols: SymbolSequence,
mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
state_id: ParseStateId,
item_set: ParseItemSet<'a>,
) -> Result<()> {
let mut terminal_successors = BTreeMap::new();
let mut non_terminal_successors = BTreeMap::new();
let mut lookaheads_with_conflicts = TokenSet::new();
let mut reduction_infos = HashMap::<Symbol, ReductionInfo>::new();
// Each item in the item set contributes to either a Shift action or a Reduce
// action in this state.
for (item, lookaheads) in &item_set.entries {
// If the item is unfinished, then this state has a transition for the item's
// next symbol. Advance the item to its next step and insert the resulting
// item into the successor item set.
if let Some(next_symbol) = item.symbol() {
let mut successor = item.successor();
if next_symbol.is_non_terminal() {
let variable = &self.syntax_grammar.variables[next_symbol.index];
// Keep track of where auxiliary non-terminals (repeat symbols) are
// used within visible symbols. This information may be needed later
// for conflict resolution.
if variable.is_auxiliary() {
preceding_auxiliary_symbols
.push(self.get_auxiliary_node_info(&item_set, next_symbol));
}
// For most parse items, the symbols associated with the preceding children
// don't matter: they have no effect on the REDUCE action that would be
// performed at the end of the item. But the symbols *do* matter for
// children that are hidden and have fields, because those fields are
// "inherited" by the parent node.
//
// If this item has consumed a hidden child with fields, then the symbols
// of its preceding children need to be taken into account when comparing
// it with other items.
if variable.is_hidden()
&& !self.variable_info[next_symbol.index].fields.is_empty()
{
successor.has_preceding_inherited_fields = true;
}
non_terminal_successors
.entry(next_symbol)
.or_insert_with(|| ParseItemSet::default())
.insert(successor, lookaheads);
} else {
terminal_successors
.entry(next_symbol)
.or_insert_with(|| ParseItemSet::default())
.insert(successor, lookaheads);
}
}
// If the item is finished, then add a Reduce action to this state based
// on this item.
else {
let symbol = Symbol::non_terminal(item.variable_index as usize);
let action = if item.is_augmented() {
ParseAction::Accept
} else {
ParseAction::Reduce {
symbol,
child_count: item.step_index as usize,
dynamic_precedence: item.production.dynamic_precedence,
production_id: self.get_production_id(item),
}
};
let precedence = item.precedence();
let associativity = item.associativity();
for lookahead in lookaheads.iter() {
let table_entry = self.parse_table.states[state_id]
.terminal_entries
.entry(lookahead)
.or_insert_with(|| ParseTableEntry::new());
let reduction_info = reduction_infos.entry(lookahead).or_default();
// While inserting Reduce actions, eagerly resolve conflicts related
// to precedence: avoid inserting lower-precedence reductions, and
// clear the action list when inserting higher-precedence reductions.
if table_entry.actions.is_empty() {
table_entry.actions.push(action);
} else {
match Self::compare_precedence(
&self.syntax_grammar,
precedence,
&[symbol],
&reduction_info.precedence,
&reduction_info.symbols,
) {
Ordering::Greater => {
table_entry.actions.clear();
table_entry.actions.push(action);
lookaheads_with_conflicts.remove(&lookahead);
*reduction_info = ReductionInfo::default();
}
Ordering::Equal => {
table_entry.actions.push(action);
lookaheads_with_conflicts.insert(lookahead);
}
Ordering::Less => continue,
}
}
reduction_info.precedence = precedence.clone();
if let Err(i) = reduction_info.symbols.binary_search(&symbol) {
reduction_info.symbols.insert(i, symbol);
}
match associativity {
Some(Associativity::Left) => reduction_info.has_left_assoc = true,
Some(Associativity::Right) => reduction_info.has_right_assoc = true,
None => reduction_info.has_non_assoc = true,
}
}
}
}
// Having computed the successor item sets for each symbol, add a new
// parse state for each of these item sets, and add a corresponding Shift
// action to this state.
for (symbol, next_item_set) in terminal_successors {
preceding_symbols.push(symbol);
let next_state_id = self.add_parse_state(
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
);
preceding_symbols.pop();
let entry = self.parse_table.states[state_id]
.terminal_entries
.entry(symbol);
if let Entry::Occupied(e) = &entry {
if !e.get().actions.is_empty() {
lookaheads_with_conflicts.insert(symbol);
}
}
entry
.or_insert_with(|| ParseTableEntry::new())
.actions
.push(ParseAction::Shift {
state: next_state_id,
is_repetition: false,
});
}
for (symbol, next_item_set) in non_terminal_successors {
preceding_symbols.push(symbol);
let next_state_id = self.add_parse_state(
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
);
preceding_symbols.pop();
self.parse_table.states[state_id]
.nonterminal_entries
.insert(symbol, GotoAction::Goto(next_state_id));
}
// For any symbol with multiple actions, perform conflict resolution.
// This will either
// * choose one action over the others using precedence or associativity
// * keep multiple actions if this conflict has been whitelisted in the grammar
// * fail, terminating the parser generation process
for symbol in lookaheads_with_conflicts.iter() {
self.handle_conflict(
&item_set,
state_id,
&preceding_symbols,
&preceding_auxiliary_symbols,
symbol,
reduction_infos.get(&symbol).unwrap(),
)?;
}
// Finally, add actions for the grammar's `extra` symbols.
let state = &mut self.parse_table.states[state_id];
let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra();
// If this state represents the end of a non-terminal extra rule, then make sure that
// it doesn't have other successor states. Non-terminal extra rules must have
// unambiguous endings.
if is_end_of_non_terminal_extra {
if state.terminal_entries.len() > 1 {
let parent_symbols = item_set
.entries
.iter()
.filter_map(|(item, _)| {
if !item.is_augmented() && item.step_index > 0 {
Some(item.variable_index)
} else {
None
}
})
.collect::<HashSet<_>>();
let mut message =
"Extra rules must have unambiguous endings. Conflicting rules: ".to_string();
for (i, variable_index) in parent_symbols.iter().enumerate() {
if i > 0 {
message += ", ";
}
message += &self.syntax_grammar.variables[*variable_index as usize].name;
}
return Err(anyhow!(message));
}
}
// Add actions for the start tokens of each non-terminal extra rule.
else {
for (terminal, state_id) in &self.non_terminal_extra_states {
state
.terminal_entries
.entry(*terminal)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::Shift {
state: *state_id,
is_repetition: false,
}],
});
}
// Add ShiftExtra actions for the terminal extra tokens. These actions
// are added to every state except for those at the ends of non-terminal
// extras.
for extra_token in &self.syntax_grammar.extra_symbols {
if extra_token.is_non_terminal() {
state
.nonterminal_entries
.insert(*extra_token, GotoAction::ShiftExtra);
} else {
state
.terminal_entries
.entry(*extra_token)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::ShiftExtra],
});
}
}
}
Ok(())
}
fn handle_conflict(
&mut self,
item_set: &ParseItemSet,
state_id: ParseStateId,
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
conflicting_lookahead: Symbol,
reduction_info: &ReductionInfo,
) -> Result<()> {
let entry = self.parse_table.states[state_id]
.terminal_entries
.get_mut(&conflicting_lookahead)
.unwrap();
// Determine which items in the set conflict with each other, and the
// precedences associated with SHIFT vs REDUCE actions. There won't
// be multiple REDUCE actions with different precedences; that is
// sorted out ahead of time in `add_actions`. But there can still be
// REDUCE-REDUCE conflicts where all actions have the *same*
// precedence, and there can still be SHIFT/REDUCE conflicts.
let mut considered_associativity = false;
let mut shift_precedence: Vec<(&Precedence, Symbol)> = Vec::new();
let mut conflicting_items = HashSet::new();
for (item, lookaheads) in &item_set.entries {
if let Some(step) = item.step() {
if item.step_index > 0 {
if self
.item_set_builder
.first_set(&step.symbol)
.contains(&conflicting_lookahead)
{
if item.variable_index != u32::MAX {
conflicting_items.insert(item);
}
let p = (
item.precedence(),
Symbol::non_terminal(item.variable_index as usize),
);
if let Err(i) = shift_precedence.binary_search(&p) {
shift_precedence.insert(i, p);
}
}
}
} else if lookaheads.contains(&conflicting_lookahead) {
if item.variable_index != u32::MAX {
conflicting_items.insert(item);
}
}
}
if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
// If all of the items in the conflict have the same parent symbol,
// and that parent symbol is auxiliary, then this is just the intentional
// ambiguity associated with a repeat rule. Resolve that class of ambiguity
// by leaving it in the parse table, but marking the SHIFT action with
// an `is_repetition` flag.
let conflicting_variable_index =
conflicting_items.iter().next().unwrap().variable_index;
if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary()
&& conflicting_items
.iter()
.all(|item| item.variable_index == conflicting_variable_index)
{
*is_repetition = true;
return Ok(());
}
// If the SHIFT action has higher precedence, remove all the REDUCE actions.
let mut shift_is_less = false;
let mut shift_is_more = false;
for p in shift_precedence {
match Self::compare_precedence(
&self.syntax_grammar,
p.0,
&[p.1],
&reduction_info.precedence,
&reduction_info.symbols,
) {
Ordering::Greater => shift_is_more = true,
Ordering::Less => shift_is_less = true,
Ordering::Equal => {}
}
}
if shift_is_more && !shift_is_less {
entry.actions.drain(0..entry.actions.len() - 1);
}
// If the REDUCE actions have higher precedence, remove the SHIFT action.
else if shift_is_less && !shift_is_more {
entry.actions.pop();
conflicting_items.retain(|item| item.is_done());
}
// If the SHIFT and REDUCE actions have the same precedence, consider
// the REDUCE actions' associativity.
else if !shift_is_less && !shift_is_more {
considered_associativity = true;
// If all Reduce actions are left associative, remove the SHIFT action.
// If all Reduce actions are right associative, remove the REDUCE actions.
match (
reduction_info.has_left_assoc,
reduction_info.has_non_assoc,
reduction_info.has_right_assoc,
) {
(true, false, false) => {
entry.actions.pop();
conflicting_items.retain(|item| item.is_done());
}
(false, false, true) => {
entry.actions.drain(0..entry.actions.len() - 1);
}
_ => {}
}
}
}
// If all of the actions but one have been eliminated, then there's no problem.
let entry = self.parse_table.states[state_id]
.terminal_entries
.get_mut(&conflicting_lookahead)
.unwrap();
if entry.actions.len() == 1 {
return Ok(());
}
// Determine the set of parent symbols involved in this conflict.
let mut actual_conflict = Vec::new();
for item in &conflicting_items {
let symbol = Symbol::non_terminal(item.variable_index as usize);
if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
actual_conflict.extend(
preceding_auxiliary_symbols
.iter()
.rev()
.find_map(|info| {
if info.auxiliary_symbol == symbol {
Some(&info.parent_symbols)
} else {
None
}
})
.unwrap()
.iter(),
);
} else {
actual_conflict.push(symbol);
}
}
actual_conflict.sort_unstable();
actual_conflict.dedup();
// If this set of symbols has been whitelisted, then there's no error.
if self
.syntax_grammar
.expected_conflicts
.contains(&actual_conflict)
{
return Ok(());
}
let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
for symbol in preceding_symbols {
write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
}
write!(
&mut msg,
" • {} …\n\n",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
let mut interpretations = conflicting_items
.iter()
.map(|item| {
let mut line = String::new();
for preceding_symbol in preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index as usize)
{
write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
}
write!(
&mut line,
" ({}",
&self.syntax_grammar.variables[item.variable_index as usize].name
)
.unwrap();
for (j, step) in item.production.steps.iter().enumerate() {
if j as u32 == item.step_index {
write!(&mut line, " •").unwrap();
}
write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
}
write!(&mut line, ")").unwrap();
if item.is_done() {
write!(
&mut line,
" • {} …",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
}
let precedence = item.precedence();
let associativity = item.associativity();
let prec_line = if let Some(associativity) = associativity {
Some(format!(
"(precedence: {}, associativity: {:?})",
precedence, associativity
))
} else if !precedence.is_none() {
Some(format!("(precedence: {})", precedence))
} else {
None
};
(line, prec_line)
})
.collect::<Vec<_>>();
let max_interpretation_length = interpretations
.iter()
.map(|i| i.0.chars().count())
.max()
.unwrap();
interpretations.sort_unstable();
for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
write!(&mut msg, " {}:", i + 1).unwrap();
msg += &line;
if let Some(prec_suffix) = prec_suffix {
for _ in line.chars().count()..max_interpretation_length {
msg.push(' ');
}
msg += " ";
msg += &prec_suffix;
}
msg.push('\n');
}
let mut resolution_count = 0;
write!(&mut msg, "\nPossible resolutions:\n\n").unwrap();
let mut shift_items = Vec::new();
let mut reduce_items = Vec::new();
for item in conflicting_items {
if item.is_done() {
reduce_items.push(item);
} else {
shift_items.push(item);
}
}
shift_items.sort_unstable();
reduce_items.sort_unstable();
let list_rule_names = |mut msg: &mut String, items: &[&ParseItem]| {
let mut last_rule_id = None;
for item in items {
if last_rule_id == Some(item.variable_index) {
continue;
}
if last_rule_id.is_some() {
write!(&mut msg, " and").unwrap();
}
last_rule_id = Some(item.variable_index);
write!(
msg,
" `{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
};
if actual_conflict.len() > 1 {
if shift_items.len() > 0 {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in",
resolution_count
)
.unwrap();
list_rule_names(&mut msg, &shift_items);
write!(&mut msg, " than in the other rules.\n").unwrap();
}
for item in &reduce_items {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
resolution_count,
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
}
}
if considered_associativity {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a left or right associativity in",
resolution_count
)
.unwrap();
list_rule_names(&mut msg, &reduce_items);
write!(&mut msg, "\n").unwrap();
}
resolution_count += 1;
write!(
&mut msg,
" {}: Add a conflict for these rules: ",
resolution_count
)
.unwrap();
for (i, symbol) in actual_conflict.iter().enumerate() {
if i > 0 {
write!(&mut msg, ", ").unwrap();
}
write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
}
write!(&mut msg, "\n").unwrap();
Err(anyhow!(msg))
}
fn compare_precedence(
grammar: &SyntaxGrammar,
left: &Precedence,
left_symbols: &[Symbol],
right: &Precedence,
right_symbols: &[Symbol],
) -> Ordering {
let precedence_entry_matches =
|entry: &PrecedenceEntry, precedence: &Precedence, symbols: &[Symbol]| -> bool {
match entry {
PrecedenceEntry::Name(n) => {
if let Precedence::Name(p) = precedence {
n == p
} else {
false
}
}
PrecedenceEntry::Symbol(n) => symbols
.iter()
.any(|s| &grammar.variables[s.index].name == n),
}
};
match (left, right) {
// Integer precedences can be compared to other integer precedences,
// and to the default precedence, which is zero.
(Precedence::Integer(l), Precedence::Integer(r)) if *l != 0 || *r != 0 => l.cmp(r),
(Precedence::Integer(l), Precedence::None) if *l != 0 => l.cmp(&0),
(Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(&r),
// Named precedences can be compared to other named precedences.
_ => grammar
.precedence_orderings
.iter()
.find_map(|list| {
let mut saw_left = false;
let mut saw_right = false;
for entry in list {
let matches_left = precedence_entry_matches(entry, left, left_symbols);
let matches_right = precedence_entry_matches(entry, right, right_symbols);
if matches_left {
saw_left = true;
if saw_right {
return Some(Ordering::Less);
}
} else if matches_right {
saw_right = true;
if saw_left {
return Some(Ordering::Greater);
}
}
}
None
})
.unwrap_or(Ordering::Equal),
}
}
fn get_auxiliary_node_info(
&self,
item_set: &ParseItemSet,
symbol: Symbol,
) -> AuxiliarySymbolInfo {
let parent_symbols = item_set
.entries
.iter()
.filter_map(|(item, _)| {
let variable_index = item.variable_index as usize;
if item.symbol() == Some(symbol)
&& !self.syntax_grammar.variables[variable_index].is_auxiliary()
{
Some(Symbol::non_terminal(variable_index))
} else {
None
}
})
.collect();
AuxiliarySymbolInfo {
auxiliary_symbol: symbol,
parent_symbols,
}
}
fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId {
let mut production_info = ProductionInfo {
alias_sequence: Vec::new(),
field_map: BTreeMap::new(),
};
for (i, step) in item.production.steps.iter().enumerate() {
production_info.alias_sequence.push(step.alias.clone());
if let Some(field_name) = &step.field_name {
production_info
.field_map
.entry(field_name.clone())
.or_insert(Vec::new())
.push(FieldLocation {
index: i,
inherited: false,
});
}
if step.symbol.kind == SymbolType::NonTerminal
&& !self.syntax_grammar.variables[step.symbol.index]
.kind
.is_visible()
{
let info = &self.variable_info[step.symbol.index];
for (field_name, _) in &info.fields {
production_info
.field_map
.entry(field_name.clone())
.or_insert(Vec::new())
.push(FieldLocation {
index: i,
inherited: true,
});
}
}
}
while production_info.alias_sequence.last() == Some(&None) {
production_info.alias_sequence.pop();
}
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
self.parse_table.max_aliased_production_length = item.production.steps.len()
}
if let Some(index) = self
.parse_table
.production_infos
.iter()
.position(|seq| *seq == production_info)
{
index
} else {
self.parse_table.production_infos.push(production_info);
self.parse_table.production_infos.len() - 1
}
}
fn symbol_name(&self, symbol: &Symbol) -> String {
match symbol.kind {
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "EOF".to_string(),
SymbolType::External => self.syntax_grammar.external_tokens[symbol.index]
.name
.clone(),
SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(),
SymbolType::Terminal => {
let variable = &self.lexical_grammar.variables[symbol.index];
if variable.kind == VariableType::Named {
variable.name.clone()
} else {
format!("'{}'", &variable.name)
}
}
}
}
}
fn populate_following_tokens(
result: &mut Vec<TokenSet>,
grammar: &SyntaxGrammar,
inlines: &InlinedProductionMap,
builder: &ParseItemSetBuilder,
) {
let productions = grammar
.variables
.iter()
.flat_map(|v| &v.productions)
.chain(&inlines.productions);
let all_tokens = (0..result.len())
.into_iter()
.map(Symbol::terminal)
.collect::<TokenSet>();
for production in productions {
for i in 1..production.steps.len() {
let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
let right_tokens = builder.first_set(&production.steps[i].symbol);
for left_token in left_tokens.iter() {
if left_token.is_terminal() {
result[left_token.index].insert_all_terminals(right_tokens);
}
}
}
}
for extra in &grammar.extra_symbols {
if extra.is_terminal() {
for entry in result.iter_mut() {
entry.insert(*extra);
}
result[extra.index] = all_tokens.clone();
}
}
}
pub(crate) fn build_parse_table<'a>(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
variable_info: &'a Vec<VariableInfo>,
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
populate_following_tokens(
&mut following_tokens,
syntax_grammar,
inlines,
&item_set_builder,
);
let (table, item_sets) = ParseTableBuilder {
syntax_grammar,
lexical_grammar,
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
state_ids_by_item_set: IndexMap::default(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),
parse_state_queue: VecDeque::new(),
parse_table: ParseTable {
states: Vec::new(),
symbols: Vec::new(),
external_lex_states: Vec::new(),
production_infos: Vec::new(),
max_aliased_production_length: 1,
},
}
.build()?;
Ok((table, following_tokens, item_sets))
}
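// A minimal sketch (as a runnable test) of how a named-precedence ordering is
// consulted, in the spirit of `compare_precedence` above: within a single
// ordering list, earlier entries win. The list and the names in it are hypothetical.
#[test]
fn example_named_precedence_ordering() {
    use std::cmp::Ordering;
    fn compare_named(ordering: &[&str], left: &str, right: &str) -> Ordering {
        let mut saw_left = false;
        let mut saw_right = false;
        for name in ordering {
            if *name == left {
                saw_left = true;
                if saw_right {
                    return Ordering::Less;
                }
            } else if *name == right {
                saw_right = true;
                if saw_left {
                    return Ordering::Greater;
                }
            }
        }
        Ordering::Equal
    }
    let ordering = ["unary", "binary", "ternary"];
    // "unary" appears first in the list, so it compares greater than "binary".
    assert_eq!(compare_named(&ordering, "unary", "binary"), Ordering::Greater);
    assert_eq!(compare_named(&ordering, "ternary", "unary"), Ordering::Less);
    // Names that never appear in the list compare as equal.
    assert_eq!(compare_named(&ordering, "x", "y"), Ordering::Equal);
}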

View File

@ -0,0 +1,75 @@
use crate::generate::grammars::LexicalGrammar;
use crate::generate::rules::Symbol;
use crate::generate::tables::{ParseStateId, ParseTable};
use std::fmt;
pub(crate) struct CoincidentTokenIndex<'a> {
entries: Vec<Vec<ParseStateId>>,
grammar: &'a LexicalGrammar,
n: usize,
}
impl<'a> CoincidentTokenIndex<'a> {
pub fn new(table: &ParseTable, lexical_grammar: &'a LexicalGrammar) -> Self {
let n = lexical_grammar.variables.len();
let mut result = Self {
n,
grammar: lexical_grammar,
entries: vec![Vec::new(); n * n],
};
for (i, state) in table.states.iter().enumerate() {
for symbol in state.terminal_entries.keys() {
if symbol.is_terminal() {
for other_symbol in state.terminal_entries.keys() {
if other_symbol.is_terminal() {
let index = result.index(symbol.index, other_symbol.index);
if result.entries[index].last().cloned() != Some(i) {
result.entries[index].push(i);
}
}
}
}
}
}
result
}
pub fn states_with(&self, a: Symbol, b: Symbol) -> &Vec<ParseStateId> {
&self.entries[self.index(a.index, b.index)]
}
pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
!self.entries[self.index(a.index, b.index)].is_empty()
}
fn index(&self, a: usize, b: usize) -> usize {
if a < b {
a * self.n + b
} else {
b * self.n + a
}
}
}
impl<'a> fmt::Debug for CoincidentTokenIndex<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "CoincidentTokenIndex {{\n")?;
write!(f, " entries: {{\n")?;
for i in 0..self.n {
write!(f, " {}: {{\n", self.grammar.variables[i].name)?;
for j in 0..self.n {
write!(
f,
" {}: {:?},\n",
self.grammar.variables[j].name,
self.entries[self.index(i, j)].len()
)?;
}
write!(f, " }},\n")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}
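// A minimal sketch (as a runnable test) of the symmetric pair index used by
// `CoincidentTokenIndex::index` above: the pairs (a, b) and (b, a) share one
// slot in a flat n*n table. The token count below is an arbitrary example value.
#[test]
fn example_symmetric_pair_index() {
    fn pair_index(n: usize, a: usize, b: usize) -> usize {
        if a < b {
            a * n + b
        } else {
            b * n + a
        }
    }
    let n = 4; // four tokens, indices 0..4
    assert_eq!(pair_index(n, 1, 3), pair_index(n, 3, 1)); // both map to 1 * 4 + 3
    assert_eq!(pair_index(n, 2, 2), 2 * n + 2);
}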

View File

@ -0,0 +1,416 @@
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet};
use lazy_static::lazy_static;
use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::u32;
lazy_static! {
static ref START_PRODUCTION: Production = Production {
dynamic_precedence: 0,
steps: vec![ProductionStep {
symbol: Symbol {
index: 0,
kind: SymbolType::NonTerminal,
},
precedence: Precedence::None,
associativity: None,
alias: None,
field_name: None,
}],
};
}
/// A ParseItem represents an in-progress match of a single production in a grammar.
#[derive(Clone, Copy, Debug)]
pub(crate) struct ParseItem<'a> {
/// The index of the parent rule within the grammar.
pub variable_index: u32,
/// The number of symbols that have already been matched.
pub step_index: u32,
/// The production being matched.
pub production: &'a Production,
/// A boolean indicating whether any of the already-matched children were
/// hidden nodes and had fields. Ordinarily, a parse item's behavior is not
/// affected by the symbols of its preceding children; it only needs to
/// keep track of their fields and aliases.
///
/// Take for example these two items:
/// X -> a b • c
/// X -> a g • c
///
/// They can be considered equivalent, for the purposes of parse table
/// generation, because they entail the same actions. But if this flag is
/// true, then the item's set of inherited fields may depend on the specific
/// symbols of its preceding children.
pub has_preceding_inherited_fields: bool,
}
/// A ParseItemSet represents a set of in-progress matches of productions in a
/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that
/// are allowed to *follow* the in-progress rule. This object corresponds directly
/// to a state in the final parse table.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSet<'a> {
pub entries: Vec<(ParseItem<'a>, TokenSet)>,
}
/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead
/// information. Parse states with the same core are candidates for merging.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSetCore<'a> {
pub entries: Vec<ParseItem<'a>>,
}
pub(crate) struct ParseItemDisplay<'a>(
pub &'a ParseItem<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
pub(crate) struct TokenSetDisplay<'a>(
pub &'a TokenSet,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
pub(crate) struct ParseItemSetDisplay<'a>(
pub &'a ParseItemSet<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
impl<'a> ParseItem<'a> {
pub fn start() -> Self {
ParseItem {
variable_index: u32::MAX,
production: &START_PRODUCTION,
step_index: 0,
has_preceding_inherited_fields: false,
}
}
pub fn step(&self) -> Option<&'a ProductionStep> {
self.production.steps.get(self.step_index as usize)
}
pub fn symbol(&self) -> Option<Symbol> {
self.step().map(|step| step.symbol)
}
pub fn associativity(&self) -> Option<Associativity> {
self.prev_step().and_then(|step| step.associativity)
}
pub fn precedence(&self) -> &Precedence {
self.prev_step()
.map_or(&Precedence::None, |step| &step.precedence)
}
pub fn prev_step(&self) -> Option<&'a ProductionStep> {
if self.step_index > 0 {
Some(&self.production.steps[self.step_index as usize - 1])
} else {
None
}
}
pub fn is_done(&self) -> bool {
self.step_index as usize == self.production.steps.len()
}
pub fn is_augmented(&self) -> bool {
self.variable_index == u32::MAX
}
/// Create an item like this one, but advanced by one step.
pub fn successor(&self) -> ParseItem<'a> {
ParseItem {
variable_index: self.variable_index,
production: self.production,
step_index: self.step_index + 1,
has_preceding_inherited_fields: self.has_preceding_inherited_fields,
}
}
/// Create an item identical to this one, but with a different production.
/// This is used when dynamically "inlining" certain symbols in a production.
pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
let mut result = self.clone();
result.production = production;
result
}
}
impl<'a> ParseItemSet<'a> {
pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, TokenSet)>) -> Self {
let mut result = Self::default();
for (item, lookaheads) in elements {
result.insert(item, &lookaheads);
}
result
}
pub fn insert(&mut self, item: ParseItem<'a>, lookaheads: &TokenSet) -> &mut TokenSet {
match self.entries.binary_search_by(|(i, _)| i.cmp(&item)) {
Err(i) => {
self.entries.insert(i, (item, lookaheads.clone()));
&mut self.entries[i].1
}
Ok(i) => {
self.entries[i].1.insert_all(lookaheads);
&mut self.entries[i].1
}
}
}
pub fn core(&self) -> ParseItemSetCore<'a> {
ParseItemSetCore {
entries: self.entries.iter().map(|e| e.0).collect(),
}
}
}
impl<'a> Default for ParseItemSet<'a> {
fn default() -> Self {
Self {
entries: Vec::new(),
}
}
}
impl<'a> fmt::Display for ParseItemDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
if self.0.is_augmented() {
write!(f, "START →")?;
} else {
write!(
f,
"{} →",
&self.1.variables[self.0.variable_index as usize].name
)?;
}
for (i, step) in self.0.production.steps.iter().enumerate() {
if i == self.0.step_index as usize {
write!(f, " •")?;
if let Some(associativity) = step.associativity {
if !step.precedence.is_none() {
write!(f, " ({} {:?})", step.precedence, associativity)?;
} else {
write!(f, " ({:?})", associativity)?;
}
} else if !step.precedence.is_none() {
write!(f, " ({})", step.precedence)?;
}
}
write!(f, " ")?;
if step.symbol.is_terminal() {
if let Some(variable) = self.2.variables.get(step.symbol.index) {
write!(f, "{}", &variable.name)?;
} else {
write!(f, "{}-{}", "terminal", step.symbol.index)?;
}
} else if step.symbol.is_external() {
write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
} else {
write!(f, "{}", &self.1.variables[step.symbol.index].name)?;
}
if let Some(alias) = &step.alias {
write!(f, "@{}", alias.value)?;
}
}
if self.0.is_done() {
write!(f, " •")?;
if let Some(step) = self.0.production.steps.last() {
if let Some(associativity) = step.associativity {
if !step.precedence.is_none() {
write!(f, " ({} {:?})", step.precedence, associativity)?;
} else {
write!(f, " ({:?})", associativity)?;
}
} else if !step.precedence.is_none() {
write!(f, " ({})", step.precedence)?;
}
}
}
Ok(())
}
}
impl<'a> fmt::Display for TokenSetDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "[")?;
for (i, symbol) in self.0.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
if symbol.is_terminal() {
if let Some(variable) = self.2.variables.get(symbol.index) {
write!(f, "{}", &variable.name)?;
} else {
write!(f, "{}-{}", "terminal", symbol.index)?;
}
} else if symbol.is_external() {
write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
} else {
write!(f, "{}", &self.1.variables[symbol.index].name)?;
}
}
write!(f, "]")?;
Ok(())
}
}
impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
for (item, lookaheads) in self.0.entries.iter() {
writeln!(
f,
"{}\t{}",
ParseItemDisplay(item, self.1, self.2),
TokenSetDisplay(lookaheads, self.1, self.2)
)?;
}
Ok(())
}
}
impl<'a> Hash for ParseItem<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_u32(self.variable_index);
hasher.write_u32(self.step_index);
hasher.write_i32(self.production.dynamic_precedence);
hasher.write_usize(self.production.steps.len());
hasher.write_i32(self.has_preceding_inherited_fields as i32);
self.precedence().hash(hasher);
self.associativity().hash(hasher);
// The already-matched children don't play any role in the parse state for
// this item, unless any of the following are true:
// * the children have fields
// * the children have aliases
// * the children are hidden and have fields
// See the docs for `has_preceding_inherited_fields`.
for step in &self.production.steps[0..self.step_index as usize] {
step.alias.hash(hasher);
step.field_name.hash(hasher);
if self.has_preceding_inherited_fields {
step.symbol.hash(hasher);
}
}
for step in &self.production.steps[self.step_index as usize..] {
step.hash(hasher);
}
}
}
impl<'a> PartialEq for ParseItem<'a> {
fn eq(&self, other: &Self) -> bool {
if self.variable_index != other.variable_index
|| self.step_index != other.step_index
|| self.production.dynamic_precedence != other.production.dynamic_precedence
|| self.production.steps.len() != other.production.steps.len()
|| self.precedence() != other.precedence()
|| self.associativity() != other.associativity()
|| self.has_preceding_inherited_fields != other.has_preceding_inherited_fields
{
return false;
}
for (i, step) in self.production.steps.iter().enumerate() {
// See the previous comment (in the `Hash::hash` impl) regarding comparisons
// of parse items' already-completed steps.
if i < self.step_index as usize {
if step.alias != other.production.steps[i].alias {
return false;
}
if step.field_name != other.production.steps[i].field_name {
return false;
}
if self.has_preceding_inherited_fields
&& step.symbol != other.production.steps[i].symbol
{
return false;
}
} else if *step != other.production.steps[i] {
return false;
}
}
return true;
}
}
impl<'a> Ord for ParseItem<'a> {
fn cmp(&self, other: &Self) -> Ordering {
self.step_index
.cmp(&other.step_index)
.then_with(|| self.variable_index.cmp(&other.variable_index))
.then_with(|| {
self.production
.dynamic_precedence
.cmp(&other.production.dynamic_precedence)
})
.then_with(|| {
self.production
.steps
.len()
.cmp(&other.production.steps.len())
})
.then_with(|| self.precedence().cmp(&other.precedence()))
.then_with(|| self.associativity().cmp(&other.associativity()))
.then_with(|| {
for (i, step) in self.production.steps.iter().enumerate() {
// See the previous comment (in the `Hash::hash` impl) regarding comparisons
// of parse items' already-completed steps.
let o = if i < self.step_index as usize {
step.alias
.cmp(&other.production.steps[i].alias)
.then_with(|| {
step.field_name.cmp(&other.production.steps[i].field_name)
})
} else {
step.cmp(&other.production.steps[i])
};
if o != Ordering::Equal {
return o;
}
}
return Ordering::Equal;
})
}
}
impl<'a> PartialOrd for ParseItem<'a> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<'a> Eq for ParseItem<'a> {}
impl<'a> Hash for ParseItemSet<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_usize(self.entries.len());
for (item, lookaheads) in self.entries.iter() {
item.hash(hasher);
lookaheads.hash(hasher);
}
}
}
impl<'a> Hash for ParseItemSetCore<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_usize(self.entries.len());
for item in &self.entries {
item.hash(hasher);
}
}
}
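// A minimal sketch (as a runnable test) of the insert-or-merge pattern used by
// `ParseItemSet::insert` above, on a sorted Vec keyed by an integer instead of
// a ParseItem; the HashSet lookaheads here are purely illustrative.
#[test]
fn example_sorted_insert_or_merge() {
    use std::collections::HashSet;
    fn insert(entries: &mut Vec<(u32, HashSet<u32>)>, key: u32, lookaheads: &HashSet<u32>) {
        match entries.binary_search_by(|(k, _)| k.cmp(&key)) {
            // Not present yet: insert at the position that keeps the Vec sorted.
            Err(i) => entries.insert(i, (key, lookaheads.clone())),
            // Already present: merge the lookaheads into the existing entry.
            Ok(i) => entries[i].1.extend(lookaheads.iter().copied()),
        }
    }
    let mut entries: Vec<(u32, HashSet<u32>)> = Vec::new();
    insert(&mut entries, 2, &HashSet::from([10]));
    insert(&mut entries, 1, &HashSet::from([20]));
    insert(&mut entries, 2, &HashSet::from([30]));
    // Keys stay sorted, and key 2 now carries both of its lookahead sets merged.
    assert_eq!(entries[0].0, 1);
    assert_eq!(entries[1].0, 2);
    assert!(entries[1].1.contains(&10) && entries[1].1.contains(&30));
}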

View File

@ -0,0 +1,347 @@
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Symbol, SymbolType, TokenSet};
use std::collections::{HashMap, HashSet};
use std::fmt;
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition<'a> {
item: ParseItem<'a>,
info: FollowSetInfo,
}
#[derive(Clone, Debug, PartialEq, Eq)]
struct FollowSetInfo {
lookaheads: TokenSet,
propagates_lookaheads: bool,
}
pub(crate) struct ParseItemSetBuilder<'a> {
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
first_sets: HashMap<Symbol, TokenSet>,
last_sets: HashMap<Symbol, TokenSet>,
inlines: &'a InlinedProductionMap,
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
}
fn find_or_push<T: Eq>(vector: &mut Vec<T>, value: T) {
if !vector.contains(&value) {
vector.push(value);
}
}
impl<'a> ParseItemSetBuilder<'a> {
pub fn new(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
) -> Self {
let mut result = Self {
syntax_grammar,
lexical_grammar,
first_sets: HashMap::new(),
last_sets: HashMap::new(),
inlines,
transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
};
// For each grammar symbol, populate the FIRST and LAST sets: the set of
// terminals that appear at the beginning and end of that symbol's productions,
// respectively.
//
// For a terminal symbol, the FIRST and LAST set just consists of the
// terminal itself.
for i in 0..lexical_grammar.variables.len() {
let symbol = Symbol::terminal(i);
let mut set = TokenSet::new();
set.insert(symbol);
result.first_sets.insert(symbol, set.clone());
result.last_sets.insert(symbol, set);
}
for i in 0..syntax_grammar.external_tokens.len() {
let symbol = Symbol::external(i);
let mut set = TokenSet::new();
set.insert(symbol);
result.first_sets.insert(symbol, set.clone());
result.last_sets.insert(symbol, set);
}
// The FIRST set of a non-terminal `i` is the union of the following sets:
// * the set of all terminals that appear at the beginnings of i's productions
// * the FIRST sets of all the non-terminals that appear at the beginnings
// of i's productions
//
// Rather than computing these sets using recursion, we use an explicit stack
// called `symbols_to_process`.
let mut symbols_to_process = Vec::new();
let mut processed_non_terminals = HashSet::new();
for i in 0..syntax_grammar.variables.len() {
let symbol = Symbol::non_terminal(i);
let first_set = &mut result.first_sets.entry(symbol).or_insert(TokenSet::new());
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push(symbol);
while let Some(current_symbol) = symbols_to_process.pop() {
if current_symbol.is_terminal() || current_symbol.is_external() {
first_set.insert(current_symbol);
} else if processed_non_terminals.insert(current_symbol) {
for production in syntax_grammar.variables[current_symbol.index]
.productions
.iter()
{
if let Some(step) = production.steps.first() {
symbols_to_process.push(step.symbol);
}
}
}
}
// The LAST set is defined in a similar way to the FIRST set.
let last_set = &mut result.last_sets.entry(symbol).or_insert(TokenSet::new());
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push(symbol);
while let Some(current_symbol) = symbols_to_process.pop() {
if current_symbol.is_terminal() || current_symbol.is_external() {
last_set.insert(current_symbol);
} else if processed_non_terminals.insert(current_symbol) {
for production in syntax_grammar.variables[current_symbol.index]
.productions
.iter()
{
if let Some(step) = production.steps.last() {
symbols_to_process.push(step.symbol);
}
}
}
}
}
// To compute an item set's transitive closure, we find each item in the set
// whose next symbol is a non-terminal, and we add new items to the set for
// each of that symbol's productions. These productions might themselves begin
// with non-terminals, so the process continues recursively. In this process,
// the total set of entries that get added depends only on two things:
// * the set of non-terminal symbols that occur at each item's current position
// * the set of terminals that can occur after each of these non-terminal symbols
//
// So we can avoid a lot of duplicated recursive work by precomputing, for each
// non-terminal symbol `i`, a final list of *additions* that must be made to an
// item set when `i` occurs as the next symbol in one of its core items. The
// structure of an *addition* is as follows:
// * `item` - the new item that must be added as part of the expansion of `i`
// * `lookaheads` - lookahead tokens that can always come after that item in
// the expansion of `i`
// * `propagates_lookaheads` - a boolean indicating whether or not `item` can
// occur at the *end* of the expansion of `i`, so that i's own current
// lookahead tokens can occur after `item`.
//
// Again, rather than computing these additions recursively, we use an explicit
// stack called `entries_to_process`.
for i in 0..syntax_grammar.variables.len() {
let empty_lookaheads = TokenSet::new();
let mut entries_to_process = vec![(i, &empty_lookaheads, true)];
// First, build up a map whose keys are all of the non-terminals that can
// appear at the beginning of non-terminal `i`, and whose values store
// information about the tokens that can follow each non-terminal.
let mut follow_set_info_by_non_terminal = HashMap::new();
while let Some(entry) = entries_to_process.pop() {
let (variable_index, lookaheads, propagates_lookaheads) = entry;
let existing_info = follow_set_info_by_non_terminal
.entry(variable_index)
.or_insert_with(|| FollowSetInfo {
lookaheads: TokenSet::new(),
propagates_lookaheads: false,
});
let did_add_follow_set_info;
if propagates_lookaheads {
did_add_follow_set_info = !existing_info.propagates_lookaheads;
existing_info.propagates_lookaheads = true;
} else {
did_add_follow_set_info = existing_info.lookaheads.insert_all(lookaheads);
}
if did_add_follow_set_info {
for production in &syntax_grammar.variables[variable_index].productions {
if let Some(symbol) = production.first_symbol() {
if symbol.is_non_terminal() {
if production.steps.len() == 1 {
entries_to_process.push((
symbol.index,
lookaheads,
propagates_lookaheads,
));
} else {
entries_to_process.push((
symbol.index,
&result.first_sets[&production.steps[1].symbol],
false,
));
}
}
}
}
}
}
// Store all of those non-terminals' productions, along with their associated
// lookahead info, as *additions* associated with non-terminal `i`.
let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
let variable = &syntax_grammar.variables[variable_index];
let non_terminal = Symbol::non_terminal(variable_index);
let variable_index = variable_index as u32;
if syntax_grammar.variables_to_inline.contains(&non_terminal) {
continue;
}
for production in &variable.productions {
let item = ParseItem {
variable_index,
production,
step_index: 0,
has_preceding_inherited_fields: false,
};
if let Some(inlined_productions) =
inlines.inlined_productions(item.production, item.step_index)
{
for production in inlined_productions {
find_or_push(
additions_for_non_terminal,
TransitiveClosureAddition {
item: item.substitute_production(production),
info: follow_set_info.clone(),
},
);
}
} else {
find_or_push(
additions_for_non_terminal,
TransitiveClosureAddition {
item,
info: follow_set_info.clone(),
},
);
}
}
}
}
result
}
pub(crate) fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
let mut result = ParseItemSet::default();
for (item, lookaheads) in &item_set.entries {
if let Some(productions) = self
.inlines
.inlined_productions(item.production, item.step_index)
{
for production in productions {
self.add_item(
&mut result,
item.substitute_production(production),
lookaheads,
);
}
} else {
self.add_item(&mut result, *item, lookaheads);
}
}
result
}
pub fn first_set(&self, symbol: &Symbol) -> &TokenSet {
&self.first_sets[symbol]
}
pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
&self.last_sets[symbol]
}
fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) {
if let Some(step) = item.step() {
if step.symbol.is_non_terminal() {
let next_step = item.successor().step();
// Determine which tokens can follow this non-terminal.
let following_tokens = if let Some(next_step) = next_step {
self.first_sets.get(&next_step.symbol).unwrap()
} else {
&lookaheads
};
// Use the pre-computed *additions* to expand the non-terminal.
for addition in &self.transitive_closure_additions[step.symbol.index] {
let lookaheads = set.insert(addition.item, &addition.info.lookaheads);
if addition.info.propagates_lookaheads {
lookaheads.insert_all(following_tokens);
}
}
}
}
set.insert(item, lookaheads);
}
}
impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ParseItemSetBuilder {{\n")?;
write!(f, " first_sets: {{\n")?;
for (symbol, first_set) in &self.first_sets {
let name = match symbol.kind {
SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
};
write!(
f,
" first({:?}): {}\n",
name,
TokenSetDisplay(first_set, &self.syntax_grammar, &self.lexical_grammar)
)?;
}
write!(f, " }}\n")?;
write!(f, " last_sets: {{\n")?;
for (symbol, last_set) in &self.last_sets {
let name = match symbol.kind {
SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
};
write!(
f,
" last({:?}): {}\n",
name,
TokenSetDisplay(last_set, &self.syntax_grammar, &self.lexical_grammar)
)?;
}
write!(f, " }}\n")?;
write!(f, " additions: {{\n")?;
for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
write!(f, " {}: {{\n", variable.name)?;
for addition in &self.transitive_closure_additions[i] {
write!(
f,
" {}\n",
ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar)
)?;
}
write!(f, " }},\n")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}
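// A minimal, self-contained sketch of the FIRST-set fixed point that `first_sets` above
// stores. Toy `char` symbols (uppercase = non-terminal, lowercase = terminal) stand in for
// the crate's `Symbol` and `TokenSet` types; only the iterate-until-stable structure is shown.
#[cfg(test)]
mod first_set_sketch {
    use std::collections::{HashMap, HashSet};

    // For every non-terminal, collect the terminals that can begin one of its productions,
    // repeating passes until no set grows.
    fn first_sets(productions: &HashMap<char, Vec<Vec<char>>>) -> HashMap<char, HashSet<char>> {
        let mut result: HashMap<char, HashSet<char>> =
            productions.keys().map(|&nt| (nt, HashSet::new())).collect();
        let mut changed = true;
        while changed {
            changed = false;
            for (&non_terminal, rules) in productions {
                for rule in rules {
                    if let Some(&first_symbol) = rule.first() {
                        let additions: HashSet<char> = if first_symbol.is_uppercase() {
                            result.get(&first_symbol).cloned().unwrap_or_default()
                        } else {
                            std::iter::once(first_symbol).collect()
                        };
                        let set = result.get_mut(&non_terminal).unwrap();
                        for c in additions {
                            changed |= set.insert(c);
                        }
                    }
                }
            }
        }
        result
    }

    #[test]
    fn toy_first_sets() {
        // S -> A b | c,  A -> a
        let productions: HashMap<char, Vec<Vec<char>>> = vec![
            ('S', vec![vec!['A', 'b'], vec!['c']]),
            ('A', vec![vec!['a']]),
        ]
        .into_iter()
        .collect();
        let sets = first_sets(&productions);
        let expected_s: HashSet<char> = vec!['a', 'c'].into_iter().collect();
        let expected_a: HashSet<char> = vec!['a'].into_iter().collect();
        assert_eq!(sets[&'S'], expected_s);
        assert_eq!(sets[&'A'], expected_a);
    }
}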

View File

@ -0,0 +1,511 @@
use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
use crate::generate::tables::{
GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
};
use log::info;
use std::collections::{HashMap, HashSet};
use std::mem;
pub(crate) fn minimize_parse_table(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
token_conflict_map: &TokenConflictMap,
keywords: &TokenSet,
) {
let mut minimizer = Minimizer {
parse_table,
syntax_grammar,
lexical_grammar,
token_conflict_map,
keywords,
simple_aliases,
};
minimizer.merge_compatible_states();
minimizer.remove_unit_reductions();
minimizer.remove_unused_states();
minimizer.reorder_states_by_descending_size();
}
struct Minimizer<'a> {
parse_table: &'a mut ParseTable,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
token_conflict_map: &'a TokenConflictMap<'a>,
keywords: &'a TokenSet,
simple_aliases: &'a AliasMap,
}
impl<'a> Minimizer<'a> {
fn remove_unit_reductions(&mut self) {
let mut aliased_symbols = HashSet::new();
for variable in &self.syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if step.alias.is_some() {
aliased_symbols.insert(step.symbol);
}
}
}
}
let mut unit_reduction_symbols_by_state = HashMap::new();
for (i, state) in self.parse_table.states.iter().enumerate() {
let mut only_unit_reductions = true;
let mut unit_reduction_symbol = None;
for (_, entry) in &state.terminal_entries {
for action in &entry.actions {
match action {
ParseAction::ShiftExtra => continue,
ParseAction::Reduce {
child_count: 1,
production_id: 0,
symbol,
..
} => {
if !self.simple_aliases.contains_key(&symbol)
&& !self.syntax_grammar.supertype_symbols.contains(&symbol)
&& !aliased_symbols.contains(&symbol)
&& self.syntax_grammar.variables[symbol.index].kind
!= VariableType::Named
&& (unit_reduction_symbol.is_none()
|| unit_reduction_symbol == Some(symbol))
{
unit_reduction_symbol = Some(symbol);
continue;
}
}
_ => {}
}
only_unit_reductions = false;
break;
}
if !only_unit_reductions {
break;
}
}
if let Some(symbol) = unit_reduction_symbol {
if only_unit_reductions {
unit_reduction_symbols_by_state.insert(i, *symbol);
}
}
}
for state in self.parse_table.states.iter_mut() {
let mut done = false;
while !done {
done = true;
state.update_referenced_states(|other_state_id, state| {
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
done = false;
match state.nonterminal_entries.get(symbol) {
Some(GotoAction::Goto(state_id)) => *state_id,
_ => other_state_id,
}
} else {
other_state_id
}
})
}
}
}
fn merge_compatible_states(&mut self) {
let core_count = 1 + self
.parse_table
.states
.iter()
.map(|state| state.core_id)
.max()
.unwrap();
// Initially group the states by their parse item set core.
let mut group_ids_by_state_id = Vec::with_capacity(self.parse_table.states.len());
let mut state_ids_by_group_id = vec![Vec::<ParseStateId>::new(); core_count];
for (i, state) in self.parse_table.states.iter().enumerate() {
state_ids_by_group_id[state.core_id].push(i);
group_ids_by_state_id.push(state.core_id);
}
split_state_id_groups(
&self.parse_table.states,
&mut state_ids_by_group_id,
&mut group_ids_by_state_id,
0,
|left, right, groups| self.states_conflict(left, right, groups),
);
while split_state_id_groups(
&self.parse_table.states,
&mut state_ids_by_group_id,
&mut group_ids_by_state_id,
0,
|left, right, groups| self.state_successors_differ(left, right, groups),
) {
continue;
}
let error_group_index = state_ids_by_group_id
.iter()
.position(|g| g.contains(&0))
.unwrap();
let start_group_index = state_ids_by_group_id
.iter()
.position(|g| g.contains(&1))
.unwrap();
state_ids_by_group_id.swap(error_group_index, 0);
state_ids_by_group_id.swap(start_group_index, 1);
// Create a list of new parse states: one state for each group of old states.
let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
for state_ids in &state_ids_by_group_id {
// Initialize the new state based on the first old state in the group.
let mut parse_state = ParseState::default();
mem::swap(&mut parse_state, &mut self.parse_table.states[state_ids[0]]);
// Extend the new state with all of the actions from the other old states
// in the group.
for state_id in &state_ids[1..] {
let mut other_parse_state = ParseState::default();
mem::swap(
&mut other_parse_state,
&mut self.parse_table.states[*state_id],
);
parse_state
.terminal_entries
.extend(other_parse_state.terminal_entries);
parse_state
.nonterminal_entries
.extend(other_parse_state.nonterminal_entries);
}
// Update the new state's outgoing references using the new grouping.
parse_state.update_referenced_states(|state_id, _| group_ids_by_state_id[state_id]);
new_states.push(parse_state);
}
self.parse_table.states = new_states;
}
fn states_conflict(
&self,
left_state: &ParseState,
right_state: &ParseState,
group_ids_by_state_id: &Vec<ParseStateId>,
) -> bool {
for (token, left_entry) in &left_state.terminal_entries {
if let Some(right_entry) = right_state.terminal_entries.get(token) {
if self.entries_conflict(
left_state.id,
right_state.id,
token,
left_entry,
right_entry,
group_ids_by_state_id,
) {
return true;
}
} else if self.token_conflicts(
left_state.id,
right_state.id,
right_state.terminal_entries.keys(),
*token,
) {
return true;
}
}
for token in right_state.terminal_entries.keys() {
if !left_state.terminal_entries.contains_key(token) {
if self.token_conflicts(
left_state.id,
right_state.id,
left_state.terminal_entries.keys(),
*token,
) {
return true;
}
}
}
false
}
fn state_successors_differ(
&self,
state1: &ParseState,
state2: &ParseState,
group_ids_by_state_id: &Vec<ParseStateId>,
) -> bool {
for (token, entry1) in &state1.terminal_entries {
if let ParseAction::Shift { state: s1, .. } = entry1.actions.last().unwrap() {
if let Some(entry2) = state2.terminal_entries.get(token) {
if let ParseAction::Shift { state: s2, .. } = entry2.actions.last().unwrap() {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {} {}",
state1.id,
state2.id,
self.symbol_name(token),
s1,
s2,
);
return true;
}
}
}
}
}
for (symbol, s1) in &state1.nonterminal_entries {
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
match (s1, s2) {
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {} {}",
state1.id,
state2.id,
self.symbol_name(symbol),
s1,
s2,
);
return true;
}
}
_ => return true,
}
}
}
false
}
fn entries_conflict(
&self,
state_id1: ParseStateId,
state_id2: ParseStateId,
token: &Symbol,
entry1: &ParseTableEntry,
entry2: &ParseTableEntry,
group_ids_by_state_id: &Vec<ParseStateId>,
) -> bool {
// To be compatible, entries need to have the same actions.
let actions1 = &entry1.actions;
let actions2 = &entry2.actions;
if actions1.len() != actions2.len() {
info!(
"split states {} {} - differing action counts for token {}",
state_id1,
state_id2,
self.symbol_name(token)
);
return true;
}
for (i, action1) in actions1.iter().enumerate() {
let action2 = &actions2[i];
// Two shift actions are equivalent if their destinations are in the same group.
if let (
ParseAction::Shift {
state: s1,
is_repetition: is_repetition1,
},
ParseAction::Shift {
state: s2,
is_repetition: is_repetition2,
},
) = (action1, action2)
{
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 == group2 && is_repetition1 == is_repetition2 {
continue;
} else {
info!(
"split states {} {} - successors for {} are split: {} {}",
state_id1,
state_id2,
self.symbol_name(token),
s1,
s2,
);
return true;
}
} else if action1 != action2 {
info!(
"split states {} {} - unequal actions for {}",
state_id1,
state_id2,
self.symbol_name(token),
);
return true;
}
}
false
}
fn token_conflicts<'b>(
&self,
left_id: ParseStateId,
right_id: ParseStateId,
existing_tokens: impl Iterator<Item = &'b Symbol>,
new_token: Symbol,
) -> bool {
if new_token == Symbol::end_of_nonterminal_extra() {
info!(
"split states {} {} - end of non-terminal extra",
left_id, right_id,
);
return true;
}
// Do not add external tokens; they could conflict lexically with any of the state's
// existing lookahead tokens.
if new_token.is_external() {
info!(
"split states {} {} - external token {}",
left_id,
right_id,
self.symbol_name(&new_token),
);
return true;
}
// Do not add tokens which are both internal and external. Their validity could
// influence the behavior of the external scanner.
if self
.syntax_grammar
.external_tokens
.iter()
.any(|external| external.corresponding_internal_token == Some(new_token))
{
info!(
"split states {} {} - internal/external token {}",
left_id,
right_id,
self.symbol_name(&new_token),
);
return true;
}
// Do not add a token if it conflicts with an existing token.
for token in existing_tokens {
if token.is_terminal() {
if !(self.syntax_grammar.word_token == Some(*token)
&& self.keywords.contains(&new_token))
&& !(self.syntax_grammar.word_token == Some(new_token)
&& self.keywords.contains(token))
&& (self
.token_conflict_map
.does_conflict(new_token.index, token.index)
|| self
.token_conflict_map
.does_match_same_string(new_token.index, token.index))
{
info!(
"split states {} {} - token {} conflicts with {}",
left_id,
right_id,
self.symbol_name(&new_token),
self.symbol_name(token),
);
return true;
}
}
}
false
}
fn symbol_name(&self, symbol: &Symbol) -> &String {
if symbol.is_non_terminal() {
&self.syntax_grammar.variables[symbol.index].name
} else if symbol.is_external() {
&self.syntax_grammar.external_tokens[symbol.index].name
} else {
&self.lexical_grammar.variables[symbol.index].name
}
}
fn remove_unused_states(&mut self) {
let mut state_usage_map = vec![false; self.parse_table.states.len()];
state_usage_map[0] = true;
state_usage_map[1] = true;
for state in &self.parse_table.states {
for referenced_state in state.referenced_states() {
state_usage_map[referenced_state] = true;
}
}
let mut removed_predecessor_count = 0;
let mut state_replacement_map = vec![0; self.parse_table.states.len()];
for state_id in 0..self.parse_table.states.len() {
state_replacement_map[state_id] = state_id - removed_predecessor_count;
if !state_usage_map[state_id] {
removed_predecessor_count += 1;
}
}
let mut state_id = 0;
let mut original_state_id = 0;
while state_id < self.parse_table.states.len() {
if state_usage_map[original_state_id] {
self.parse_table.states[state_id].update_referenced_states(|other_state_id, _| {
state_replacement_map[other_state_id]
});
state_id += 1;
} else {
self.parse_table.states.remove(state_id);
}
original_state_id += 1;
}
}
fn reorder_states_by_descending_size(&mut self) {
// Get a mapping of old state index -> new_state_index
let mut old_ids_by_new_id = (0..self.parse_table.states.len()).collect::<Vec<_>>();
old_ids_by_new_id.sort_unstable_by_key(|i| {
// Don't change states 0 (the error state) or 1 (the start state).
if *i <= 1 {
return *i as i64 - 1_000_000;
}
// Reorder all the other states by descending symbol count.
let state = &self.parse_table.states[*i];
-((state.terminal_entries.len() + state.nonterminal_entries.len()) as i64)
});
// Get the inverse mapping
let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
for (id, old_id) in old_ids_by_new_id.iter().enumerate() {
new_ids_by_old_id[*old_id] = id;
}
// Reorder the parse states and update their references to reflect
// the new ordering.
self.parse_table.states = old_ids_by_new_id
.iter()
.map(|old_id| {
let mut state = ParseState::default();
mem::swap(&mut state, &mut self.parse_table.states[*old_id]);
state.update_referenced_states(|id, _| new_ids_by_old_id[id]);
state
})
.collect();
}
}
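// A minimal sketch of the reordering trick used in `reorder_states_by_descending_size`:
// states 0 (the error state) and 1 (the start state) keep their positions via strongly
// negative sort keys, the remaining states are sorted by descending size, and the inverse
// map is what gets fed to `update_referenced_states`. The `sizes` vector is a stand-in for
// `terminal_entries.len() + nonterminal_entries.len()`.
#[cfg(test)]
mod reorder_sketch {
    #[test]
    fn pin_first_two_then_sort_by_descending_size() {
        let sizes: Vec<usize> = vec![3, 1, 2, 9, 5];
        let mut old_ids_by_new_id: Vec<usize> = (0..sizes.len()).collect();
        old_ids_by_new_id.sort_unstable_by_key(|&i| {
            if i <= 1 {
                return i as i64 - 1_000_000;
            }
            -(sizes[i] as i64)
        });
        assert_eq!(old_ids_by_new_id, vec![0, 1, 3, 4, 2]);

        // Invert the mapping: for each old id, the position it ended up in.
        let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
        for (new_id, &old_id) in old_ids_by_new_id.iter().enumerate() {
            new_ids_by_old_id[old_id] = new_id;
        }
        assert_eq!(new_ids_by_old_id, vec![0, 1, 4, 2, 3]);
    }
}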

View File

@ -0,0 +1,479 @@
pub(crate) mod build_lex_table;
pub(crate) mod build_parse_table;
mod coincident_tokens;
mod item;
mod item_set_builder;
mod minimize_parse_table;
mod token_conflicts;
use self::build_lex_table::build_lex_table;
use self::build_parse_table::{build_parse_table, ParseStateInfo};
use self::coincident_tokens::CoincidentTokenIndex;
use self::minimize_parse_table::minimize_parse_table;
use self::token_conflicts::TokenConflictMap;
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::NfaCursor;
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
use anyhow::Result;
use log::info;
use std::collections::{BTreeSet, HashMap};
pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>,
inlines: &InlinedProductionMap,
report_symbol_name: Option<&str>,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
let (mut parse_table, following_tokens, parse_state_info) =
build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?;
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
let keywords = identify_keywords(
lexical_grammar,
&parse_table,
syntax_grammar.word_token,
&token_conflict_map,
&coincident_token_index,
);
populate_error_state(
&mut parse_table,
syntax_grammar,
lexical_grammar,
&coincident_token_index,
&token_conflict_map,
&keywords,
);
populate_used_symbols(&mut parse_table, syntax_grammar, lexical_grammar);
minimize_parse_table(
&mut parse_table,
syntax_grammar,
lexical_grammar,
simple_aliases,
&token_conflict_map,
&keywords,
);
let (main_lex_table, keyword_lex_table) = build_lex_table(
&mut parse_table,
syntax_grammar,
lexical_grammar,
&keywords,
&coincident_token_index,
&token_conflict_map,
);
populate_external_lex_states(&mut parse_table, syntax_grammar);
mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);
if let Some(report_symbol_name) = report_symbol_name {
report_state_info(
&syntax_grammar,
&lexical_grammar,
&parse_table,
&parse_state_info,
report_symbol_name,
);
}
Ok((
parse_table,
main_lex_table,
keyword_lex_table,
syntax_grammar.word_token,
))
}
fn populate_error_state(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
coincident_token_index: &CoincidentTokenIndex,
token_conflict_map: &TokenConflictMap,
keywords: &TokenSet,
) {
let state = &mut parse_table.states[0];
let n = lexical_grammar.variables.len();
// First identify the *conflict-free tokens*: tokens that do not overlap with
// any other token in any way, besides matching exactly the same string.
let conflict_free_tokens: TokenSet = (0..n)
.into_iter()
.filter_map(|i| {
let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
j != i
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
&& token_conflict_map.does_match_shorter_or_longer(i, j)
});
if conflicts_with_other_tokens {
None
} else {
info!(
"error recovery - token {} has no conflicts",
lexical_grammar.variables[i].name
);
Some(Symbol::terminal(i))
}
})
.collect();
let recover_entry = ParseTableEntry {
reusable: false,
actions: vec![ParseAction::Recover],
};
// Exclude from the error-recovery state any token that conflicts with one of
// the *conflict-free tokens* identified above.
for i in 0..n {
let symbol = Symbol::terminal(i);
if !conflict_free_tokens.contains(&symbol) && !keywords.contains(&symbol) {
if syntax_grammar.word_token != Some(symbol) {
if let Some(t) = conflict_free_tokens.iter().find(|t| {
!coincident_token_index.contains(symbol, *t)
&& token_conflict_map.does_conflict(symbol.index, t.index)
}) {
info!(
"error recovery - exclude token {} because of conflict with {}",
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
);
continue;
}
}
}
info!(
"error recovery - include token {}",
lexical_grammar.variables[i].name
);
state
.terminal_entries
.entry(symbol)
.or_insert_with(|| recover_entry.clone());
}
for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() {
if external_token.corresponding_internal_token.is_none() {
state
.terminal_entries
.entry(Symbol::external(i))
.or_insert_with(|| recover_entry.clone());
}
}
state.terminal_entries.insert(Symbol::end(), recover_entry);
}
fn populate_used_symbols(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) {
let mut terminal_usages = vec![false; lexical_grammar.variables.len()];
let mut non_terminal_usages = vec![false; syntax_grammar.variables.len()];
let mut external_usages = vec![false; syntax_grammar.external_tokens.len()];
for state in &parse_table.states {
for symbol in state.terminal_entries.keys() {
match symbol.kind {
SymbolType::Terminal => terminal_usages[symbol.index] = true,
SymbolType::External => external_usages[symbol.index] = true,
_ => {}
}
}
for symbol in state.nonterminal_entries.keys() {
non_terminal_usages[symbol.index] = true;
}
}
parse_table.symbols.push(Symbol::end());
for (i, value) in terminal_usages.into_iter().enumerate() {
if value {
// Assign the grammar's word token a low numerical index. This ensures that
// it can be stored in a subtree with no heap allocations, even for grammars with
// very large numbers of tokens. This is an optimization, but it's also important to
// ensure that a subtree's symbol can be successfully reassigned to the word token
// without having to move the subtree to the heap.
// See https://github.com/tree-sitter/tree-sitter/issues/258
if syntax_grammar.word_token.map_or(false, |t| t.index == i) {
parse_table.symbols.insert(1, Symbol::terminal(i));
} else {
parse_table.symbols.push(Symbol::terminal(i));
}
}
}
for (i, value) in external_usages.into_iter().enumerate() {
if value {
parse_table.symbols.push(Symbol::external(i));
}
}
for (i, value) in non_terminal_usages.into_iter().enumerate() {
if value {
parse_table.symbols.push(Symbol::non_terminal(i));
}
}
}
fn populate_external_lex_states(parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar) {
let mut external_tokens_by_corresponding_internal_token = HashMap::new();
for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() {
if let Some(symbol) = external_token.corresponding_internal_token {
external_tokens_by_corresponding_internal_token.insert(symbol.index, i);
}
}
// Ensure that external lex state 0 represents the absence of any
// external tokens.
parse_table.external_lex_states.push(TokenSet::new());
for i in 0..parse_table.states.len() {
let mut external_tokens = TokenSet::new();
for token in parse_table.states[i].terminal_entries.keys() {
if token.is_external() {
external_tokens.insert(*token);
} else if token.is_terminal() {
if let Some(index) =
external_tokens_by_corresponding_internal_token.get(&token.index)
{
external_tokens.insert(Symbol::external(*index));
}
}
}
parse_table.states[i].external_lex_state_id = parse_table
.external_lex_states
.iter()
.position(|tokens| *tokens == external_tokens)
.unwrap_or_else(|| {
parse_table.external_lex_states.push(external_tokens);
parse_table.external_lex_states.len() - 1
});
}
}
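// A small sketch of the dedup-or-append idiom used just above for external lex states:
// reuse the index of an existing identical entry, otherwise push a new one. Plain strings
// stand in for `TokenSet`s here.
#[cfg(test)]
mod intern_sketch {
    fn intern(list: &mut Vec<String>, value: &str) -> usize {
        list.iter()
            .position(|existing| existing == value)
            .unwrap_or_else(|| {
                list.push(value.to_string());
                list.len() - 1
            })
    }

    #[test]
    fn identical_values_share_an_index() {
        let mut list = Vec::new();
        assert_eq!(intern(&mut list, "a"), 0);
        assert_eq!(intern(&mut list, "b"), 1);
        assert_eq!(intern(&mut list, "a"), 0);
        assert_eq!(list, vec!["a", "b"]);
    }
}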
fn identify_keywords(
lexical_grammar: &LexicalGrammar,
parse_table: &ParseTable,
word_token: Option<Symbol>,
token_conflict_map: &TokenConflictMap,
coincident_token_index: &CoincidentTokenIndex,
) -> TokenSet {
if word_token.is_none() {
return TokenSet::new();
}
let word_token = word_token.unwrap();
let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());
// First find all of the candidate keyword tokens: tokens that start with
// letters or underscore and can match the same string as a word token.
let keyword_candidates: TokenSet = lexical_grammar
.variables
.iter()
.enumerate()
.filter_map(|(i, variable)| {
cursor.reset(vec![variable.start_state]);
if all_chars_are_alphabetical(&cursor)
&& token_conflict_map.does_match_same_string(i, word_token.index)
&& !token_conflict_map.does_match_different_string(i, word_token.index)
{
info!(
"Keywords - add candidate {}",
lexical_grammar.variables[i].name
);
Some(Symbol::terminal(i))
} else {
None
}
})
.collect();
// Exclude keyword candidates that shadow another keyword candidate.
let keywords: TokenSet = keyword_candidates
.iter()
.filter(|token| {
for other_token in keyword_candidates.iter() {
if other_token != *token
&& token_conflict_map.does_match_same_string(other_token.index, token.index)
{
info!(
"Keywords - exclude {} because it matches the same string as {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_token.index].name
);
return false;
}
}
true
})
.collect();
// Exclude keyword candidates for which substituting the keyword capture
// token would introduce new lexical conflicts with other tokens.
let keywords = keywords
.iter()
.filter(|token| {
for other_index in 0..lexical_grammar.variables.len() {
if keyword_candidates.contains(&Symbol::terminal(other_index)) {
continue;
}
// If the word token was already valid in every state containing
// this keyword candidate, then substituting the word token won't
// introduce any new lexical conflicts.
if coincident_token_index
.states_with(*token, Symbol::terminal(other_index))
.iter()
.all(|state_id| {
parse_table.states[*state_id]
.terminal_entries
.contains_key(&word_token)
})
{
continue;
}
if !token_conflict_map.has_same_conflict_status(
token.index,
word_token.index,
other_index,
) {
info!(
"Keywords - exclude {} because of conflict with {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_index].name
);
return false;
}
}
info!(
"Keywords - include {}",
lexical_grammar.variables[token.index].name,
);
true
})
.collect();
keywords
}
fn mark_fragile_tokens(
parse_table: &mut ParseTable,
lexical_grammar: &LexicalGrammar,
token_conflict_map: &TokenConflictMap,
) {
let n = lexical_grammar.variables.len();
let mut valid_tokens_mask = Vec::with_capacity(n);
for state in parse_table.states.iter_mut() {
valid_tokens_mask.clear();
valid_tokens_mask.resize(n, false);
for token in state.terminal_entries.keys() {
if token.is_terminal() {
valid_tokens_mask[token.index] = true;
}
}
for (token, entry) in state.terminal_entries.iter_mut() {
if token.is_terminal() {
for (i, is_valid) in valid_tokens_mask.iter().enumerate() {
if *is_valid {
if token_conflict_map.does_overlap(i, token.index) {
entry.reusable = false;
break;
}
}
}
}
}
}
}
fn report_state_info<'a>(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
parse_table: &ParseTable,
parse_state_info: &Vec<ParseStateInfo<'a>>,
report_symbol_name: &'a str,
) {
let mut all_state_indices = BTreeSet::new();
let mut symbols_with_state_indices = (0..syntax_grammar.variables.len())
.map(|i| (Symbol::non_terminal(i), BTreeSet::new()))
.collect::<Vec<_>>();
for (i, state) in parse_table.states.iter().enumerate() {
all_state_indices.insert(i);
let item_set = &parse_state_info[state.id];
for (item, _) in item_set.1.entries.iter() {
if !item.is_augmented() {
symbols_with_state_indices[item.variable_index as usize]
.1
.insert(i);
}
}
}
symbols_with_state_indices.sort_unstable_by_key(|(_, states)| -(states.len() as i32));
let max_symbol_name_length = syntax_grammar
.variables
.iter()
.map(|v| v.name.len())
.max()
.unwrap();
for (symbol, states) in &symbols_with_state_indices {
eprintln!(
"{:width$}\t{}",
syntax_grammar.variables[symbol.index].name,
states.len(),
width = max_symbol_name_length
);
}
eprintln!("");
let state_indices = if report_symbol_name == "*" {
Some(&all_state_indices)
} else {
symbols_with_state_indices
.iter()
.find_map(|(symbol, state_indices)| {
if syntax_grammar.variables[symbol.index].name == report_symbol_name {
Some(state_indices)
} else {
None
}
})
};
if let Some(state_indices) = state_indices {
let mut state_indices = state_indices.into_iter().cloned().collect::<Vec<_>>();
state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i));
for state_index in state_indices {
let id = parse_table.states[state_index].id;
let (preceding_symbols, item_set) = &parse_state_info[id];
eprintln!("state index: {}", state_index);
eprintln!("state id: {}", id);
eprint!("symbol sequence:");
for symbol in preceding_symbols {
let name = if symbol.is_terminal() {
&lexical_grammar.variables[symbol.index].name
} else if symbol.is_external() {
&syntax_grammar.external_tokens[symbol.index].name
} else {
&syntax_grammar.variables[symbol.index].name
};
eprint!(" {}", name);
}
eprintln!(
"\nitems:\n{}",
self::item::ParseItemSetDisplay(&item_set, syntax_grammar, lexical_grammar,),
);
}
}
}
fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
cursor.transition_chars().all(|(chars, is_sep)| {
if is_sep {
true
} else {
chars.chars().all(|c| c.is_alphabetic() || c == '_')
}
})
}
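// A minimal sketch of the keyword-capture idea that `identify_keywords` above supports:
// rather than giving every keyword its own lex path, the lexer matches the single word
// rule and the captured text is re-tagged by string comparison. The token names and the
// keyword list below are toy stand-ins, not the generated lexer's real representation.
#[cfg(test)]
mod keyword_capture_sketch {
    use std::collections::HashSet;

    fn classify(text: &str, keywords: &HashSet<&str>) -> &'static str {
        // The generic word rule already matched `text`; promote it when it is a keyword.
        if keywords.contains(text) {
            "keyword"
        } else {
            "identifier"
        }
    }

    #[test]
    fn word_matches_are_retagged_as_keywords() {
        let keywords: HashSet<&str> = vec!["in", "instanceof"].into_iter().collect();
        assert_eq!(classify("instanceof", &keywords), "keyword");
        assert_eq!(classify("instance", &keywords), "identifier");
    }
}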

View File

@ -0,0 +1,532 @@
use crate::generate::build_tables::item::TokenSetDisplay;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::generate::rules::TokenSet;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::fmt;
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct TokenConflictStatus {
matches_prefix: bool,
does_match_continuation: bool,
does_match_valid_continuation: bool,
does_match_separators: bool,
matches_same_string: bool,
matches_different_string: bool,
}
pub(crate) struct TokenConflictMap<'a> {
n: usize,
status_matrix: Vec<TokenConflictStatus>,
following_tokens: Vec<TokenSet>,
starting_chars_by_index: Vec<CharacterSet>,
following_chars_by_index: Vec<CharacterSet>,
grammar: &'a LexicalGrammar,
}
impl<'a> TokenConflictMap<'a> {
/// Create a token conflict map based on a lexical grammar, which describes the structure
/// of each token, and a `following_tokens` map, which indicates which tokens may appear
/// immediately after each other token.
///
/// This analyzes the possible kinds of overlap between each pair of tokens and stores
/// them in a matrix.
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
let starting_chars = get_starting_chars(&mut cursor, grammar);
let following_chars = get_following_chars(&starting_chars, &following_tokens);
let n = grammar.variables.len();
let mut status_matrix = vec![TokenConflictStatus::default(); n * n];
for i in 0..grammar.variables.len() {
for j in 0..i {
let status = compute_conflict_status(&mut cursor, grammar, &following_chars, i, j);
status_matrix[matrix_index(n, i, j)] = status.0;
status_matrix[matrix_index(n, j, i)] = status.1;
}
}
TokenConflictMap {
n,
status_matrix,
following_tokens,
starting_chars_by_index: starting_chars,
following_chars_by_index: following_chars,
grammar,
}
}
/// Do tokens `a` and `b` have the same kind of conflict with the token `other`?
pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool {
let left = &self.status_matrix[matrix_index(self.n, a, other)];
let right = &self.status_matrix[matrix_index(self.n, b, other)];
left == right
}
/// Does token `i` match any strings that token `j` does *not* match?
pub fn does_match_different_string(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_different_string
}
/// Does token `i` match any strings that token `j` also matches, where
/// token `i` is preferred over token `j`?
pub fn does_match_same_string(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
}
pub fn does_conflict(&self, i: usize, j: usize) -> bool {
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
entry.does_match_valid_continuation
|| entry.does_match_separators
|| entry.matches_same_string
}
/// Does token `i` match any strings that are *prefixes* of strings matched by `j`?
pub fn does_match_prefix(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_prefix
}
pub fn does_match_shorter_or_longer(&self, i: usize, j: usize) -> bool {
let entry = &self.status_matrix[matrix_index(self.n, i, j)];
let reverse_entry = &self.status_matrix[matrix_index(self.n, j, i)];
(entry.does_match_valid_continuation || entry.does_match_separators)
&& !reverse_entry.does_match_separators
}
pub fn does_overlap(&self, i: usize, j: usize) -> bool {
let status = &self.status_matrix[matrix_index(self.n, i, j)];
status.does_match_separators
|| status.matches_prefix
|| status.matches_same_string
|| status.does_match_continuation
}
pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool {
if left.0 > right.0 {
return true;
} else if left.0 < right.0 {
return false;
}
match grammar.variables[left.1]
.implicit_precedence
.cmp(&grammar.variables[right.1].implicit_precedence)
{
Ordering::Less => false,
Ordering::Greater => true,
Ordering::Equal => left.1 < right.1,
}
}
pub fn prefer_transition(
grammar: &LexicalGrammar,
t: &NfaTransition,
completed_id: usize,
completed_precedence: i32,
has_separator_transitions: bool,
) -> bool {
if t.precedence < completed_precedence {
return false;
}
if t.precedence == completed_precedence {
if t.is_separator {
return false;
}
if has_separator_transitions
&& grammar
.variable_indices_for_nfa_states(&t.states)
.position(|i| i == completed_id)
.is_none()
{
return false;
}
}
true
}
}
impl<'a> fmt::Debug for TokenConflictMap<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TokenConflictMap {{\n")?;
let syntax_grammar = SyntaxGrammar::default();
write!(f, " following_tokens: {{\n")?;
for (i, following_tokens) in self.following_tokens.iter().enumerate() {
write!(
f,
" follow({:?}): {},\n",
self.grammar.variables[i].name,
TokenSetDisplay(following_tokens, &syntax_grammar, &self.grammar)
)?;
}
write!(f, " }},\n")?;
write!(f, " starting_characters: {{\n")?;
for i in 0..self.n {
write!(
f,
" {:?}: {:?},\n",
self.grammar.variables[i].name, self.starting_chars_by_index[i]
)?;
}
write!(f, " }},\n")?;
write!(f, " following_characters: {{\n")?;
for i in 0..self.n {
write!(
f,
" {:?}: {:?},\n",
self.grammar.variables[i].name, self.following_chars_by_index[i]
)?;
}
write!(f, " }},\n")?;
write!(f, " status_matrix: {{\n")?;
for i in 0..self.n {
write!(f, " {:?}: {{\n", self.grammar.variables[i].name)?;
for j in 0..self.n {
write!(
f,
" {:?}: {:?},\n",
self.grammar.variables[j].name,
self.status_matrix[matrix_index(self.n, i, j)]
)?;
}
write!(f, " }},\n")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
Ok(())
}
}
fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
variable_count * i + j
}
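// A small sketch of the row-major layout behind `status_matrix`: with `n` variables,
// entry (i, j) lives at `n * i + j`, so all `n * n` conflict statuses pack into one flat vector.
#[cfg(test)]
mod matrix_index_sketch {
    use super::matrix_index;

    #[test]
    fn flat_row_major_layout() {
        let n = 3;
        let mut seen = vec![false; n * n];
        for i in 0..n {
            for j in 0..n {
                let index = matrix_index(n, i, j);
                assert!(!seen[index], "every (i, j) pair maps to a distinct slot");
                seen[index] = true;
            }
        }
        assert!(seen.into_iter().all(|was_hit| was_hit));
    }
}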
fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<CharacterSet> {
let mut result = Vec::with_capacity(grammar.variables.len());
for variable in &grammar.variables {
cursor.reset(vec![variable.start_state]);
let mut all_chars = CharacterSet::empty();
for (chars, _) in cursor.transition_chars() {
all_chars = all_chars.add(chars);
}
result.push(all_chars);
}
result
}
fn get_following_chars(
starting_chars: &Vec<CharacterSet>,
following_tokens: &Vec<TokenSet>,
) -> Vec<CharacterSet> {
following_tokens
.iter()
.map(|following_tokens| {
let mut chars = CharacterSet::empty();
for token in following_tokens.iter() {
if token.is_terminal() {
chars = chars.add(&starting_chars[token.index]);
}
}
chars
})
.collect()
}
fn compute_conflict_status(
cursor: &mut NfaCursor,
grammar: &LexicalGrammar,
following_chars: &Vec<CharacterSet>,
i: usize,
j: usize,
) -> (TokenConflictStatus, TokenConflictStatus) {
let mut visited_state_sets = HashSet::new();
let mut state_set_queue = vec![vec![
grammar.variables[i].start_state,
grammar.variables[j].start_state,
]];
let mut result = (
TokenConflictStatus::default(),
TokenConflictStatus::default(),
);
while let Some(state_set) = state_set_queue.pop() {
let mut live_variable_indices = grammar.variable_indices_for_nfa_states(&state_set);
// If only one of the two tokens could possibly match from this state, then
// there is no reason to analyze any of its successors. Just record the fact
// that the token matches a string that the other token does not match.
let first_live_variable_index = live_variable_indices.next().unwrap();
if live_variable_indices.count() == 0 {
if first_live_variable_index == i {
result.0.matches_different_string = true;
} else {
result.1.matches_different_string = true;
}
continue;
}
// Don't pursue states where there's no potential for conflict.
cursor.reset(state_set);
let within_separator = cursor.transition_chars().any(|(_, sep)| sep);
// Examine each possible completed token in this state.
let mut completion = None;
for (id, precedence) in cursor.completions() {
if within_separator {
if id == i {
result.0.does_match_separators = true;
} else {
result.1.does_match_separators = true;
}
}
// If the other token has already completed, then this is
// a same-string conflict.
if let Some((prev_id, prev_precedence)) = completion {
if id == prev_id {
continue;
}
// Determine which of the two tokens is preferred.
let preferred_id;
if TokenConflictMap::prefer_token(
grammar,
(prev_precedence, prev_id),
(precedence, id),
) {
preferred_id = prev_id;
} else {
preferred_id = id;
completion = Some((id, precedence));
}
if preferred_id == i {
result.0.matches_same_string = true;
} else {
result.1.matches_same_string = true;
}
} else {
completion = Some((id, precedence));
}
}
// Examine each possible transition from this state to detect substring conflicts.
for transition in cursor.transitions() {
let mut can_advance = true;
// If there is already a completed token in this state, then determine
// if the next state can also match the completed token. If so, then
// this is *not* a conflict.
if let Some((completed_id, completed_precedence)) = completion {
let mut advanced_id = None;
let mut successor_contains_completed_id = false;
for variable_id in grammar.variable_indices_for_nfa_states(&transition.states) {
if variable_id == completed_id {
successor_contains_completed_id = true;
break;
} else {
advanced_id = Some(variable_id);
}
}
// Determine which action is preferred: matching the already-completed token,
// or continuing on to try to match the other, longer token.
if let (Some(advanced_id), false) = (advanced_id, successor_contains_completed_id) {
if TokenConflictMap::prefer_transition(
grammar,
&transition,
completed_id,
completed_precedence,
within_separator,
) {
can_advance = true;
if advanced_id == i {
result.0.does_match_continuation = true;
if transition.characters.does_intersect(&following_chars[j]) {
result.0.does_match_valid_continuation = true;
}
} else {
result.1.does_match_continuation = true;
if transition.characters.does_intersect(&following_chars[i]) {
result.1.does_match_valid_continuation = true;
}
}
} else {
can_advance = false;
if completed_id == i {
result.0.matches_prefix = true;
} else {
result.1.matches_prefix = true;
}
}
}
}
if can_advance && visited_state_sets.insert(transition.states.clone()) {
state_set_queue.push(transition.states);
}
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{Variable, VariableType};
use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar};
use crate::generate::rules::{Precedence, Rule, Symbol};
#[test]
fn test_starting_characters() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: Vec::new(),
variables: vec![
Variable {
name: "token_0".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[a-f]1|0x\\d"),
},
Variable {
name: "token_1".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("d*ef"),
},
],
})
.unwrap();
let token_map = TokenConflictMap::new(&grammar, Vec::new());
assert_eq!(
token_map.starting_chars_by_index[0],
CharacterSet::empty().add_range('a', 'f').add_char('0')
);
assert_eq!(
token_map.starting_chars_by_index[1],
CharacterSet::empty().add_range('d', 'e')
);
}
#[test]
fn test_token_conflicts() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: Vec::new(),
variables: vec![
Variable {
name: "in".to_string(),
kind: VariableType::Named,
rule: Rule::string("in"),
},
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
},
Variable {
name: "instanceof".to_string(),
kind: VariableType::Named,
rule: Rule::string("instanceof"),
},
],
})
.unwrap();
let var = |name| index_of_var(&grammar, name);
let token_map = TokenConflictMap::new(
&grammar,
vec![
[Symbol::terminal(var("identifier"))]
.iter()
.cloned()
.collect(),
[Symbol::terminal(var("in"))].iter().cloned().collect(),
[Symbol::terminal(var("identifier"))]
.iter()
.cloned()
.collect(),
],
);
// Given the string "in", the `in` token is preferred over the `identifier` token
assert!(token_map.does_match_same_string(var("in"), var("identifier")));
assert!(!token_map.does_match_same_string(var("identifier"), var("in")));
// Depending on what character follows, the string "in" may be treated as part of an
// `identifier` token.
assert!(token_map.does_conflict(var("identifier"), var("in")));
// Depending on what character follows, the string "instanceof" may be treated as part of
// an `identifier` token.
assert!(token_map.does_conflict(var("identifier"), var("instanceof")));
assert!(token_map.does_conflict(var("instanceof"), var("in")));
}
#[test]
fn test_token_conflicts_with_separators() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
variables: vec![
Variable {
name: "x".to_string(),
kind: VariableType::Named,
rule: Rule::string("x"),
},
Variable {
name: "newline".to_string(),
kind: VariableType::Named,
rule: Rule::string("\n"),
},
],
})
.unwrap();
let var = |name| index_of_var(&grammar, name);
let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
assert!(token_map.does_conflict(var("newline"), var("x")));
assert!(!token_map.does_conflict(var("x"), var("newline")));
}
#[test]
fn test_token_conflicts_with_open_ended_tokens() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
variables: vec![
Variable {
name: "x".to_string(),
kind: VariableType::Named,
rule: Rule::string("x"),
},
Variable {
name: "anything".to_string(),
kind: VariableType::Named,
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")),
},
],
})
.unwrap();
let var = |name| index_of_var(&grammar, name);
let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
assert!(token_map.does_match_shorter_or_longer(var("anything"), var("x")));
assert!(!token_map.does_match_shorter_or_longer(var("x"), var("anything")));
}
fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize {
grammar
.variables
.iter()
.position(|v| v.name == name)
.unwrap()
}
}

View File

@ -0,0 +1,133 @@
use std::ops::Range;
/// A set of characters represented as a balanced binary tree of comparisons.
/// This is used as an intermediate step in generating efficient code for
/// matching a given character set.
#[derive(PartialEq, Eq)]
pub enum CharacterTree {
Yes,
Compare {
value: char,
operator: Comparator,
consequence: Option<Box<CharacterTree>>,
alternative: Option<Box<CharacterTree>>,
},
}
#[derive(PartialEq, Eq)]
pub enum Comparator {
Less,
LessOrEqual,
Equal,
GreaterOrEqual,
}
impl CharacterTree {
pub fn from_ranges(ranges: &[Range<char>]) -> Option<Self> {
match ranges.len() {
0 => None,
1 => {
let range = &ranges[0];
if range.start == range.end {
Some(CharacterTree::Compare {
operator: Comparator::Equal,
value: range.start,
consequence: Some(Box::new(CharacterTree::Yes)),
alternative: None,
})
} else {
Some(CharacterTree::Compare {
operator: Comparator::GreaterOrEqual,
value: range.start,
consequence: Some(Box::new(CharacterTree::Compare {
operator: Comparator::LessOrEqual,
value: range.end,
consequence: Some(Box::new(CharacterTree::Yes)),
alternative: None,
})),
alternative: None,
})
}
}
len => {
let mid = len / 2;
let mid_range = &ranges[mid];
Some(CharacterTree::Compare {
operator: Comparator::Less,
value: mid_range.start,
consequence: Self::from_ranges(&ranges[0..mid]).map(Box::new),
alternative: Some(Box::new(CharacterTree::Compare {
operator: Comparator::LessOrEqual,
value: mid_range.end,
consequence: Some(Box::new(CharacterTree::Yes)),
alternative: Self::from_ranges(&ranges[(mid + 1)..]).map(Box::new),
})),
})
}
}
}
#[cfg(test)]
fn contains(&self, c: char) -> bool {
match self {
CharacterTree::Yes => true,
CharacterTree::Compare {
value,
operator,
alternative,
consequence,
} => {
let condition = match operator {
Comparator::Less => c < *value,
Comparator::LessOrEqual => c <= *value,
Comparator::Equal => c == *value,
Comparator::GreaterOrEqual => c >= *value,
};
if condition { consequence } else { alternative }
.as_ref()
.map_or(false, |a| a.contains(c))
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_character_tree_simple() {
let tree = CharacterTree::from_ranges(&['a'..'d', 'h'..'l', 'p'..'r', 'u'..'u', 'z'..'z'])
.unwrap();
assert!(tree.contains('a'));
assert!(tree.contains('b'));
assert!(tree.contains('c'));
assert!(tree.contains('d'));
assert!(!tree.contains('e'));
assert!(!tree.contains('f'));
assert!(!tree.contains('g'));
assert!(tree.contains('h'));
assert!(tree.contains('i'));
assert!(tree.contains('j'));
assert!(tree.contains('k'));
assert!(tree.contains('l'));
assert!(!tree.contains('m'));
assert!(!tree.contains('n'));
assert!(!tree.contains('o'));
assert!(tree.contains('p'));
assert!(tree.contains('q'));
assert!(tree.contains('r'));
assert!(!tree.contains('s'));
assert!(!tree.contains('t'));
assert!(tree.contains('u'));
assert!(!tree.contains('v'));
}
}

View File

@ -0,0 +1,63 @@
pub(crate) fn split_state_id_groups<S>(
states: &Vec<S>,
state_ids_by_group_id: &mut Vec<Vec<usize>>,
group_ids_by_state_id: &mut Vec<usize>,
start_group_id: usize,
mut f: impl FnMut(&S, &S, &Vec<usize>) -> bool,
) -> bool {
let mut result = false;
let mut group_id = start_group_id;
while group_id < state_ids_by_group_id.len() {
let state_ids = &state_ids_by_group_id[group_id];
let mut split_state_ids = Vec::new();
let mut i = 0;
while i < state_ids.len() {
let left_state_id = state_ids[i];
if split_state_ids.contains(&left_state_id) {
i += 1;
continue;
}
let left_state = &states[left_state_id];
// Identify all of the other states in the group that are incompatible with
// this state.
let mut j = i + 1;
while j < state_ids.len() {
let right_state_id = state_ids[j];
if split_state_ids.contains(&right_state_id) {
j += 1;
continue;
}
let right_state = &states[right_state_id];
if f(left_state, right_state, &group_ids_by_state_id) {
split_state_ids.push(right_state_id);
}
j += 1;
}
i += 1;
}
// If any states were removed from the group, add them all as a new group.
if split_state_ids.len() > 0 {
result = true;
state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(&i));
let new_group_id = state_ids_by_group_id.len();
for id in &split_state_ids {
group_ids_by_state_id[*id] = new_group_id;
}
state_ids_by_group_id.push(split_state_ids);
}
group_id += 1;
}
result
}
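// A minimal sketch of how the helper above refines groups: start with a single group holding
// every state, split whenever the predicate reports an incompatibility, and call again until
// a pass changes nothing. Plain `i32` states and an inequality predicate stand in for
// `ParseState` and the real conflict checks.
#[cfg(test)]
mod split_groups_sketch {
    use super::split_state_id_groups;

    #[test]
    fn splits_one_group_into_compatible_subgroups() {
        let states = vec![1, 1, 2, 2];
        let mut state_ids_by_group_id = vec![vec![0, 1, 2, 3]];
        let mut group_ids_by_state_id = vec![0, 0, 0, 0];

        let did_split = split_state_id_groups(
            &states,
            &mut state_ids_by_group_id,
            &mut group_ids_by_state_id,
            0,
            |left, right, _groups| left != right,
        );
        assert!(did_split);
        assert_eq!(state_ids_by_group_id, vec![vec![0, 1], vec![2, 3]]);
        assert_eq!(group_ids_by_state_id, vec![0, 0, 1, 1]);

        // A second pass finds nothing left to split.
        let did_split_again = split_state_id_groups(
            &states,
            &mut state_ids_by_group_id,
            &mut group_ids_by_state_id,
            0,
            |left, right, _groups| left != right,
        );
        assert!(!did_split_again);
    }
}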

View File

@ -0,0 +1,418 @@
function alias(rule, value) {
const result = {
type: "ALIAS",
content: normalize(rule),
named: false,
value: null
};
switch (value.constructor) {
case String:
result.named = false;
result.value = value;
return result;
case ReferenceError:
result.named = true;
result.value = value.symbol.name;
return result;
case Object:
if (typeof value.type === 'string' && value.type === 'SYMBOL') {
result.named = true;
result.value = value.name;
return result;
}
}
throw new Error('Invalid alias value ' + value);
}
function blank() {
return {
type: "BLANK"
};
}
function field(name, rule) {
return {
type: "FIELD",
name: name,
content: normalize(rule)
}
}
function choice(...elements) {
return {
type: "CHOICE",
members: elements.map(normalize)
};
}
function optional(value) {
checkArguments(arguments.length, optional, 'optional');
return choice(value, blank());
}
function prec(number, rule) {
checkPrecedence(number);
checkArguments(
arguments.length - 1,
prec,
'prec',
' and a precedence argument'
);
return {
type: "PREC",
value: number,
content: normalize(rule)
};
}
prec.left = function(number, rule) {
if (rule == null) {
rule = number;
number = 0;
}
checkPrecedence(number);
checkArguments(
arguments.length - 1,
prec.left,
'prec.left',
' and an optional precedence argument'
);
return {
type: "PREC_LEFT",
value: number,
content: normalize(rule)
};
}
prec.right = function(number, rule) {
if (rule == null) {
rule = number;
number = 0;
}
checkPrecedence(number);
checkArguments(
arguments.length - 1,
prec.right,
'prec.right',
' and an optional precedence argument'
);
return {
type: "PREC_RIGHT",
value: number,
content: normalize(rule)
};
}
prec.dynamic = function(number, rule) {
checkPrecedence(number);
checkArguments(
arguments.length - 1,
prec.dynamic,
'prec.dynamic',
' and a precedence argument'
);
return {
type: "PREC_DYNAMIC",
value: number,
content: normalize(rule)
};
}
function repeat(rule) {
checkArguments(arguments.length, repeat, 'repeat');
return {
type: "REPEAT",
content: normalize(rule)
};
}
function repeat1(rule) {
checkArguments(arguments.length, repeat1, 'repeat1');
return {
type: "REPEAT1",
content: normalize(rule)
};
}
function seq(...elements) {
return {
type: "SEQ",
members: elements.map(normalize)
};
}
function sym(name) {
return {
type: "SYMBOL",
name: name
};
}
function token(value) {
return {
type: "TOKEN",
content: normalize(value)
};
}
token.immediate = function(value) {
return {
type: "IMMEDIATE_TOKEN",
content: normalize(value)
};
}
function normalize(value) {
if (typeof value == "undefined")
throw new Error("Undefined symbol");
switch (value.constructor) {
case String:
return {
type: 'STRING',
value
};
case RegExp:
return {
type: 'PATTERN',
value: value.source
};
case ReferenceError:
throw value
default:
if (typeof value.type === 'string') {
return value;
} else {
throw new TypeError("Invalid rule: " + value.toString());
}
}
}
function RuleBuilder(ruleMap) {
return new Proxy({}, {
get(target, propertyName) {
const symbol = sym(propertyName);
if (!ruleMap || ruleMap.hasOwnProperty(propertyName)) {
return symbol;
} else {
const error = new ReferenceError(`Undefined symbol '${propertyName}'`);
error.symbol = symbol;
return error;
}
}
})
}
function grammar(baseGrammar, options) {
if (!options) {
options = baseGrammar;
baseGrammar = {
name: null,
rules: {},
extras: [normalize(/\s/)],
conflicts: [],
externals: [],
inline: [],
supertypes: [],
precedences: [],
};
}
let externals = baseGrammar.externals;
if (options.externals) {
if (typeof options.externals !== "function") {
throw new Error("Grammar's 'externals' property must be a function.");
}
const externalsRuleBuilder = RuleBuilder(null)
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
if (!Array.isArray(externalRules)) {
throw new Error("Grammar's 'externals' property must return an array of rules.");
}
externals = externalRules.map(normalize);
}
const ruleMap = {};
for (const key in options.rules) {
ruleMap[key] = true;
}
for (const key in baseGrammar.rules) {
ruleMap[key] = true;
}
for (const external of externals) {
if (typeof external.name === 'string') {
ruleMap[external.name] = true;
}
}
const ruleBuilder = RuleBuilder(ruleMap);
const name = options.name;
if (typeof name !== "string") {
throw new Error("Grammar's 'name' property must be a string.");
}
if (!/^[a-zA-Z_]\w*$/.test(name)) {
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
}
let rules = Object.assign({}, baseGrammar.rules);
if (options.rules) {
if (typeof options.rules !== "object") {
throw new Error("Grammar's 'rules' property must be an object.");
}
for (const ruleName in options.rules) {
const ruleFn = options.rules[ruleName];
if (typeof ruleFn !== "function") {
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
}
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
}
}
let extras = baseGrammar.extras.slice();
if (options.extras) {
if (typeof options.extras !== "function") {
throw new Error("Grammar's 'extras' property must be a function.");
}
extras = options.extras
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
if (!Array.isArray(extras)) {
throw new Error("Grammar's 'extras' function must return an array.")
}
extras = extras.map(normalize);
}
let word = baseGrammar.word;
if (options.word) {
word = options.word.call(ruleBuilder, ruleBuilder).name;
if (typeof word != 'string') {
throw new Error("Grammar's 'word' property must be a named rule.");
}
}
let conflicts = baseGrammar.conflicts;
if (options.conflicts) {
if (typeof options.conflicts !== "function") {
throw new Error("Grammar's 'conflicts' property must be a function.");
}
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
if (!Array.isArray(conflictRules)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
conflicts = conflictRules.map(conflictSet => {
if (!Array.isArray(conflictSet)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
return conflictSet.map(symbol => normalize(symbol).name);
});
}
let inline = baseGrammar.inline;
if (options.inline) {
if (typeof options.inline !== "function") {
throw new Error("Grammar's 'inline' property must be a function.");
}
const baseInlineRules = baseGrammar.inline.map(sym);
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
if (!Array.isArray(inlineRules)) {
throw new Error("Grammar's inline must be an array of rules.");
}
inline = inlineRules.map(symbol => symbol.name);
}
let supertypes = baseGrammar.supertypes;
if (options.supertypes) {
if (typeof options.supertypes !== "function") {
throw new Error("Grammar's 'supertypes' property must be a function.");
}
const baseSupertypeRules = baseGrammar.supertypes.map(sym);
const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules);
if (!Array.isArray(supertypeRules)) {
throw new Error("Grammar's supertypes must be an array of rules.");
}
supertypes = supertypeRules.map(symbol => symbol.name);
}
let precedences = baseGrammar.precedences;
if (options.precedences) {
if (typeof options.precedences !== "function") {
throw new Error("Grammar's 'precedences' property must be a function");
}
precedences = options.precedences.call(ruleBuilder, ruleBuilder, baseGrammar.precedences);
if (!Array.isArray(precedences)) {
throw new Error("Grammar's precedences must be an array of arrays of rules.");
}
precedences = precedences.map(list => {
if (!Array.isArray(list)) {
throw new Error("Grammar's precedences must be an array of arrays of rules.");
}
return list.map(normalize);
});
}
if (Object.keys(rules).length == 0) {
throw new Error("Grammar must have at least one rule.");
}
return {name, word, rules, extras, conflicts, precedences, externals, inline, supertypes};
}
function checkArguments(ruleCount, caller, callerName, suffix = '') {
if (ruleCount > 1) {
const error = new Error([
`The \`${callerName}\` function only takes one rule argument${suffix}.`,
'You passed multiple rules. Did you mean to call `seq`?\n'
].join('\n'));
Error.captureStackTrace(error, caller);
throw error
}
}
function checkPrecedence(value) {
if (value == null) {
throw new Error('Missing precedence value');
}
}
global.alias = alias;
global.blank = blank;
global.choice = choice;
global.optional = optional;
global.prec = prec;
global.repeat = repeat;
global.repeat1 = repeat1;
global.seq = seq;
global.sym = sym;
global.token = token;
global.grammar = grammar;
global.field = field;
const result = require(process.env.TREE_SITTER_GRAMMAR_PATH);
console.log(JSON.stringify(result, null, 2));
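// A minimal sketch of the grammar definition shape this DSL consumes. The names below
// (`toy`, `source_file`, `definition`, `identifier`) are illustrative only; a real grammar
// lives in its own grammar.js, exports the object returned by `grammar(...)` through
// `module.exports`, and is loaded via TREE_SITTER_GRAMMAR_PATH as above.
const exampleGrammarShape = grammar({
  name: 'toy',
  word: $ => $.identifier,
  rules: {
    source_file: $ => repeat($.definition),
    definition: $ => seq('(', 'def', field('name', $.identifier), ')'),
    identifier: $ => /[a-zA-Z_]\w*/,
  },
});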

View File

@ -0,0 +1,269 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "tree-sitter grammar specification",
"type": "object",
"required": ["name", "rules"],
"additionalProperties": false,
"properties": {
"name": {
"description": "the name of the grammar",
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
"rules": {
"type": "object",
"patternProperties": {
"^[a-zA-Z_]\\w*$": {
"$ref": "#/definitions/rule"
}
},
"additionalProperties": false
},
"extras": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
},
"externals": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
},
"inline": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*$"
}
},
"conflicts": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*$"
}
}
},
"word": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
"supertypes": {
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"type": "array",
"items": {
"description": "the name of a rule in `rules` or `extras`",
"type": "string"
}
}
},
"definitions": {
"blank-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^BLANK$"
}
},
"required": ["type"]
},
"string-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^STRING$"
},
"value": {
"type": "string"
}
},
"required": ["type", "value"]
},
"pattern-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^PATTERN$"
},
"value": { "type": "string" }
},
"required": ["type", "value"]
},
"symbol-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^SYMBOL$"
},
"name": { "type": "string" }
},
"required": ["type", "name"]
},
"seq-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^SEQ$"
},
"members": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
}
},
"required": ["type", "members"]
},
"choice-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^CHOICE$"
},
"members": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
}
},
"required": ["type", "members"]
},
"alias-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^ALIAS$"
},
"value": {
"type": "string"
},
"named": {
"type": "boolean"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "named", "content", "value"]
},
"repeat-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^REPEAT$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"repeat1-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^REPEAT1$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"token-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^(TOKEN|IMMEDIATE_TOKEN)$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"field-rule": {
"properties": {
"name": { "type": "string" },
"type": {
"type": "string",
"pattern": "^FIELD$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["name", "type", "content"]
},
"prec-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^(PREC|PREC_LEFT|PREC_RIGHT|PREC_DYNAMIC)$"
},
"value": {
"type": "integer"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"rule": {
"oneOf": [
{ "$ref": "#/definitions/alias-rule" },
{ "$ref": "#/definitions/blank-rule" },
{ "$ref": "#/definitions/string-rule" },
{ "$ref": "#/definitions/pattern-rule" },
{ "$ref": "#/definitions/symbol-rule" },
{ "$ref": "#/definitions/seq-rule" },
{ "$ref": "#/definitions/choice-rule" },
{ "$ref": "#/definitions/repeat1-rule" },
{ "$ref": "#/definitions/repeat-rule" },
{ "$ref": "#/definitions/token-rule" },
{ "$ref": "#/definitions/field-rule" },
{ "$ref": "#/definitions/prec-rule" }
]
}
}
}
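For orientation, a minimal grammar document that satisfies this schema could look like the following (a hedged sketch; the rule names and string value are invented for illustration):

{
  "name": "my_lang",
  "rules": {
    "source_file": {
      "type": "REPEAT1",
      "content": { "type": "SYMBOL", "name": "statement" }
    },
    "statement": { "type": "STRING", "value": "foo" }
  }
}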

View File

@ -0,0 +1,262 @@
use super::nfa::Nfa;
use super::rules::{Alias, Associativity, Precedence, Rule, Symbol};
use std::collections::HashMap;
use std::fmt;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum VariableType {
Hidden,
Auxiliary,
Anonymous,
Named,
}
// Input grammar
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Variable {
pub name: String,
pub kind: VariableType,
pub rule: Rule,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum PrecedenceEntry {
Name(String),
Symbol(String),
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct InputGrammar {
pub name: String,
pub variables: Vec<Variable>,
pub extra_symbols: Vec<Rule>,
pub expected_conflicts: Vec<Vec<String>>,
pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>,
pub supertype_symbols: Vec<String>,
pub word_token: Option<String>,
}
// Extracted lexical grammar
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct LexicalVariable {
pub name: String,
pub kind: VariableType,
pub implicit_precedence: i32,
pub start_state: u32,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct LexicalGrammar {
pub nfa: Nfa,
pub variables: Vec<LexicalVariable>,
}
// Extracted syntax grammar
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct ProductionStep {
pub symbol: Symbol,
pub precedence: Precedence,
pub associativity: Option<Associativity>,
pub alias: Option<Alias>,
pub field_name: Option<String>,
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct Production {
pub steps: Vec<ProductionStep>,
pub dynamic_precedence: i32,
}
#[derive(Default)]
pub(crate) struct InlinedProductionMap {
pub productions: Vec<Production>,
pub production_map: HashMap<(*const Production, u32), Vec<usize>>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SyntaxVariable {
pub name: String,
pub kind: VariableType,
pub productions: Vec<Production>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ExternalToken {
pub name: String,
pub kind: VariableType,
pub corresponding_internal_token: Option<Symbol>,
}
#[derive(Debug, Default)]
pub(crate) struct SyntaxGrammar {
pub variables: Vec<SyntaxVariable>,
pub extra_symbols: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>,
pub external_tokens: Vec<ExternalToken>,
pub supertype_symbols: Vec<Symbol>,
pub variables_to_inline: Vec<Symbol>,
pub word_token: Option<Symbol>,
pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
}
#[cfg(test)]
impl ProductionStep {
pub(crate) fn new(symbol: Symbol) -> Self {
Self {
symbol,
precedence: Precedence::None,
associativity: None,
alias: None,
field_name: None,
}
}
pub(crate) fn with_prec(
self,
precedence: Precedence,
associativity: Option<Associativity>,
) -> Self {
Self {
symbol: self.symbol,
precedence,
associativity,
alias: self.alias,
field_name: self.field_name,
}
}
pub(crate) fn with_alias(self, value: &str, is_named: bool) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
associativity: self.associativity,
alias: Some(Alias {
value: value.to_string(),
is_named,
}),
field_name: self.field_name,
}
}
pub(crate) fn with_field_name(self, name: &str) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
associativity: self.associativity,
alias: self.alias,
field_name: Some(name.to_string()),
}
}
}
impl Production {
pub fn first_symbol(&self) -> Option<Symbol> {
self.steps.first().map(|s| s.symbol.clone())
}
}
#[cfg(test)]
impl Variable {
pub fn named(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Named,
rule,
}
}
pub fn auxiliary(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Auxiliary,
rule,
}
}
pub fn hidden(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Hidden,
rule,
}
}
pub fn anonymous(name: &str, rule: Rule) -> Self {
Self {
name: name.to_string(),
kind: VariableType::Anonymous,
rule,
}
}
}
impl VariableType {
pub fn is_visible(&self) -> bool {
*self == VariableType::Named || *self == VariableType::Anonymous
}
}
impl LexicalGrammar {
pub fn variable_indices_for_nfa_states<'a>(
&'a self,
state_ids: &'a Vec<u32>,
) -> impl Iterator<Item = usize> + 'a {
let mut prev = None;
state_ids.iter().filter_map(move |state_id| {
let variable_id = self.variable_index_for_nfa_state(*state_id);
if prev != Some(variable_id) {
prev = Some(variable_id);
prev
} else {
None
}
})
}
pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
self.variables
.iter()
.position(|v| v.start_state >= state_id)
.unwrap()
}
}
impl SyntaxVariable {
pub fn is_auxiliary(&self) -> bool {
self.kind == VariableType::Auxiliary
}
pub fn is_hidden(&self) -> bool {
self.kind == VariableType::Hidden || self.kind == VariableType::Auxiliary
}
}
impl InlinedProductionMap {
pub fn inlined_productions<'a>(
&'a self,
production: &Production,
step_index: u32,
) -> Option<impl Iterator<Item = &'a Production> + 'a> {
self.production_map
.get(&(production as *const Production, step_index))
.map(|production_indices| {
production_indices
.iter()
.cloned()
.map(move |index| &self.productions[index])
})
}
}
impl fmt::Display for PrecedenceEntry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
PrecedenceEntry::Name(n) => write!(f, "'{}'", n),
PrecedenceEntry::Symbol(s) => write!(f, "$.{}", s),
}
}
}

View File

@ -0,0 +1,214 @@
mod binding_files;
mod build_tables;
mod char_tree;
mod dedup;
mod grammars;
mod nfa;
mod node_types;
pub mod parse_grammar;
mod prepare_grammar;
mod render;
mod rules;
mod tables;
use self::build_tables::build_tables;
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use self::parse_grammar::parse_grammar;
use self::prepare_grammar::prepare_grammar;
use self::render::render_c_code;
use self::rules::AliasMap;
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
.multi_line(true)
.build()
.unwrap();
}
struct GeneratedParser {
c_code: String,
node_types_json: String,
}
pub fn generate_parser_in_directory(
repo_path: &PathBuf,
grammar_path: Option<&str>,
abi_version: usize,
generate_bindings: bool,
report_symbol_name: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
// Ensure that the output directories exist.
fs::create_dir_all(&src_path)?;
fs::create_dir_all(&header_path)?;
// Read the grammar.json.
let grammar_json;
match grammar_path {
Some(path) => {
grammar_json = load_grammar_file(path.as_ref())?;
}
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
grammar_json = load_grammar_file(&grammar_js_path)?;
fs::write(&src_path.join("grammar.json"), &grammar_json)?;
}
}
// Parse and preprocess the grammar.
let input_grammar = parse_grammar(&grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let language_name = input_grammar.name;
// Generate the parser and related files.
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(
&language_name,
syntax_grammar,
lexical_grammar,
inlines,
simple_aliases,
abi_version,
report_symbol_name,
)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
if generate_bindings {
binding_files::generate_binding_files(&repo_path, &language_name)?;
}
Ok(())
}
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
let input_grammar = parse_grammar(&grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let parser = generate_parser_for_grammar_with_opts(
&input_grammar.name,
syntax_grammar,
lexical_grammar,
inlines,
simple_aliases,
tree_sitter::LANGUAGE_VERSION,
None,
)?;
Ok((input_grammar.name, parser.c_code))
}
fn generate_parser_for_grammar_with_opts(
name: &String,
syntax_grammar: SyntaxGrammar,
lexical_grammar: LexicalGrammar,
inlines: InlinedProductionMap,
simple_aliases: AliasMap,
abi_version: usize,
report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
let variable_info =
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
let node_types_json = node_types::generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
);
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
&inlines,
report_symbol_name,
)?;
let c_code = render_c_code(
name,
parse_table,
main_lex_table,
keyword_lex_table,
keyword_capture_token,
syntax_grammar,
lexical_grammar,
simple_aliases,
abi_version,
);
Ok(GeneratedParser {
c_code,
node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
})
}
pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path)?),
Some("json") => Ok(fs::read_to_string(grammar_path)?),
_ => Err(anyhow!(
"Unknown grammar file extension: {:?}",
grammar_path
)),
}
}
fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
let mut node_process = Command::new("node")
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to run `node`");
let mut node_stdin = node_process
.stdin
.take()
.expect("Failed to open stdin for node");
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
.expect("Could not parse this package's version as semver.");
write!(
node_stdin,
"global.TREE_SITTER_CLI_VERSION_MAJOR = {};
global.TREE_SITTER_CLI_VERSION_MINOR = {};
global.TREE_SITTER_CLI_VERSION_PATCH = {};",
cli_version.major, cli_version.minor, cli_version.patch,
)
.expect("Failed to write tree-sitter version to node's stdin");
let javascript_code = include_bytes!("./dsl.js");
node_stdin
.write(javascript_code)
.expect("Failed to write grammar dsl to node's stdin");
drop(node_stdin);
let output = node_process
.wait_with_output()
.expect("Failed to read output from node");
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => return Err(anyhow!("Node process exited with status {}", code)),
}
let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node");
result.push('\n');
Ok(result)
}
fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> {
fs::write(path, body)
.with_context(|| format!("Failed to write {:?}", path.file_name().unwrap()))
}
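As a rough usage sketch (assuming it is called from inside this crate, with `generate_parser_for_grammar` from the module above in scope), generating a parser for a trivial grammar could look like this:

use anyhow::Result;

fn generate_tiny_parser() -> Result<()> {
    // A deliberately tiny, invented grammar: one rule matching the literal "hello".
    let grammar_json = r#"{
        "name": "tiny",
        "rules": {
            "source_file": { "type": "STRING", "value": "hello" }
        }
    }"#;
    // Strips `//` comments, parses and prepares the grammar, then renders the
    // C source for the parser at the current tree_sitter::LANGUAGE_VERSION.
    let (name, c_code) = generate_parser_for_grammar(grammar_json)?;
    assert_eq!(name, "tiny");
    println!("generated {} bytes of parser.c for `{}`", c_code.len(), name);
    Ok(())
}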

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,222 @@
use super::grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType};
use super::rules::{Precedence, Rule};
use anyhow::{anyhow, Result};
use serde::Deserialize;
use serde_json::{Map, Value};
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
enum RuleJSON {
ALIAS {
content: Box<RuleJSON>,
named: bool,
value: String,
},
BLANK,
STRING {
value: String,
},
PATTERN {
value: String,
},
SYMBOL {
name: String,
},
CHOICE {
members: Vec<RuleJSON>,
},
FIELD {
name: String,
content: Box<RuleJSON>,
},
SEQ {
members: Vec<RuleJSON>,
},
REPEAT {
content: Box<RuleJSON>,
},
REPEAT1 {
content: Box<RuleJSON>,
},
PREC_DYNAMIC {
value: i32,
content: Box<RuleJSON>,
},
PREC_LEFT {
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
PREC_RIGHT {
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
PREC {
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
TOKEN {
content: Box<RuleJSON>,
},
IMMEDIATE_TOKEN {
content: Box<RuleJSON>,
},
}
#[derive(Deserialize)]
#[serde(untagged)]
enum PrecedenceValueJSON {
Integer(i32),
Name(String),
}
#[derive(Deserialize)]
pub(crate) struct GrammarJSON {
pub(crate) name: String,
rules: Map<String, Value>,
#[serde(default)]
precedences: Vec<Vec<RuleJSON>>,
#[serde(default)]
conflicts: Vec<Vec<String>>,
#[serde(default)]
externals: Vec<RuleJSON>,
#[serde(default)]
extras: Vec<RuleJSON>,
#[serde(default)]
inline: Vec<String>,
#[serde(default)]
supertypes: Vec<String>,
word: Option<String>,
}
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
let grammar_json: GrammarJSON = serde_json::from_str(&input)?;
let mut variables = Vec::with_capacity(grammar_json.rules.len());
for (name, value) in grammar_json.rules {
variables.push(Variable {
name: name.to_owned(),
kind: VariableType::Named,
rule: parse_rule(serde_json::from_value(value)?),
})
}
let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len());
for list in grammar_json.precedences {
let mut ordering = Vec::with_capacity(list.len());
for entry in list {
ordering.push(match entry {
RuleJSON::STRING { value } => PrecedenceEntry::Name(value),
RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name),
_ => {
return Err(anyhow!(
"Invalid rule in precedences array. Only strings and symbols are allowed"
))
}
})
}
precedence_orderings.push(ordering);
}
let extra_symbols = grammar_json.extras.into_iter().map(parse_rule).collect();
let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect();
Ok(InputGrammar {
name: grammar_json.name,
word_token: grammar_json.word,
expected_conflicts: grammar_json.conflicts,
supertype_symbols: grammar_json.supertypes,
variables_to_inline: grammar_json.inline,
precedence_orderings,
variables,
extra_symbols,
external_tokens,
})
}
fn parse_rule(json: RuleJSON) -> Rule {
match json {
RuleJSON::ALIAS {
content,
value,
named,
} => Rule::alias(parse_rule(*content), value, named),
RuleJSON::BLANK => Rule::Blank,
RuleJSON::STRING { value } => Rule::String(value),
RuleJSON::PATTERN { value } => Rule::Pattern(value),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
RuleJSON::REPEAT { content } => {
Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank])
}
RuleJSON::PREC { value, content } => Rule::prec(value.into(), parse_rule(*content)),
RuleJSON::PREC_LEFT { value, content } => {
Rule::prec_left(value.into(), parse_rule(*content))
}
RuleJSON::PREC_RIGHT { value, content } => {
Rule::prec_right(value.into(), parse_rule(*content))
}
RuleJSON::PREC_DYNAMIC { value, content } => {
Rule::prec_dynamic(value, parse_rule(*content))
}
RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
}
}
impl Into<Precedence> for PrecedenceValueJSON {
fn into(self) -> Precedence {
match self {
PrecedenceValueJSON::Integer(i) => Precedence::Integer(i),
PrecedenceValueJSON::Name(i) => Precedence::Name(i),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_grammar() {
let grammar = parse_grammar(
r#"{
"name": "my_lang",
"rules": {
"file": {
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "statement"
}
},
"statement": {
"type": "STRING",
"value": "foo"
}
}
}"#,
)
.unwrap();
assert_eq!(grammar.name, "my_lang");
assert_eq!(
grammar.variables,
vec![
Variable {
name: "file".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
},
Variable {
name: "statement".to_string(),
kind: VariableType::Named,
rule: Rule::String("foo".to_string())
},
]
);
}
}

View File

@ -0,0 +1,289 @@
use super::ExtractedSyntaxGrammar;
use crate::generate::grammars::{Variable, VariableType};
use crate::generate::rules::{Rule, Symbol};
use std::collections::HashMap;
use std::mem;
struct Expander {
variable_name: String,
repeat_count_in_variable: usize,
preceding_symbol_count: usize,
auxiliary_variables: Vec<Variable>,
existing_repeats: HashMap<Rule, Symbol>,
}
impl Expander {
fn expand_variable(&mut self, index: usize, variable: &mut Variable) -> bool {
self.variable_name.clear();
self.variable_name.push_str(&variable.name);
self.repeat_count_in_variable = 0;
let mut rule = Rule::Blank;
mem::swap(&mut rule, &mut variable.rule);
// In the special case of a hidden variable with a repetition at its top level,
// convert that rule itself into a binary tree structure instead of introducing
// another auxiliary rule.
if let (VariableType::Hidden, Rule::Repeat(repeated_content)) = (variable.kind, &rule) {
let inner_rule = self.expand_rule(&repeated_content);
variable.rule = self.wrap_rule_in_binary_tree(Symbol::non_terminal(index), inner_rule);
variable.kind = VariableType::Auxiliary;
return true;
}
variable.rule = self.expand_rule(&rule);
false
}
fn expand_rule(&mut self, rule: &Rule) -> Rule {
match rule {
// For choices, sequences, and metadata, descend into the child rules,
// replacing any nested repetitions.
Rule::Choice(elements) => Rule::Choice(
elements
.iter()
.map(|element| self.expand_rule(element))
.collect(),
),
Rule::Seq(elements) => Rule::Seq(
elements
.iter()
.map(|element| self.expand_rule(element))
.collect(),
),
Rule::Metadata { rule, params } => Rule::Metadata {
rule: Box::new(self.expand_rule(rule)),
params: params.clone(),
},
// For repetitions, introduce an auxiliary rule that contains the
// repeated content, but can also contain a recursive binary tree structure.
Rule::Repeat(content) => {
let inner_rule = self.expand_rule(content);
if let Some(existing_symbol) = self.existing_repeats.get(&inner_rule) {
return Rule::Symbol(*existing_symbol);
}
self.repeat_count_in_variable += 1;
let rule_name = format!(
"{}_repeat{}",
self.variable_name, self.repeat_count_in_variable
);
let repeat_symbol = Symbol::non_terminal(
self.preceding_symbol_count + self.auxiliary_variables.len(),
);
self.existing_repeats
.insert(inner_rule.clone(), repeat_symbol);
self.auxiliary_variables.push(Variable {
name: rule_name,
kind: VariableType::Auxiliary,
rule: self.wrap_rule_in_binary_tree(repeat_symbol, inner_rule),
});
Rule::Symbol(repeat_symbol)
}
// For primitive rules, don't change anything.
_ => rule.clone(),
}
}
fn wrap_rule_in_binary_tree(&self, symbol: Symbol, rule: Rule) -> Rule {
Rule::choice(vec![
Rule::Seq(vec![Rule::Symbol(symbol), Rule::Symbol(symbol)]),
rule,
])
}
}
pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
let mut expander = Expander {
variable_name: String::new(),
repeat_count_in_variable: 0,
preceding_symbol_count: grammar.variables.len(),
auxiliary_variables: Vec::new(),
existing_repeats: HashMap::new(),
};
for (i, mut variable) in grammar.variables.iter_mut().enumerate() {
let expanded_top_level_repetition = expander.expand_variable(i, &mut variable);
// If a hidden variable had a top-level repetition and it was converted to
// a recursive rule, then it can't be inlined.
if expanded_top_level_repetition {
grammar
.variables_to_inline
.retain(|symbol| *symbol != Symbol::non_terminal(i));
}
}
grammar
.variables
.extend(expander.auxiliary_variables.into_iter());
grammar
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_repeat_expansion() {
// Repeats nested inside of sequences and choices are expanded.
let grammar = expand_repeats(build_grammar(vec![Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::choice(vec![
Rule::repeat(Rule::terminal(11)),
Rule::repeat(Rule::terminal(12)),
]),
Rule::terminal(13),
]),
)]));
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]),
Rule::terminal(13),
])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
Rule::terminal(11),
])
),
Variable::auxiliary(
"rule0_repeat2",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::terminal(12),
])
),
]
);
}
#[test]
fn test_repeat_deduplication() {
// Terminal 4 appears inside of a repeat in three different places.
let grammar = expand_repeats(build_grammar(vec![
Variable::named(
"rule0",
Rule::choice(vec![
Rule::seq(vec![Rule::terminal(1), Rule::repeat(Rule::terminal(4))]),
Rule::seq(vec![Rule::terminal(2), Rule::repeat(Rule::terminal(4))]),
]),
),
Variable::named(
"rule1",
Rule::seq(vec![Rule::terminal(3), Rule::repeat(Rule::terminal(4))]),
),
]));
// Only one auxiliary rule is created for repeating terminal 4.
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::choice(vec![
Rule::seq(vec![Rule::terminal(1), Rule::non_terminal(2)]),
Rule::seq(vec![Rule::terminal(2), Rule::non_terminal(2)]),
])
),
Variable::named(
"rule1",
Rule::seq(vec![Rule::terminal(3), Rule::non_terminal(2),])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::terminal(4),
])
)
]
);
}
#[test]
fn test_expansion_of_nested_repeats() {
let grammar = expand_repeats(build_grammar(vec![Variable::named(
"rule0",
Rule::seq(vec![
Rule::terminal(10),
Rule::repeat(Rule::seq(vec![
Rule::terminal(11),
Rule::repeat(Rule::terminal(12)),
])),
]),
)]));
assert_eq!(
grammar.variables,
vec![
Variable::named(
"rule0",
Rule::seq(vec![Rule::terminal(10), Rule::non_terminal(2),])
),
Variable::auxiliary(
"rule0_repeat1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
Rule::terminal(12),
])
),
Variable::auxiliary(
"rule0_repeat2",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
Rule::seq(vec![Rule::terminal(11), Rule::non_terminal(1),]),
])
),
]
);
}
#[test]
fn test_expansion_of_repeats_at_top_of_hidden_rules() {
let grammar = expand_repeats(build_grammar(vec![
Variable::named("rule0", Rule::non_terminal(1)),
Variable::hidden(
"_rule1",
Rule::repeat(Rule::choice(vec![Rule::terminal(11), Rule::terminal(12)])),
),
]));
assert_eq!(
grammar.variables,
vec![
Variable::named("rule0", Rule::non_terminal(1),),
Variable::auxiliary(
"_rule1",
Rule::choice(vec![
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1)]),
Rule::terminal(11),
Rule::terminal(12),
]),
),
]
);
}
fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
ExtractedSyntaxGrammar {
variables,
..Default::default()
}
}
}

View File

@ -0,0 +1,903 @@
use super::ExtractedLexicalGrammar;
use crate::generate::grammars::{LexicalGrammar, LexicalVariable};
use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
use crate::generate::rules::{Precedence, Rule};
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::Regex;
use regex_syntax::ast::{
parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
ClassUnicodeKind, RepetitionKind, RepetitionRange,
};
use std::collections::HashMap;
use std::i32;
lazy_static! {
static ref CURLY_BRACE_REGEX: Regex =
Regex::new(r#"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap();
static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
serde_json::from_str(UNICODE_PROPERTIES_JSON).unwrap();
static ref UNICODE_CATEGORY_ALIASES: HashMap<&'static str, String> =
serde_json::from_str(UNICODE_CATEGORY_ALIASES_JSON).unwrap();
static ref UNICODE_PROPERTY_ALIASES: HashMap<&'static str, String> =
serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
}
const UNICODE_CATEGORIES_JSON: &'static str = include_str!("./unicode-categories.json");
const UNICODE_PROPERTIES_JSON: &'static str = include_str!("./unicode-properties.json");
const UNICODE_CATEGORY_ALIASES_JSON: &'static str = include_str!("./unicode-category-aliases.json");
const UNICODE_PROPERTY_ALIASES_JSON: &'static str = include_str!("./unicode-property-aliases.json");
const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
struct NfaBuilder {
nfa: Nfa,
is_sep: bool,
precedence_stack: Vec<i32>,
}
fn get_implicit_precedence(rule: &Rule) -> i32 {
match rule {
Rule::String(_) => 2,
Rule::Metadata { rule, params } => {
if params.is_main_token {
get_implicit_precedence(rule) + 1
} else {
get_implicit_precedence(rule)
}
}
_ => 0,
}
}
fn get_completion_precedence(rule: &Rule) -> i32 {
if let Rule::Metadata { params, .. } = rule {
if let Precedence::Integer(p) = params.precedence {
return p;
}
}
0
}
fn preprocess_regex(content: &str) -> String {
let content = CURLY_BRACE_REGEX.replace(content, "$1\\{$2\\}");
let mut result = String::with_capacity(content.len());
let mut is_escaped = false;
for c in content.chars() {
if is_escaped {
if ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) {
result.push(c);
} else {
result.push('\\');
result.push(c);
}
is_escaped = false;
} else if c == '\\' {
is_escaped = true;
} else {
result.push(c);
}
}
if is_escaped {
result.push('\\');
}
result
}
pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
let mut builder = NfaBuilder {
nfa: Nfa::new(),
is_sep: true,
precedence_stack: vec![0],
};
let separator_rule = if grammar.separators.len() > 0 {
grammar.separators.push(Rule::Blank);
Rule::repeat(Rule::choice(grammar.separators))
} else {
Rule::Blank
};
let mut variables = Vec::new();
for (i, variable) in grammar.variables.into_iter().enumerate() {
let is_immediate_token = match &variable.rule {
Rule::Metadata { params, .. } => params.is_main_token,
_ => false,
};
builder.is_sep = false;
builder.nfa.states.push(NfaState::Accept {
variable_index: i,
precedence: get_completion_precedence(&variable.rule),
});
let last_state_id = builder.nfa.last_state_id();
builder
.expand_rule(&variable.rule, last_state_id)
.with_context(|| format!("Error processing rule {}", variable.name))?;
if !is_immediate_token {
builder.is_sep = true;
let last_state_id = builder.nfa.last_state_id();
builder.expand_rule(&separator_rule, last_state_id)?;
}
variables.push(LexicalVariable {
name: variable.name,
kind: variable.kind,
implicit_precedence: get_implicit_precedence(&variable.rule),
start_state: builder.nfa.last_state_id(),
});
}
Ok(LexicalGrammar {
nfa: builder.nfa,
variables,
})
}
impl NfaBuilder {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
match rule {
Rule::Pattern(s) => {
let s = preprocess_regex(s);
let ast = parse::Parser::new().parse(&s)?;
self.expand_regex(&ast, next_state_id)
}
Rule::String(s) => {
for c in s.chars().rev() {
self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
next_state_id = self.nfa.last_state_id();
}
Ok(s.len() > 0)
}
Rule::Choice(elements) => {
let mut alternative_state_ids = Vec::new();
for element in elements {
if self.expand_rule(element, next_state_id)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
}
}
alternative_state_ids.sort_unstable();
alternative_state_ids.dedup();
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
for alternative_state_id in alternative_state_ids {
self.push_split(alternative_state_id);
}
Ok(true)
}
Rule::Seq(elements) => {
let mut result = false;
for element in elements.into_iter().rev() {
if self.expand_rule(element, next_state_id)? {
result = true;
}
next_state_id = self.nfa.last_state_id();
}
Ok(result)
}
Rule::Repeat(rule) => {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_rule(rule, split_state_id)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
} else {
Ok(false)
}
}
Rule::Metadata { rule, params } => {
let has_precedence = if let Precedence::Integer(precedence) = &params.precedence {
self.precedence_stack.push(*precedence);
true
} else {
false
};
let result = self.expand_rule(rule, next_state_id);
if has_precedence {
self.precedence_stack.pop();
}
result
}
Rule::Blank => Ok(false),
_ => Err(anyhow!("Grammar error: Unexpected rule {:?}", rule)),
}
}
fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result<bool> {
match ast {
Ast::Empty(_) => Ok(false),
Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")),
Ast::Literal(literal) => {
self.push_advance(CharacterSet::from_char(literal.c), next_state_id);
Ok(true)
}
Ast::Dot(_) => {
self.push_advance(CharacterSet::from_char('\n').negate(), next_state_id);
Ok(true)
}
Ast::Assertion(_) => Err(anyhow!("Regex error: Assertions are not supported")),
Ast::Class(class) => match class {
Class::Unicode(class) => {
let mut chars = self.expand_unicode_character_class(&class.kind)?;
if class.negated {
chars = chars.negate();
}
self.push_advance(chars, next_state_id);
Ok(true)
}
Class::Perl(class) => {
let mut chars = self.expand_perl_character_class(&class.kind);
if class.negated {
chars = chars.negate();
}
self.push_advance(chars, next_state_id);
Ok(true)
}
Class::Bracketed(class) => {
let mut chars = self.translate_class_set(&class.kind)?;
if class.negated {
chars = chars.negate();
}
self.push_advance(chars, next_state_id);
Ok(true)
}
},
Ast::Repetition(repetition) => match repetition.op.kind {
RepetitionKind::ZeroOrOne => {
self.expand_zero_or_one(&repetition.ast, next_state_id)
}
RepetitionKind::OneOrMore => {
self.expand_one_or_more(&repetition.ast, next_state_id)
}
RepetitionKind::ZeroOrMore => {
self.expand_zero_or_more(&repetition.ast, next_state_id)
}
RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
self.expand_count(&repetition.ast, count, next_state_id)
}
RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
if self.expand_zero_or_more(&repetition.ast, next_state_id)? {
self.expand_count(&repetition.ast, min, next_state_id)
} else {
Ok(false)
}
}
RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
let mut result = self.expand_count(&repetition.ast, min, next_state_id)?;
for _ in min..max {
if result {
next_state_id = self.nfa.last_state_id();
}
if self.expand_zero_or_one(&repetition.ast, next_state_id)? {
result = true;
}
}
Ok(result)
}
},
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id),
Ast::Alternation(alternation) => {
let mut alternative_state_ids = Vec::new();
for ast in alternation.asts.iter() {
if self.expand_regex(&ast, next_state_id)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
}
}
alternative_state_ids.sort_unstable();
alternative_state_ids.dedup();
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
for alternative_state_id in alternative_state_ids {
self.push_split(alternative_state_id);
}
Ok(true)
}
Ast::Concat(concat) => {
let mut result = false;
for ast in concat.asts.iter().rev() {
if self.expand_regex(&ast, next_state_id)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
}
Ok(result)
}
}
}
fn translate_class_set(&self, class_set: &ClassSet) -> Result<CharacterSet> {
match &class_set {
ClassSet::Item(item) => self.expand_character_class(&item),
ClassSet::BinaryOp(binary_op) => {
let mut lhs_char_class = self.translate_class_set(&binary_op.lhs)?;
let mut rhs_char_class = self.translate_class_set(&binary_op.rhs)?;
match binary_op.kind {
ClassSetBinaryOpKind::Intersection => {
Ok(lhs_char_class.remove_intersection(&mut rhs_char_class))
}
ClassSetBinaryOpKind::Difference => {
Ok(lhs_char_class.difference(rhs_char_class))
}
ClassSetBinaryOpKind::SymmetricDifference => {
Ok(lhs_char_class.symmetric_difference(rhs_char_class))
}
}
}
}
}
fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_regex(&ast, split_state_id)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
} else {
self.nfa.states.pop();
Ok(false)
}
}
fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_regex(ast, next_state_id)? {
self.push_split(next_state_id);
Ok(true)
} else {
Ok(false)
}
}
fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id)? {
self.push_split(next_state_id);
Ok(true)
} else {
Ok(false)
}
}
fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result<bool> {
let mut result = false;
for _ in 0..count {
if self.expand_regex(ast, next_state_id)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
}
Ok(result)
}
fn expand_character_class(&self, item: &ClassSetItem) -> Result<CharacterSet> {
match item {
ClassSetItem::Empty(_) => Ok(CharacterSet::empty()),
ClassSetItem::Literal(literal) => Ok(CharacterSet::from_char(literal.c)),
ClassSetItem::Range(range) => Ok(CharacterSet::from_range(range.start.c, range.end.c)),
ClassSetItem::Union(union) => {
let mut result = CharacterSet::empty();
for item in &union.items {
result = result.add(&self.expand_character_class(&item)?);
}
Ok(result)
}
ClassSetItem::Perl(class) => Ok(self.expand_perl_character_class(&class.kind)),
ClassSetItem::Unicode(class) => {
let mut set = self.expand_unicode_character_class(&class.kind)?;
if class.negated {
set = set.negate();
}
Ok(set)
}
ClassSetItem::Bracketed(class) => {
let mut set = self.translate_class_set(&class.kind)?;
if class.negated {
set = set.negate();
}
Ok(set)
}
_ => Err(anyhow!(
"Regex error: Unsupported character class syntax {:?}",
item
)),
}
}
fn expand_unicode_character_class(&self, class: &ClassUnicodeKind) -> Result<CharacterSet> {
let mut chars = CharacterSet::empty();
let category_letter;
match class {
ClassUnicodeKind::OneLetter(le) => {
category_letter = le.to_string();
}
ClassUnicodeKind::Named(class_name) => {
let actual_class_name = UNICODE_CATEGORY_ALIASES
.get(class_name.as_str())
.or_else(|| UNICODE_PROPERTY_ALIASES.get(class_name.as_str()))
.unwrap_or(class_name);
if actual_class_name.len() == 1 {
category_letter = actual_class_name.clone();
} else {
let code_points = UNICODE_CATEGORIES
.get(actual_class_name.as_str())
.or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
.ok_or_else(|| {
anyhow!(
"Regex error: Unsupported unicode character class {}",
class_name
)
})?;
for c in code_points {
if let Some(c) = std::char::from_u32(*c) {
chars = chars.add_char(c);
}
}
return Ok(chars);
}
}
ClassUnicodeKind::NamedValue { .. } => {
return Err(anyhow!(
"Regex error: Key-value unicode properties are not supported"
))
}
}
for (category, code_points) in UNICODE_CATEGORIES.iter() {
if category.starts_with(&category_letter) {
for c in code_points {
if let Some(c) = std::char::from_u32(*c) {
chars = chars.add_char(c);
}
}
}
}
Ok(chars)
}
fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet {
match item {
ClassPerlKind::Digit => CharacterSet::from_range('0', '9'),
ClassPerlKind::Space => CharacterSet::empty()
.add_char(' ')
.add_char('\t')
.add_char('\r')
.add_char('\n'),
ClassPerlKind::Word => CharacterSet::empty()
.add_char('_')
.add_range('A', 'Z')
.add_range('a', 'z')
.add_range('0', '9'),
}
}
fn push_advance(&mut self, chars: CharacterSet, state_id: u32) {
let precedence = *self.precedence_stack.last().unwrap();
self.nfa.states.push(NfaState::Advance {
chars,
state_id,
precedence,
is_sep: self.is_sep,
});
}
fn push_split(&mut self, state_id: u32) {
let last_state_id = self.nfa.last_state_id();
self.nfa
.states
.push(NfaState::Split(state_id, last_state_id));
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::Variable;
use crate::generate::nfa::{NfaCursor, NfaTransition};
fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
let start_states = grammar.variables.iter().map(|v| v.start_state).collect();
let mut cursor = NfaCursor::new(&grammar.nfa, start_states);
let mut result = None;
let mut result_precedence = i32::MIN;
let mut start_char = 0;
let mut end_char = 0;
for c in s.chars() {
for (id, precedence) in cursor.completions() {
if result.is_none() || result_precedence <= precedence {
result = Some((id, &s[start_char..end_char]));
result_precedence = precedence;
}
}
if let Some(NfaTransition {
states,
is_separator,
..
}) = cursor
.transitions()
.into_iter()
.find(|t| t.characters.contains(c) && t.precedence >= result_precedence)
{
cursor.reset(states);
end_char += c.len_utf8();
if is_separator {
start_char = end_char;
}
} else {
break;
}
}
for (id, precedence) in cursor.completions() {
if result.is_none() || result_precedence <= precedence {
result = Some((id, &s[start_char..end_char]));
result_precedence = precedence;
}
}
result
}
#[test]
fn test_rule_expansion() {
struct Row {
rules: Vec<Rule>,
separators: Vec<Rule>,
examples: Vec<(&'static str, Option<(usize, &'static str)>)>,
}
let table = [
// regex with sequences and alternatives
Row {
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
separators: vec![],
examples: vec![
("ade1", Some((0, "ade"))),
("bdf1", Some((0, "bdf"))),
("bdfh1", Some((0, "bdfh"))),
("ad1", None),
],
},
// regex with repeats
Row {
rules: vec![Rule::pattern("a*")],
separators: vec![],
examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
},
// regex with repeats in sequences
Row {
rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
separators: vec![],
examples: vec![
("af1", Some((0, "af"))),
("adedef1", Some((0, "adedef"))),
("abcbcbcf1", Some((0, "abcbcbcf"))),
("a", None),
],
},
// regex with character ranges
Row {
rules: vec![Rule::pattern("[a-fA-F0-9]+")],
separators: vec![],
examples: vec![("A1ff0.", Some((0, "A1ff0")))],
},
// regex with perl character classes
Row {
rules: vec![Rule::pattern("\\w\\d\\s")],
separators: vec![],
examples: vec![("_0 ", Some((0, "_0 ")))],
},
// string
Row {
rules: vec![Rule::string("abc")],
separators: vec![],
examples: vec![("abcd", Some((0, "abc"))), ("ab", None)],
},
// complex rule containing strings and regexes
Row {
rules: vec![Rule::repeat(Rule::seq(vec![
Rule::string("{"),
Rule::pattern("[a-f]+"),
Rule::string("}"),
]))],
separators: vec![],
examples: vec![
("{a}{", Some((0, "{a}"))),
("{a}{d", Some((0, "{a}"))),
("ab", None),
],
},
// longest match rule
Row {
rules: vec![
Rule::pattern("a|bc"),
Rule::pattern("aa"),
Rule::pattern("bcd"),
],
separators: vec![],
examples: vec![
("a.", Some((0, "a"))),
("bc.", Some((0, "bc"))),
("aa.", Some((1, "aa"))),
("bcd?", Some((2, "bcd"))),
("b.", None),
("c.", None),
],
},
// regex with an alternative including the empty string
Row {
rules: vec![Rule::pattern("a(b|)+c")],
separators: vec![],
examples: vec![
("ac.", Some((0, "ac"))),
("abc.", Some((0, "abc"))),
("abbc.", Some((0, "abbc"))),
],
},
// separators
Row {
rules: vec![Rule::pattern("[a-f]+")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
examples: vec![
(" a", Some((0, "a"))),
(" \nb", Some((0, "b"))),
(" \\a", None),
(" \\\na", Some((0, "a"))),
],
},
// shorter tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(2), Rule::pattern("abc")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")),
Rule::pattern("[a-e]+"),
],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
examples: vec![
("abceef", Some((0, "abc"))),
("abdeef", Some((1, "abde"))),
("aeeeef", Some((2, "aeeee"))),
],
},
// immediate tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")),
Rule::immediate_token(Rule::prec(
Precedence::Integer(2),
Rule::pattern("[^ab]+"),
)),
],
separators: vec![Rule::pattern("\\s")],
examples: vec![("cccb", Some((1, "ccc")))],
},
Row {
rules: vec![Rule::seq(vec![
Rule::string("a"),
Rule::choice(vec![Rule::string("b"), Rule::string("c")]),
Rule::string("d"),
])],
separators: vec![],
examples: vec![
("abd", Some((0, "abd"))),
("acd", Some((0, "acd"))),
("abc", None),
("ad", None),
("d", None),
("a", None),
],
},
// nested choices within sequences
Row {
rules: vec![Rule::seq(vec![
Rule::pattern("[0-9]+"),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::seq(vec![
Rule::choice(vec![Rule::string("e"), Rule::string("E")]),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
]),
Rule::pattern("[0-9]+"),
])]),
]),
])],
separators: vec![],
examples: vec![
("12", Some((0, "12"))),
("12e", Some((0, "12"))),
("12g", Some((0, "12"))),
("12e3", Some((0, "12e3"))),
("12e+", Some((0, "12"))),
("12E+34 +", Some((0, "12E+34"))),
("12e34", Some((0, "12e34"))),
],
},
// nested groups
Row {
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])],
separators: vec![],
examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
},
// allowing unrecognized escape sequences
Row {
rules: vec![
// Escaped forward slash (used in JS because '/' is the regex delimiter)
Rule::pattern(r#"\/"#),
// Escaped quotes
Rule::pattern(r#"\"\'"#),
// Quote preceded by a literal backslash
Rule::pattern(r#"[\\']+"#),
],
separators: vec![],
examples: vec![
("/", Some((0, "/"))),
("\"\'", Some((1, "\"\'"))),
(r#"'\'a"#, Some((2, r#"'\'"#))),
],
},
// unicode property escapes
Row {
rules: vec![
Rule::pattern(r#"\p{L}+\P{L}+"#),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#),
],
separators: vec![],
examples: vec![
(" 123 abc", Some((1, " 123 "))),
("ბΨƁ___ƀƔ", Some((0, "ბΨƁ___"))),
],
},
// unicode property escapes in bracketed sets
Row {
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)],
separators: vec![],
examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))],
},
// unicode character escapes
Row {
rules: vec![
Rule::pattern(r#"\u{00dc}"#),
Rule::pattern(r#"\U{000000dd}"#),
Rule::pattern(r#"\u00de"#),
Rule::pattern(r#"\U000000df"#),
],
separators: vec![],
examples: vec![
("\u{00dc}", Some((0, "\u{00dc}"))),
("\u{00dd}", Some((1, "\u{00dd}"))),
("\u{00de}", Some((2, "\u{00de}"))),
("\u{00df}", Some((3, "\u{00df}"))),
],
},
// allowing un-escaped curly braces
Row {
rules: vec![
// Un-escaped curly braces
Rule::pattern(r#"u{[0-9a-fA-F]+}"#),
// Already-escaped curly braces
Rule::pattern(r#"\{[ab]{3}\}"#),
// Unicode codepoints
Rule::pattern(r#"\u{1000A}"#),
// Unicode codepoints (lowercase)
Rule::pattern(r#"\u{1000b}"#),
],
separators: vec![],
examples: vec![
("u{1234} ok", Some((0, "u{1234}"))),
("{aba}}", Some((1, "{aba}"))),
("\u{1000A}", Some((2, "\u{1000A}"))),
("\u{1000b}", Some((3, "\u{1000b}"))),
],
},
// Emojis
Row {
rules: vec![Rule::pattern(r"\p{Emoji}+")],
separators: vec![],
examples: vec![
("🐎", Some((0, "🐎"))),
("🐴🐴", Some((0, "🐴🐴"))),
("#0", Some((0, "#0"))), // These chars are technically emojis!
("", None),
("", None),
("horse", None),
],
},
// Intersection
Row {
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+")],
separators: vec![],
examples: vec![
("456", Some((0, "456"))),
("64", Some((0, "64"))),
("452", Some((0, "45"))),
("91", None),
("8", None),
("3", None),
],
},
// Difference
Row {
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
("83", Some((0, "83"))),
("9", Some((0, "9"))),
("124", Some((0, "12"))),
("67", None),
("4", None),
],
},
// Symmetric difference
Row {
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
("83", Some((0, "83"))),
("9", Some((0, "9"))),
("124", Some((0, "12"))),
("67", None),
("4", None),
],
},
// Nested set operations
Row {
// Digits matched by each sub-pattern (0-9):
// [0-5]          -> 0 1 2 3 4 5
// [2-4]          -> 2 3 4
// [0-5]--[2-4]   -> 0 1 5
// [3-9]          -> 3 4 5 6 7 8 9
// [6-7]          -> 6 7
// [3-9]--[6-7]   -> 3 4 5 8 9
// final regex    -> 0 1 3 4 8 9
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+")],
separators: vec![],
examples: vec![
("01", Some((0, "01"))),
("432", Some((0, "43"))),
("8", Some((0, "8"))),
("9", Some((0, "9"))),
("2", None),
("567", None),
],
},
];
for Row {
rules,
separators,
examples,
} in &table
{
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: separators.clone(),
variables: rules
.into_iter()
.map(|rule| Variable::named("", rule.clone()))
.collect(),
})
.unwrap();
for (haystack, needle) in examples.iter() {
assert_eq!(simulate_nfa(&grammar, haystack), *needle);
}
}
}
}

View File

@ -0,0 +1,304 @@
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
#[derive(Clone, Default)]
struct SymbolStatus {
aliases: Vec<(Alias, usize)>,
appears_unaliased: bool,
}
// Update the grammar by finding symbols that are always aliased, and for each such symbol,
// promoting one of its aliases to a "default alias", which is applied globally instead
// of in a context-specific way.
//
// This has two benefits:
// * It reduces the overhead of storing production-specific alias info in the parse table.
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
// ensures that the children of an `ERROR` node have symbols that are consistent with the
// way that they would appear in a valid syntax tree.
pub(super) fn extract_default_aliases(
syntax_grammar: &mut SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> AliasMap {
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
let mut non_terminal_status_list =
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
let mut external_status_list =
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
// For each grammar symbol, find all of the aliases under which the symbol appears,
// and determine whether or not the symbol ever appears *unaliased*.
for variable in syntax_grammar.variables.iter() {
for production in variable.productions.iter() {
for step in production.steps.iter() {
let mut status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
panic!("Unexpected end token")
}
};
// Default aliases don't work for inlined variables.
if syntax_grammar.variables_to_inline.contains(&step.symbol) {
continue;
}
if let Some(alias) = &step.alias {
if let Some(count_for_alias) = status
.aliases
.iter_mut()
.find_map(|(a, count)| if a == alias { Some(count) } else { None })
{
*count_for_alias += 1;
} else {
status.aliases.push((alias.clone(), 1));
}
} else {
status.appears_unaliased = true;
}
}
}
}
for symbol in syntax_grammar.extra_symbols.iter() {
let mut status = match symbol.kind {
SymbolType::External => &mut external_status_list[symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
SymbolType::Terminal => &mut terminal_status_list[symbol.index],
SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
panic!("Unexpected end token")
}
};
status.appears_unaliased = true;
}
let symbols_with_statuses = (terminal_status_list
.iter_mut()
.enumerate()
.map(|(i, status)| (Symbol::terminal(i), status)))
.chain(
non_terminal_status_list
.iter_mut()
.enumerate()
.map(|(i, status)| (Symbol::non_terminal(i), status)),
)
.chain(
external_status_list
.iter_mut()
.enumerate()
.map(|(i, status)| (Symbol::external(i), status)),
);
// For each symbol that always appears aliased, find the alias that occurs most often,
// and designate that alias as the symbol's "default alias". Store all of these
// default aliases in a map that will be returned.
let mut result = AliasMap::new();
for (symbol, status) in symbols_with_statuses {
if status.appears_unaliased {
status.aliases.clear();
} else {
if let Some(default_entry) = status
.aliases
.iter()
.enumerate()
.max_by_key(|(i, (_, count))| (count, -(*i as i64)))
.map(|(_, entry)| entry.clone())
{
status.aliases.clear();
status.aliases.push(default_entry.clone());
result.insert(symbol, default_entry.0);
}
}
}
// Wherever a symbol is aliased as its default alias, remove the usage of the alias,
// because it will now be redundant.
let mut alias_positions_to_clear = Vec::new();
for variable in syntax_grammar.variables.iter_mut() {
alias_positions_to_clear.clear();
for (i, production) in variable.productions.iter().enumerate() {
for (j, step) in production.steps.iter().enumerate() {
let status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
panic!("Unexpected end token")
}
};
// If this step is aliased as the symbol's default alias, then remove that alias.
if step.alias.is_some()
&& step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0)
{
let mut other_productions_must_use_this_alias_at_this_index = false;
for (other_i, other_production) in variable.productions.iter().enumerate() {
if other_i != i
&& other_production.steps.len() > j
&& other_production.steps[j].alias == step.alias
&& result.get(&other_production.steps[j].symbol) != step.alias.as_ref()
{
other_productions_must_use_this_alias_at_this_index = true;
break;
}
}
if !other_productions_must_use_this_alias_at_this_index {
alias_positions_to_clear.push((i, j));
}
}
}
}
for (production_index, step_index) in &alias_positions_to_clear {
variable.productions[*production_index].steps[*step_index].alias = None;
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
};
use crate::generate::nfa::Nfa;
#[test]
fn test_extract_simple_aliases() {
let mut syntax_grammar = SyntaxGrammar {
variables: vec![
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
],
}],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
// Token 0 is always aliased as "a1".
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
// Token 1 is aliased within rule `v1` above, but not here.
ProductionStep::new(Symbol::terminal(1)),
// Token 2 is aliased differently here than in `v1`. The alias from
// `v1` should be promoted to the default alias, because `v1` appears
// first in the grammar.
ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
// Token 3 is also aliased differently here than in `v1`. In this case,
// this alias should be promoted to the default alias, because it is
// used a greater number of times (twice).
ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
],
}],
},
],
..Default::default()
};
let lexical_grammar = LexicalGrammar {
nfa: Nfa::new(),
variables: vec![
LexicalVariable {
name: "t0".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
LexicalVariable {
name: "t1".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
LexicalVariable {
name: "t2".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
LexicalVariable {
name: "t3".to_string(),
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
},
],
};
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
assert_eq!(default_aliases.len(), 3);
assert_eq!(
default_aliases.get(&Symbol::terminal(0)),
Some(&Alias {
value: "a1".to_string(),
is_named: true,
})
);
assert_eq!(
default_aliases.get(&Symbol::terminal(2)),
Some(&Alias {
value: "a3".to_string(),
is_named: true,
})
);
assert_eq!(
default_aliases.get(&Symbol::terminal(3)),
Some(&Alias {
value: "a6".to_string(),
is_named: true,
})
);
assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);
assert_eq!(
syntax_grammar.variables,
vec![
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)),
ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
],
},],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(1)),
ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
ProductionStep::new(Symbol::terminal(3)),
ProductionStep::new(Symbol::terminal(3)),
],
},],
},
]
);
}
}

View File

@ -0,0 +1,499 @@
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::generate::grammars::{ExternalToken, Variable, VariableType};
use crate::generate::rules::{MetadataParams, Rule, Symbol, SymbolType};
use anyhow::{anyhow, Result};
use std::collections::HashMap;
use std::mem;
pub(super) fn extract_tokens(
mut grammar: InternedGrammar,
) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
let mut extractor = TokenExtractor {
current_variable_name: String::new(),
current_variable_token_count: 0,
extracted_variables: Vec::new(),
extracted_usage_counts: Vec::new(),
};
for mut variable in grammar.variables.iter_mut() {
extractor.extract_tokens_in_variable(&mut variable);
}
for mut variable in grammar.external_tokens.iter_mut() {
extractor.extract_tokens_in_variable(&mut variable);
}
let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len());
for variable in extractor.extracted_variables {
lexical_variables.push(Variable {
name: variable.name,
kind: variable.kind,
rule: variable.rule,
});
}
// If a variable's entire rule was extracted as a token and that token didn't
// appear within any other rule, then remove that variable from the syntax
// grammar, giving its name to the token in the lexical grammar. Any symbols
// that pointed to that variable will need to be updated to point to the
// variable in the lexical grammar. Symbols that pointed to later variables
// will need to have their indices decremented.
let mut variables = Vec::new();
let mut symbol_replacer = SymbolReplacer {
replacements: HashMap::new(),
};
for (i, variable) in grammar.variables.into_iter().enumerate() {
if let Rule::Symbol(Symbol {
kind: SymbolType::Terminal,
index,
}) = variable.rule
{
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
let mut lexical_variable = &mut lexical_variables[index];
lexical_variable.kind = variable.kind;
lexical_variable.name = variable.name;
symbol_replacer.replacements.insert(i, index);
continue;
}
}
variables.push(variable);
}
for variable in variables.iter_mut() {
variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule);
}
let expected_conflicts = grammar
.expected_conflicts
.into_iter()
.map(|conflict| {
let mut result: Vec<_> = conflict
.iter()
.map(|symbol| symbol_replacer.replace_symbol(*symbol))
.collect();
result.sort_unstable();
result.dedup();
result
})
.collect();
let supertype_symbols = grammar
.supertype_symbols
.into_iter()
.map(|symbol| symbol_replacer.replace_symbol(symbol))
.collect();
let variables_to_inline = grammar
.variables_to_inline
.into_iter()
.map(|symbol| symbol_replacer.replace_symbol(symbol))
.collect();
let mut separators = Vec::new();
let mut extra_symbols = Vec::new();
for rule in grammar.extra_symbols {
if let Rule::Symbol(symbol) = rule {
extra_symbols.push(symbol_replacer.replace_symbol(symbol));
} else {
if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
extra_symbols.push(Symbol::terminal(index));
} else {
separators.push(rule);
}
}
}
let mut external_tokens = Vec::new();
for external_token in grammar.external_tokens {
let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
if let Rule::Symbol(symbol) = rule {
if symbol.is_non_terminal() {
return Err(anyhow!(
"Rule '{}' cannot be used as both an external token and a non-terminal rule",
&variables[symbol.index].name,
));
}
if symbol.is_external() {
external_tokens.push(ExternalToken {
name: external_token.name,
kind: external_token.kind,
corresponding_internal_token: None,
})
} else {
external_tokens.push(ExternalToken {
name: lexical_variables[symbol.index].name.clone(),
kind: external_token.kind,
corresponding_internal_token: Some(symbol),
})
}
} else {
return Err(anyhow!(
"Non-symbol rules cannot be used as external tokens"
));
}
}
let mut word_token = None;
if let Some(token) = grammar.word_token {
let token = symbol_replacer.replace_symbol(token);
if token.is_non_terminal() {
return Err(anyhow!(
"Non-terminal symbol '{}' cannot be used as the word token",
&variables[token.index].name
));
}
word_token = Some(token);
}
Ok((
ExtractedSyntaxGrammar {
variables,
expected_conflicts,
extra_symbols,
variables_to_inline,
supertype_symbols,
external_tokens,
word_token,
precedence_orderings: grammar.precedence_orderings,
},
ExtractedLexicalGrammar {
variables: lexical_variables,
separators,
},
))
}
struct TokenExtractor {
current_variable_name: String,
current_variable_token_count: usize,
extracted_variables: Vec<Variable>,
extracted_usage_counts: Vec<usize>,
}
struct SymbolReplacer {
replacements: HashMap<usize, usize>,
}
impl TokenExtractor {
fn extract_tokens_in_variable(&mut self, variable: &mut Variable) {
self.current_variable_name.clear();
self.current_variable_name.push_str(&variable.name);
self.current_variable_token_count = 0;
let mut rule = Rule::Blank;
mem::swap(&mut rule, &mut variable.rule);
variable.rule = self.extract_tokens_in_rule(&rule);
}
fn extract_tokens_in_rule(&mut self, input: &Rule) -> Rule {
match input {
Rule::String(name) => self.extract_token(input, Some(name)).into(),
Rule::Pattern(..) => self.extract_token(input, None).into(),
Rule::Metadata { params, rule } => {
if params.is_token {
let mut params = params.clone();
params.is_token = false;
let mut string_value = None;
if let Rule::String(value) = rule.as_ref() {
string_value = Some(value);
}
let rule_to_extract = if params == MetadataParams::default() {
rule.as_ref()
} else {
input
};
self.extract_token(rule_to_extract, string_value).into()
} else {
Rule::Metadata {
params: params.clone(),
rule: Box::new(self.extract_tokens_in_rule((&rule).clone())),
}
}
}
Rule::Repeat(content) => Rule::Repeat(Box::new(self.extract_tokens_in_rule(content))),
Rule::Seq(elements) => Rule::Seq(
elements
.iter()
.map(|e| self.extract_tokens_in_rule(e))
.collect(),
),
Rule::Choice(elements) => Rule::Choice(
elements
.iter()
.map(|e| self.extract_tokens_in_rule(e))
.collect(),
),
_ => input.clone(),
}
}
fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Symbol {
for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
if variable.rule == *rule {
self.extracted_usage_counts[i] += 1;
return Symbol::terminal(i);
}
}
let index = self.extracted_variables.len();
let variable = if let Some(string_value) = string_value {
Variable {
name: string_value.clone(),
kind: VariableType::Anonymous,
rule: rule.clone(),
}
} else {
self.current_variable_token_count += 1;
Variable {
name: format!(
"{}_token{}",
&self.current_variable_name, self.current_variable_token_count
),
kind: VariableType::Auxiliary,
rule: rule.clone(),
}
};
self.extracted_variables.push(variable);
self.extracted_usage_counts.push(1);
Symbol::terminal(index)
}
}
impl SymbolReplacer {
fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
match rule {
Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
Rule::Choice(elements) => Rule::Choice(
elements
.iter()
.map(|e| self.replace_symbols_in_rule(e))
.collect(),
),
Rule::Seq(elements) => Rule::Seq(
elements
.iter()
.map(|e| self.replace_symbols_in_rule(e))
.collect(),
),
Rule::Repeat(content) => Rule::Repeat(Box::new(self.replace_symbols_in_rule(content))),
Rule::Metadata { rule, params } => Rule::Metadata {
params: params.clone(),
rule: Box::new(self.replace_symbols_in_rule(rule)),
},
_ => rule.clone(),
}
}
fn replace_symbol(&self, symbol: Symbol) -> Symbol {
if !symbol.is_non_terminal() {
return symbol;
}
if let Some(replacement) = self.replacements.get(&symbol.index) {
return Symbol::terminal(*replacement);
}
let mut adjusted_index = symbol.index;
for (replaced_index, _) in self.replacements.iter() {
if *replaced_index < symbol.index {
adjusted_index -= 1;
}
}
return Symbol::non_terminal(adjusted_index);
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::generate::grammars::VariableType;
#[test]
fn test_extraction() {
let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
Variable::named(
"rule_0",
Rule::repeat(Rule::seq(vec![
Rule::string("a"),
Rule::pattern("b"),
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
Rule::token(Rule::repeat(Rule::choice(vec![
Rule::string("c"),
Rule::string("d"),
]))),
]),
])),
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_2", Rule::pattern("b")),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
),
]))
.unwrap();
assert_eq!(
syntax_grammar.variables,
vec![
Variable::named(
"rule_0",
Rule::repeat(Rule::seq(vec![
// The string "a" was replaced by a symbol referencing the lexical grammar
Rule::terminal(0),
// The pattern "b" was replaced by a symbol referencing the lexical grammar
Rule::terminal(1),
Rule::choice(vec![
// The symbol referencing `rule_1` was replaced by a symbol referencing
// the lexical grammar.
Rule::terminal(3),
// The symbol referencing `rule_2` had its index decremented because
// `rule_1` was moved to the lexical grammar.
Rule::non_terminal(1),
// The rule wrapped in `token` was replaced by a symbol referencing
// the lexical grammar.
Rule::terminal(2),
])
]))
),
            // The pattern "e" was only used in one place: as the definition of `rule_1`,
// so that rule was moved to the lexical grammar. The pattern "b" appeared in
// two places, so it was not moved into the lexical grammar.
Variable::named("rule_2", Rule::terminal(1)),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(1), Rule::Blank,])
),
]
);
assert_eq!(
lexical_grammar.variables,
vec![
Variable::anonymous("a", Rule::string("a")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
Variable::auxiliary(
"rule_0_token2",
Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
),
Variable::named("rule_1", Rule::pattern("e")),
]
);
}
#[test]
fn test_start_rule_is_token() {
let (syntax_grammar, lexical_grammar) =
extract_tokens(build_grammar(vec![Variable::named(
"rule_0",
Rule::string("hello"),
)]))
.unwrap();
assert_eq!(
syntax_grammar.variables,
vec![Variable::named("rule_0", Rule::terminal(0)),]
);
assert_eq!(
lexical_grammar.variables,
vec![Variable::anonymous("hello", Rule::string("hello")),]
)
}
#[test]
fn test_extracting_extra_symbols() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*")),
]);
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];
let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]);
assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
}
#[test]
fn test_extract_externals() {
let mut grammar = build_grammar(vec![
Variable::named(
"rule_0",
Rule::seq(vec![
Rule::external(0),
Rule::string("a"),
Rule::non_terminal(1),
Rule::non_terminal(2),
]),
),
Variable::named("rule_1", Rule::string("b")),
Variable::named("rule_2", Rule::string("c")),
]);
grammar.external_tokens = vec![
Variable::named("external_0", Rule::external(0)),
Variable::anonymous("a", Rule::string("a")),
Variable::named("rule_2", Rule::non_terminal(2)),
];
let (syntax_grammar, _) = extract_tokens(grammar).unwrap();
assert_eq!(
syntax_grammar.external_tokens,
vec![
ExternalToken {
name: "external_0".to_string(),
kind: VariableType::Named,
corresponding_internal_token: None,
},
ExternalToken {
name: "a".to_string(),
kind: VariableType::Anonymous,
corresponding_internal_token: Some(Symbol::terminal(0)),
},
ExternalToken {
name: "rule_2".to_string(),
kind: VariableType::Named,
corresponding_internal_token: Some(Symbol::terminal(2)),
},
]
);
}
#[test]
fn test_error_on_external_with_same_name_as_non_terminal() {
let mut grammar = build_grammar(vec![
Variable::named(
"rule_0",
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
),
Variable::named(
"rule_1",
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2)]),
),
Variable::named("rule_2", Rule::string("a")),
]);
grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))];
match extract_tokens(grammar) {
Err(e) => {
assert_eq!(e.to_string(), "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
}
_ => {
panic!("Expected an error but got no error");
}
}
}
fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
InternedGrammar {
variables,
..Default::default()
}
}
}

View File

@ -0,0 +1,420 @@
use super::ExtractedSyntaxGrammar;
use crate::generate::grammars::{
Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable,
};
use crate::generate::rules::{Alias, Associativity, Precedence, Rule, Symbol};
use anyhow::{anyhow, Result};
struct RuleFlattener {
production: Production,
precedence_stack: Vec<Precedence>,
associativity_stack: Vec<Associativity>,
alias_stack: Vec<Alias>,
field_name_stack: Vec<String>,
}
impl RuleFlattener {
fn new() -> Self {
Self {
production: Production {
steps: Vec::new(),
dynamic_precedence: 0,
},
precedence_stack: Vec::new(),
associativity_stack: Vec::new(),
alias_stack: Vec::new(),
field_name_stack: Vec::new(),
}
}
fn flatten(mut self, rule: Rule) -> Production {
self.apply(rule, true);
self.production
}
fn apply(&mut self, rule: Rule, at_end: bool) -> bool {
match rule {
Rule::Seq(members) => {
let mut result = false;
let last_index = members.len() - 1;
for (i, member) in members.into_iter().enumerate() {
result |= self.apply(member, i == last_index && at_end);
}
result
}
Rule::Metadata { rule, params } => {
let mut has_precedence = false;
if !params.precedence.is_none() {
has_precedence = true;
self.precedence_stack.push(params.precedence);
}
let mut has_associativity = false;
if let Some(associativity) = params.associativity {
has_associativity = true;
self.associativity_stack.push(associativity);
}
let mut has_alias = false;
if let Some(alias) = params.alias {
has_alias = true;
self.alias_stack.push(alias);
}
let mut has_field_name = false;
if let Some(field_name) = params.field_name {
has_field_name = true;
self.field_name_stack.push(field_name);
}
if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
self.production.dynamic_precedence = params.dynamic_precedence;
}
let did_push = self.apply(*rule, at_end);
if has_precedence {
self.precedence_stack.pop();
if did_push && !at_end {
self.production.steps.last_mut().unwrap().precedence = self
.precedence_stack
.last()
.cloned()
.unwrap_or(Precedence::None);
}
}
if has_associativity {
self.associativity_stack.pop();
if did_push && !at_end {
self.production.steps.last_mut().unwrap().associativity =
self.associativity_stack.last().cloned();
}
}
if has_alias {
self.alias_stack.pop();
}
if has_field_name {
self.field_name_stack.pop();
}
did_push
}
Rule::Symbol(symbol) => {
self.production.steps.push(ProductionStep {
symbol,
precedence: self
.precedence_stack
.last()
.cloned()
.unwrap_or(Precedence::None),
associativity: self.associativity_stack.last().cloned(),
alias: self.alias_stack.last().cloned(),
field_name: self.field_name_stack.last().cloned(),
});
true
}
_ => false,
}
}
}
fn extract_choices(rule: Rule) -> Vec<Rule> {
match rule {
Rule::Seq(elements) => {
let mut result = vec![Rule::Blank];
for element in elements {
let extraction = extract_choices(element);
let mut next_result = Vec::new();
for entry in result {
for extraction_entry in extraction.iter() {
next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()]));
}
}
result = next_result;
}
result
}
Rule::Choice(elements) => {
let mut result = Vec::new();
for element in elements {
for rule in extract_choices(element) {
result.push(rule);
}
}
result
}
Rule::Metadata { rule, params } => extract_choices(*rule)
.into_iter()
.map(|rule| Rule::Metadata {
rule: Box::new(rule),
params: params.clone(),
})
.collect(),
_ => vec![rule],
}
}
fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
let mut productions = Vec::new();
for rule in extract_choices(variable.rule) {
let production = RuleFlattener::new().flatten(rule);
if !productions.contains(&production) {
productions.push(production);
}
}
Ok(SyntaxVariable {
name: variable.name,
kind: variable.kind,
productions,
})
}
fn symbol_is_used(variables: &Vec<SyntaxVariable>, symbol: Symbol) -> bool {
for variable in variables {
for production in &variable.productions {
for step in &production.steps {
if step.symbol == symbol {
return true;
}
}
}
}
false
}
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
let mut variables = Vec::new();
for variable in grammar.variables {
variables.push(flatten_variable(variable)?);
}
for (i, variable) in variables.iter().enumerate() {
for production in &variable.productions {
if production.steps.is_empty() && symbol_is_used(&variables, Symbol::non_terminal(i)) {
return Err(anyhow!(
"The rule `{}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
",
variable.name
));
}
}
}
Ok(SyntaxGrammar {
extra_symbols: grammar.extra_symbols,
expected_conflicts: grammar.expected_conflicts,
variables_to_inline: grammar.variables_to_inline,
precedence_orderings: grammar.precedence_orderings,
external_tokens: grammar.external_tokens,
supertype_symbols: grammar.supertype_symbols,
word_token: grammar.word_token,
variables,
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::VariableType;
use crate::generate::rules::Symbol;
#[test]
fn test_flatten_grammar() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_left(
Precedence::Integer(101),
Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_right(
Precedence::Integer(102),
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
]),
),
Rule::non_terminal(7),
]),
})
.unwrap();
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(3))
.with_prec(Precedence::Integer(102), Some(Associativity::Right)),
ProductionStep::new(Symbol::non_terminal(4))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
]
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(5))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
]
},
]
);
}
#[test]
fn test_flatten_grammar_with_maximum_dynamic_precedence() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::non_terminal(1),
Rule::prec_dynamic(
101,
Rule::seq(vec![
Rule::non_terminal(2),
Rule::choice(vec![
Rule::prec_dynamic(
102,
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
),
Rule::non_terminal(5),
]),
Rule::non_terminal(6),
]),
),
Rule::non_terminal(7),
]),
})
.unwrap();
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 102,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(3)),
ProductionStep::new(Symbol::non_terminal(4)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
Production {
dynamic_precedence: 101,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::non_terminal(5)),
ProductionStep::new(Symbol::non_terminal(6)),
ProductionStep::new(Symbol::non_terminal(7)),
],
},
]
);
}
#[test]
fn test_flatten_grammar_with_final_precedence() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(
Precedence::Integer(101),
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
),
})
.unwrap();
assert_eq!(
result.productions,
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
ProductionStep::new(Symbol::non_terminal(2))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),
]
}]
);
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::prec_left(
Precedence::Integer(101),
Rule::seq(vec![Rule::non_terminal(1)]),
),
})
.unwrap();
assert_eq!(
result.productions,
vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::non_terminal(1))
.with_prec(Precedence::Integer(101), Some(Associativity::Left)),]
}]
);
}
#[test]
fn test_flatten_grammar_with_field_names() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("first-thing".to_string(), Rule::terminal(1)),
Rule::terminal(2),
Rule::choice(vec![
Rule::Blank,
Rule::field("second-thing".to_string(), Rule::terminal(3)),
]),
]),
})
.unwrap();
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
ProductionStep::new(Symbol::terminal(2))
]
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
ProductionStep::new(Symbol::terminal(2)),
ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing"),
]
},
]
);
}
}

View File

@ -0,0 +1,249 @@
use super::InternedGrammar;
use crate::generate::grammars::{InputGrammar, Variable, VariableType};
use crate::generate::rules::{Rule, Symbol};
use anyhow::{anyhow, Result};
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
let interner = Interner { grammar };
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
return Err(anyhow!("A grammar's start rule must be visible."));
}
let mut variables = Vec::with_capacity(grammar.variables.len());
for variable in grammar.variables.iter() {
variables.push(Variable {
name: variable.name.clone(),
kind: variable_type_for_name(&variable.name),
rule: interner.intern_rule(&variable.rule)?,
});
}
let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
for external_token in grammar.external_tokens.iter() {
let rule = interner.intern_rule(&external_token)?;
let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
(name.clone(), variable_type_for_name(&name))
} else {
(String::new(), VariableType::Anonymous)
};
external_tokens.push(Variable { name, kind, rule });
}
let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
for extra_token in grammar.extra_symbols.iter() {
extra_symbols.push(interner.intern_rule(extra_token)?);
}
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
for supertype_symbol_name in grammar.supertype_symbols.iter() {
supertype_symbols.push(
interner
.intern_name(supertype_symbol_name)
.ok_or_else(|| anyhow!("Undefined symbol `{}`", supertype_symbol_name))?,
);
}
let mut expected_conflicts = Vec::new();
for conflict in grammar.expected_conflicts.iter() {
let mut interned_conflict = Vec::with_capacity(conflict.len());
for name in conflict {
interned_conflict.push(
interner
.intern_name(&name)
.ok_or_else(|| anyhow!("Undefined symbol `{}`", name))?,
);
}
expected_conflicts.push(interned_conflict);
}
let mut variables_to_inline = Vec::new();
for name in grammar.variables_to_inline.iter() {
if let Some(symbol) = interner.intern_name(&name) {
variables_to_inline.push(symbol);
}
}
let mut word_token = None;
if let Some(name) = grammar.word_token.as_ref() {
word_token = Some(
interner
.intern_name(&name)
.ok_or_else(|| anyhow!("Undefined symbol `{}`", &name))?,
);
}
for (i, variable) in variables.iter_mut().enumerate() {
if supertype_symbols.contains(&Symbol::non_terminal(i)) {
variable.kind = VariableType::Hidden;
}
}
Ok(InternedGrammar {
variables,
external_tokens,
extra_symbols,
expected_conflicts,
variables_to_inline,
supertype_symbols,
word_token,
precedence_orderings: grammar.precedence_orderings.clone(),
})
}
struct Interner<'a> {
grammar: &'a InputGrammar,
}
impl<'a> Interner<'a> {
fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
match rule {
Rule::Choice(elements) => {
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element)?);
}
Ok(Rule::Choice(result))
}
Rule::Seq(elements) => {
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element)?);
}
Ok(Rule::Seq(result))
}
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))),
Rule::Metadata { rule, params } => Ok(Rule::Metadata {
rule: Box::new(self.intern_rule(rule)?),
params: params.clone(),
}),
Rule::NamedSymbol(name) => {
if let Some(symbol) = self.intern_name(&name) {
Ok(Rule::Symbol(symbol))
} else {
Err(anyhow!("Undefined symbol `{}`", name))
}
}
_ => Ok(rule.clone()),
}
}
fn intern_name(&self, symbol: &str) -> Option<Symbol> {
for (i, variable) in self.grammar.variables.iter().enumerate() {
if variable.name == symbol {
return Some(Symbol::non_terminal(i));
}
}
for (i, external_token) in self.grammar.external_tokens.iter().enumerate() {
if let Rule::NamedSymbol(name) = external_token {
if name == symbol {
return Some(Symbol::external(i));
}
}
}
return None;
}
}
fn variable_type_for_name(name: &str) -> VariableType {
if name.starts_with("_") {
VariableType::Hidden
} else {
VariableType::Named
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_repeat_expansion() {
let grammar = intern_symbols(&build_grammar(vec![
Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])),
Variable::named("y", Rule::named("_z")),
Variable::named("_z", Rule::string("a")),
]))
.unwrap();
assert_eq!(
grammar.variables,
vec![
Variable::named(
"x",
Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),])
),
Variable::named("y", Rule::non_terminal(2)),
Variable::hidden("_z", Rule::string("a")),
]
);
}
#[test]
fn test_interning_external_token_names() {
// Variable `y` is both an internal and an external token.
// Variable `z` is just an external token.
let mut input_grammar = build_grammar(vec![
Variable::named(
"w",
Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]),
),
Variable::named("x", Rule::string("a")),
Variable::named("y", Rule::string("b")),
]);
input_grammar
.external_tokens
.extend(vec![Rule::named("y"), Rule::named("z")]);
let grammar = intern_symbols(&input_grammar).unwrap();
// Variable `y` is referred to by its internal index.
// Variable `z` is referred to by its external index.
assert_eq!(
grammar.variables,
vec![
Variable::named(
"w",
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
Rule::external(1),
])
),
Variable::named("x", Rule::string("a")),
Variable::named("y", Rule::string("b")),
]
);
// The external token for `y` refers back to its internal index.
assert_eq!(
grammar.external_tokens,
vec![
Variable::named("y", Rule::non_terminal(2)),
Variable::named("z", Rule::external(1)),
]
);
}
#[test]
fn test_grammar_with_undefined_symbols() {
let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))]));
match result {
Err(e) => assert_eq!(e.to_string(), "Undefined symbol `y`"),
_ => panic!("Expected an error but got none"),
}
}
fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
InputGrammar {
variables,
name: "the_language".to_string(),
..Default::default()
}
}
}

View File

@ -0,0 +1,256 @@
mod expand_repeats;
mod expand_tokens;
mod extract_default_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;
mod process_inlines;
pub(crate) use self::expand_tokens::expand_tokens;
use self::expand_repeats::expand_repeats;
use self::extract_default_aliases::extract_default_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
use self::process_inlines::process_inlines;
use super::grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
SyntaxGrammar, Variable,
};
use super::rules::{AliasMap, Precedence, Rule, Symbol};
use anyhow::{anyhow, Result};
use std::{
cmp::Ordering,
collections::{hash_map, HashMap, HashSet},
mem,
};
pub(crate) struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,
extra_symbols: Vec<T>,
expected_conflicts: Vec<Vec<Symbol>>,
precedence_orderings: Vec<Vec<PrecedenceEntry>>,
external_tokens: Vec<U>,
variables_to_inline: Vec<Symbol>,
supertype_symbols: Vec<Symbol>,
word_token: Option<Symbol>,
}
pub(crate) type InternedGrammar = IntermediateGrammar<Rule, Variable>;
pub(crate) type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ExtractedLexicalGrammar {
pub variables: Vec<Variable>,
pub separators: Vec<Rule>,
}
impl<T, U> Default for IntermediateGrammar<T, U> {
fn default() -> Self {
Self {
variables: Default::default(),
extra_symbols: Default::default(),
expected_conflicts: Default::default(),
precedence_orderings: Default::default(),
external_tokens: Default::default(),
variables_to_inline: Default::default(),
supertype_symbols: Default::default(),
word_token: Default::default(),
}
}
}
/// Transform an input grammar into separate components that are ready
/// for parse table construction.
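///
/// The passes below run in order: validate the precedence orderings, intern
/// symbol names, split the grammar into syntax and lexical parts, expand
/// repeats, flatten rules into productions, expand tokens, extract default
/// aliases, and process inline rules.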
pub(crate) fn prepare_grammar(
input_grammar: &InputGrammar,
) -> Result<(
SyntaxGrammar,
LexicalGrammar,
InlinedProductionMap,
AliasMap,
)> {
validate_precedences(input_grammar)?;
let interned_grammar = intern_symbols(input_grammar)?;
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
let syntax_grammar = expand_repeats(syntax_grammar);
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
let lexical_grammar = expand_tokens(lexical_grammar)?;
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
let inlines = process_inlines(&syntax_grammar, &lexical_grammar)?;
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
}
/// Check that all of the named precedences used in the grammar are declared
/// within the `precedences` lists, and also that there are no conflicting
/// precedence orderings declared in those lists.
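///
/// For example, the orderings `['a', 'b']` and `['b', 'c', 'a']` conflict,
/// because the first places 'a' before 'b' while the second places 'b' before
/// 'a' (see `test_validate_precedences_with_conflicting_order` below).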
fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
// For any two precedence names `a` and `b`, if `a` comes before `b`
// in some list, then it cannot come *after* `b` in any list.
let mut pairs = HashMap::new();
for list in &grammar.precedence_orderings {
for (i, mut entry1) in list.iter().enumerate() {
for mut entry2 in list.iter().skip(i + 1) {
if entry2 == entry1 {
continue;
}
let mut ordering = Ordering::Greater;
if entry1 > entry2 {
ordering = Ordering::Less;
mem::swap(&mut entry1, &mut entry2);
}
match pairs.entry((entry1, entry2)) {
hash_map::Entry::Vacant(e) => {
e.insert(ordering);
}
hash_map::Entry::Occupied(e) => {
if e.get() != &ordering {
return Err(anyhow!(
"Conflicting orderings for precedences {} and {}",
entry1,
entry2
));
}
}
}
}
}
}
// Check that no rule contains a named precedence that is not present in
// any of the `precedences` lists.
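    // For example, `prec.left('omg', ...)` is rejected when 'omg' never appears
    // in a `precedences` list (see the undeclared-precedence test below).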
fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> {
match rule {
Rule::Repeat(rule) => validate(rule_name, rule, names),
Rule::Seq(elements) | Rule::Choice(elements) => elements
.iter()
.map(|e| validate(rule_name, e, names))
.collect(),
Rule::Metadata { rule, params } => {
if let Precedence::Name(n) = &params.precedence {
if !names.contains(n) {
return Err(anyhow!(
"Undeclared precedence '{}' in rule '{}'",
n,
rule_name
));
}
}
validate(rule_name, rule, names)?;
Ok(())
}
_ => Ok(()),
}
}
let precedence_names = grammar
.precedence_orderings
.iter()
.flat_map(|l| l.iter())
.filter_map(|p| {
if let PrecedenceEntry::Name(n) = p {
Some(n)
} else {
None
}
})
.collect::<HashSet<&String>>();
for variable in &grammar.variables {
validate(&variable.name, &variable.rule, &precedence_names)?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{InputGrammar, Variable, VariableType};
#[test]
fn test_validate_precedences_with_undeclared_precedence() {
let grammar = InputGrammar {
precedence_orderings: vec![
vec![
PrecedenceEntry::Name("a".to_string()),
PrecedenceEntry::Name("b".to_string()),
],
vec![
PrecedenceEntry::Name("b".to_string()),
PrecedenceEntry::Name("c".to_string()),
PrecedenceEntry::Name("d".to_string()),
],
],
variables: vec![
Variable {
name: "v1".to_string(),
kind: VariableType::Named,
rule: Rule::Seq(vec![
Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
]),
},
Variable {
name: "v2".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::Choice(vec![
Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")),
Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
])),
},
],
..Default::default()
};
let result = validate_precedences(&grammar);
assert_eq!(
result.unwrap_err().to_string(),
"Undeclared precedence 'omg' in rule 'v2'",
);
}
#[test]
fn test_validate_precedences_with_conflicting_order() {
let grammar = InputGrammar {
precedence_orderings: vec![
vec![
PrecedenceEntry::Name("a".to_string()),
PrecedenceEntry::Name("b".to_string()),
],
vec![
PrecedenceEntry::Name("b".to_string()),
PrecedenceEntry::Name("c".to_string()),
PrecedenceEntry::Name("a".to_string()),
],
],
variables: vec![
Variable {
name: "v1".to_string(),
kind: VariableType::Named,
rule: Rule::Seq(vec![
Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
]),
},
Variable {
name: "v2".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::Choice(vec![
Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")),
Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
])),
},
],
..Default::default()
};
let result = validate_precedences(&grammar);
assert_eq!(
result.unwrap_err().to_string(),
"Conflicting orderings for precedences 'a' and 'b'",
);
}
}

View File

@ -0,0 +1,539 @@
use crate::generate::{
grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
rules::SymbolType,
};
use anyhow::{anyhow, Result};
use std::collections::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {
// A `None` value here means that the production itself was produced via inlining,
    // and is stored in the builder's `productions` vector, as opposed to being
// stored in one of the grammar's variables.
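    // (Such derived productions are appended by `inline_production_at_step`,
    // which also records their indices per original step.)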
variable_index: Option<usize>,
production_index: usize,
step_index: usize,
}
struct InlinedProductionMapBuilder {
production_indices_by_step_id: HashMap<ProductionStepId, Vec<usize>>,
productions: Vec<Production>,
}
impl InlinedProductionMapBuilder {
fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap {
let mut step_ids_to_process = Vec::new();
for (variable_index, variable) in grammar.variables.iter().enumerate() {
for production_index in 0..variable.productions.len() {
step_ids_to_process.push(ProductionStepId {
variable_index: Some(variable_index),
production_index,
step_index: 0,
});
while !step_ids_to_process.is_empty() {
let mut i = 0;
while i < step_ids_to_process.len() {
let step_id = step_ids_to_process[i];
if let Some(step) = self.production_step_for_id(step_id, grammar) {
if grammar.variables_to_inline.contains(&step.symbol) {
let inlined_step_ids = self
.inline_production_at_step(step_id, grammar)
.into_iter()
.cloned()
.map(|production_index| ProductionStepId {
variable_index: None,
production_index,
step_index: step_id.step_index,
});
step_ids_to_process.splice(i..i + 1, inlined_step_ids);
} else {
step_ids_to_process[i] = ProductionStepId {
variable_index: step_id.variable_index,
production_index: step_id.production_index,
step_index: step_id.step_index + 1,
};
i += 1;
}
} else {
step_ids_to_process.remove(i);
}
}
}
}
}
let productions = self.productions;
let production_indices_by_step_id = self.production_indices_by_step_id;
let production_map = production_indices_by_step_id
.into_iter()
.map(|(step_id, production_indices)| {
let production = if let Some(variable_index) = step_id.variable_index {
&grammar.variables[variable_index].productions[step_id.production_index]
} else {
&productions[step_id.production_index]
} as *const Production;
((production, step_id.step_index as u32), production_indices)
})
.collect();
InlinedProductionMap {
productions,
production_map,
}
}
fn inline_production_at_step<'a>(
&'a mut self,
step_id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Vec<usize> {
// Build a list of productions produced by inlining rules.
let mut i = 0;
let step_index = step_id.step_index;
let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()];
while i < productions_to_add.len() {
if let Some(step) = productions_to_add[i].steps.get(step_index) {
let symbol = step.symbol.clone();
if grammar.variables_to_inline.contains(&symbol) {
// Remove the production from the vector, replacing it with a placeholder.
let production = productions_to_add
.splice(i..i + 1, [Production::default()].iter().cloned())
.next()
.unwrap();
// Replace the placeholder with the inlined productions.
productions_to_add.splice(
i..i + 1,
grammar.variables[symbol.index].productions.iter().map(|p| {
let mut production = production.clone();
let removed_step = production
.steps
.splice(step_index..(step_index + 1), p.steps.iter().cloned())
.next()
.unwrap();
let inserted_steps =
&mut production.steps[step_index..(step_index + p.steps.len())];
if let Some(alias) = removed_step.alias {
for inserted_step in inserted_steps.iter_mut() {
inserted_step.alias = Some(alias.clone());
}
}
if let Some(field_name) = removed_step.field_name {
for inserted_step in inserted_steps.iter_mut() {
inserted_step.field_name = Some(field_name.clone());
}
}
if let Some(last_inserted_step) = inserted_steps.last_mut() {
if last_inserted_step.precedence.is_none() {
last_inserted_step.precedence = removed_step.precedence;
}
if last_inserted_step.associativity == None {
last_inserted_step.associativity = removed_step.associativity;
}
}
if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
production.dynamic_precedence = p.dynamic_precedence;
}
production
}),
);
continue;
}
}
i += 1;
}
// Store all the computed productions.
let result = productions_to_add
.into_iter()
.map(|production| {
self.productions
.iter()
.position(|p| *p == production)
.unwrap_or({
self.productions.push(production);
self.productions.len() - 1
})
})
.collect();
// Cache these productions based on the original production step.
self.production_indices_by_step_id
.entry(step_id)
.or_insert(result)
}
fn production_for_id<'a>(
&'a self,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Production {
if let Some(variable_index) = id.variable_index {
&grammar.variables[variable_index].productions[id.production_index]
} else {
&self.productions[id.production_index]
}
}
fn production_step_for_id<'a>(
&'a self,
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> Option<&'a ProductionStep> {
self.production_for_id(id, grammar).steps.get(id.step_index)
}
}
pub(super) fn process_inlines(
grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> Result<InlinedProductionMap> {
for symbol in &grammar.variables_to_inline {
match symbol.kind {
SymbolType::External => {
return Err(anyhow!(
"External token `{}` cannot be inlined",
grammar.external_tokens[symbol.index].name
))
}
SymbolType::Terminal => {
return Err(anyhow!(
"Token `{}` cannot be inlined",
lexical_grammar.variables[symbol.index].name,
))
}
_ => {}
}
}
Ok(InlinedProductionMapBuilder {
productions: Vec::new(),
production_indices_by_step_id: HashMap::new(),
}
.build(grammar))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{
LexicalVariable, ProductionStep, SyntaxVariable, VariableType,
};
use crate::generate::rules::{Associativity, Precedence, Symbol};
#[test]
fn test_basic_inlining() {
let grammar = SyntaxGrammar {
variables_to_inline: vec![Symbol::non_terminal(1)],
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(12)),
ProductionStep::new(Symbol::terminal(13)),
],
},
Production {
dynamic_precedence: -2,
steps: vec![ProductionStep::new(Symbol::terminal(14))],
},
],
},
],
..Default::default()
};
let inline_map = process_inlines(&grammar, &Default::default()).unwrap();
// Nothing to inline at step 0.
assert!(inline_map
.inlined_productions(&grammar.variables[0].productions[0], 0)
.is_none());
// Inlining variable 1 yields two productions.
assert_eq!(
inline_map
.inlined_productions(&grammar.variables[0].productions[0], 1)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(12)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
],
},
Production {
dynamic_precedence: -2,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(14)),
ProductionStep::new(Symbol::terminal(11)),
],
},
]
);
}
#[test]
fn test_nested_inlining() {
let grammar = SyntaxGrammar {
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)), // inlined
ProductionStep::new(Symbol::terminal(12)),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(13))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(3)), // inlined
ProductionStep::new(Symbol::terminal(14)),
],
},
],
},
SyntaxVariable {
name: "non-terminal-2".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(15))],
}],
},
SyntaxVariable {
name: "non-terminal-3".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(16))],
}],
},
],
variables_to_inline: vec![
Symbol::non_terminal(1),
Symbol::non_terminal(2),
Symbol::non_terminal(3),
],
..Default::default()
};
let inline_map = process_inlines(&grammar, &Default::default()).unwrap();
let productions: Vec<&Production> = inline_map
.inlined_productions(&grammar.variables[0].productions[0], 1)
.unwrap()
.collect();
assert_eq!(
productions.iter().cloned().cloned().collect::<Vec<_>>(),
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::terminal(12)),
],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(16)),
ProductionStep::new(Symbol::terminal(14)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)),
ProductionStep::new(Symbol::terminal(12)),
],
},
]
);
assert_eq!(
inline_map
.inlined_productions(productions[0], 3)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(13)),
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::terminal(15)),
ProductionStep::new(Symbol::terminal(12)),
],
},]
);
}
#[test]
fn test_inlining_with_precedence_and_alias() {
let grammar = SyntaxGrammar {
variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)],
variables: vec![
SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
// inlined
ProductionStep::new(Symbol::non_terminal(1))
.with_prec(Precedence::Integer(1), Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
// inlined
ProductionStep::new(Symbol::non_terminal(2))
.with_alias("outer_alias", true),
],
}],
},
SyntaxVariable {
name: "non-terminal-1".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(11))
.with_prec(Precedence::Integer(2), None)
.with_alias("inner_alias", true),
ProductionStep::new(Symbol::terminal(12)),
],
}],
},
SyntaxVariable {
name: "non-terminal-2".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(13))],
}],
},
],
..Default::default()
};
let inline_map = process_inlines(&grammar, &Default::default()).unwrap();
let productions: Vec<_> = inline_map
.inlined_productions(&grammar.variables[0].productions[0], 0)
.unwrap()
.collect();
assert_eq!(
productions.iter().cloned().cloned().collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
// The first step in the inlined production retains its precedence
// and alias.
ProductionStep::new(Symbol::terminal(11))
.with_prec(Precedence::Integer(2), None)
.with_alias("inner_alias", true),
// The final step of the inlined production inherits the precedence of
// the inlined step.
ProductionStep::new(Symbol::terminal(12))
.with_prec(Precedence::Integer(1), Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true),
]
}],
);
assert_eq!(
inline_map
.inlined_productions(productions[0], 3)
.unwrap()
.cloned()
.collect::<Vec<_>>(),
vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(11))
.with_prec(Precedence::Integer(2), None)
.with_alias("inner_alias", true),
ProductionStep::new(Symbol::terminal(12))
.with_prec(Precedence::Integer(1), Some(Associativity::Left)),
ProductionStep::new(Symbol::terminal(10)),
// All steps of the inlined production inherit their alias from the
// inlined step.
ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true),
]
}],
);
}
#[test]
fn test_error_when_inlining_tokens() {
let lexical_grammar = LexicalGrammar {
variables: vec![LexicalVariable {
name: "something".to_string(),
kind: VariableType::Named,
implicit_precedence: 0,
start_state: 0,
}],
..Default::default()
};
let grammar = SyntaxGrammar {
variables_to_inline: vec![Symbol::terminal(0)],
variables: vec![SyntaxVariable {
name: "non-terminal-0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(0))],
}],
}],
..Default::default()
};
if let Err(error) = process_inlines(&grammar, &lexical_grammar) {
assert_eq!(error.to_string(), "Token `something` cannot be inlined");
} else {
panic!("expected an error, but got none");
}
}
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"Other":"C","Control":"Cc","cntrl":"Cc","Format":"Cf","Unassigned":"Cn","Private_Use":"Co","Surrogate":"Cs","Letter":"L","Cased_Letter":"LC","Lowercase_Letter":"Ll","Modifier_Letter":"Lm","Other_Letter":"Lo","Titlecase_Letter":"Lt","Uppercase_Letter":"Lu","Mark":"M","Combining_Mark":"M","Spacing_Mark":"Mc","Enclosing_Mark":"Me","Nonspacing_Mark":"Mn","Number":"N","Decimal_Number":"Nd","digit":"Nd","Letter_Number":"Nl","Other_Number":"No","Punctuation":"P","punct":"P","Connector_Punctuation":"Pc","Dash_Punctuation":"Pd","Close_Punctuation":"Pe","Final_Punctuation":"Pf","Initial_Punctuation":"Pi","Other_Punctuation":"Po","Open_Punctuation":"Ps","Symbol":"S","Currency_Symbol":"Sc","Modifier_Symbol":"Sk","Math_Symbol":"Sm","Other_Symbol":"So","Separator":"Z","Line_Separator":"Zl","Paragraph_Separator":"Zp","Space_Separator":"Zs"}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"cjkAccountingNumeric":"kAccountingNumeric","cjkOtherNumeric":"kOtherNumeric","cjkPrimaryNumeric":"kPrimaryNumeric","nv":"Numeric_Value","cf":"Case_Folding","cjkCompatibilityVariant":"kCompatibilityVariant","dm":"Decomposition_Mapping","FC_NFKC":"FC_NFKC_Closure","lc":"Lowercase_Mapping","NFKC_CF":"NFKC_Casefold","scf":"Simple_Case_Folding","sfc":"Simple_Case_Folding","slc":"Simple_Lowercase_Mapping","stc":"Simple_Titlecase_Mapping","suc":"Simple_Uppercase_Mapping","tc":"Titlecase_Mapping","uc":"Uppercase_Mapping","bmg":"Bidi_Mirroring_Glyph","bpb":"Bidi_Paired_Bracket","cjkIICore":"kIICore","cjkIRG_GSource":"kIRG_GSource","cjkIRG_HSource":"kIRG_HSource","cjkIRG_JSource":"kIRG_JSource","cjkIRG_KPSource":"kIRG_KPSource","cjkIRG_KSource":"kIRG_KSource","cjkIRG_MSource":"kIRG_MSource","cjkIRG_SSource":"kIRG_SSource","cjkIRG_TSource":"kIRG_TSource","cjkIRG_UKSource":"kIRG_UKSource","cjkIRG_USource":"kIRG_USource","cjkIRG_VSource":"kIRG_VSource","cjkRSUnicode":"kRSUnicode","Unicode_Radical_Stroke":"kRSUnicode","URS":"kRSUnicode","EqUIdeo":"Equivalent_Unified_Ideograph","isc":"ISO_Comment","JSN":"Jamo_Short_Name","na":"Name","na1":"Unicode_1_Name","Name_Alias":"Name_Alias","scx":"Script_Extensions","age":"Age","blk":"Block","sc":"Script","bc":"Bidi_Class","bpt":"Bidi_Paired_Bracket_Type","ccc":"Canonical_Combining_Class","dt":"Decomposition_Type","ea":"East_Asian_Width","gc":"General_Category","GCB":"Grapheme_Cluster_Break","hst":"Hangul_Syllable_Type","InPC":"Indic_Positional_Category","InSC":"Indic_Syllabic_Category","jg":"Joining_Group","jt":"Joining_Type","lb":"Line_Break","NFC_QC":"NFC_Quick_Check","NFD_QC":"NFD_Quick_Check","NFKC_QC":"NFKC_Quick_Check","NFKD_QC":"NFKD_Quick_Check","nt":"Numeric_Type","SB":"Sentence_Break","vo":"Vertical_Orientation","WB":"Word_Break","AHex":"ASCII_Hex_Digit","Alpha":"Alphabetic","Bidi_C":"Bidi_Control","Bidi_M":"Bidi_Mirrored","Cased":"Cased","CE":"Composition_Exclusion","CI":"Case_Ignorable","Comp_Ex":"Full_Composition_Exclusion","CWCF":"Changes_When_Casefolded","CWCM":"Changes_When_Casemapped","CWKCF":"Changes_When_NFKC_Casefolded","CWL":"Changes_When_Lowercased","CWT":"Changes_When_Titlecased","CWU":"Changes_When_Uppercased","Dash":"Dash","Dep":"Deprecated","DI":"Default_Ignorable_Code_Point","Dia":"Diacritic","EBase":"Emoji_Modifier_Base","EComp":"Emoji_Component","EMod":"Emoji_Modifier","Emoji":"Emoji","EPres":"Emoji_Presentation","Ext":"Extender","ExtPict":"Extended_Pictographic","Gr_Base":"Grapheme_Base","Gr_Ext":"Grapheme_Extend","Gr_Link":"Grapheme_Link","Hex":"Hex_Digit","Hyphen":"Hyphen","IDC":"ID_Continue","Ideo":"Ideographic","IDS":"ID_Start","IDSB":"IDS_Binary_Operator","IDST":"IDS_Trinary_Operator","Join_C":"Join_Control","LOE":"Logical_Order_Exception","Lower":"Lowercase","Math":"Math","NChar":"Noncharacter_Code_Point","OAlpha":"Other_Alphabetic","ODI":"Other_Default_Ignorable_Code_Point","OGr_Ext":"Other_Grapheme_Extend","OIDC":"Other_ID_Continue","OIDS":"Other_ID_Start","OLower":"Other_Lowercase","OMath":"Other_Math","OUpper":"Other_Uppercase","Pat_Syn":"Pattern_Syntax","Pat_WS":"Pattern_White_Space","PCM":"Prepended_Concatenation_Mark","QMark":"Quotation_Mark","Radical":"Radical","RI":"Regional_Indicator","SD":"Soft_Dotted","STerm":"Sentence_Terminal","Term":"Terminal_Punctuation","UIdeo":"Unified_Ideograph","Upper":"Uppercase","VS":"Variation_Selector","WSpace":"White_Space","space":"White_Space","XIDC":"XID_Continue","XIDS":"XID_Start","XO_NFC":"Expands_On_NFC","XO_NFD":"Expands_On_NFD","XO_NFKC":"Expands_On_NFKC","XO_NFKD":"Expands_On
_NFKD"}

File diff suppressed because it is too large

View File

@ -0,0 +1,480 @@
use super::grammars::VariableType;
use smallbitvec::SmallBitVec;
use std::iter::FromIterator;
use std::{collections::HashMap, fmt};
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum SymbolType {
External,
End,
EndOfNonTerminalExtra,
Terminal,
NonTerminal,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum Associativity {
Left,
Right,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Alias {
pub value: String,
pub is_named: bool,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Precedence {
None,
Integer(i32),
Name(String),
}
pub(crate) type AliasMap = HashMap<Symbol, Alias>;
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub(crate) struct MetadataParams {
pub precedence: Precedence,
pub dynamic_precedence: i32,
pub associativity: Option<Associativity>,
pub is_token: bool,
pub is_string: bool,
pub is_active: bool,
pub is_main_token: bool,
pub alias: Option<Alias>,
pub field_name: Option<String>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Symbol {
pub kind: SymbolType,
pub index: usize,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Rule {
Blank,
String(String),
Pattern(String),
NamedSymbol(String),
Symbol(Symbol),
Choice(Vec<Rule>),
Metadata {
params: MetadataParams,
rule: Box<Rule>,
},
Repeat(Box<Rule>),
Seq(Vec<Rule>),
}
// Because tokens are represented as small (~400 max) unsigned integers,
// sets of tokens can be efficiently represented as bit vectors with each
// index corresponding to a token, and each value representing whether or not
// the token is present in the set.
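// For example, inserting `Symbol::terminal(5)` grows `terminal_bits` to at
// least six bits and sets bit 5; `contains` then just reads that bit back.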
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct TokenSet {
terminal_bits: SmallBitVec,
external_bits: SmallBitVec,
eof: bool,
end_of_nonterminal_extra: bool,
}
impl Rule {
pub fn field(name: String, content: Rule) -> Self {
add_metadata(content, move |params| {
params.field_name = Some(name);
})
}
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
add_metadata(content, move |params| {
params.alias = Some(Alias { is_named, value });
})
}
pub fn token(content: Rule) -> Self {
add_metadata(content, |params| {
params.is_token = true;
})
}
pub fn immediate_token(content: Rule) -> Self {
add_metadata(content, |params| {
params.is_token = true;
params.is_main_token = true;
})
}
pub fn prec(value: Precedence, content: Rule) -> Self {
add_metadata(content, |params| {
params.precedence = value;
})
}
pub fn prec_left(value: Precedence, content: Rule) -> Self {
add_metadata(content, |params| {
params.associativity = Some(Associativity::Left);
params.precedence = value;
})
}
pub fn prec_right(value: Precedence, content: Rule) -> Self {
add_metadata(content, |params| {
params.associativity = Some(Associativity::Right);
params.precedence = value;
})
}
pub fn prec_dynamic(value: i32, content: Rule) -> Self {
add_metadata(content, |params| {
params.dynamic_precedence = value;
})
}
pub fn repeat(rule: Rule) -> Self {
Rule::Repeat(Box::new(rule))
}
pub fn choice(rules: Vec<Rule>) -> Self {
let mut elements = Vec::with_capacity(rules.len());
for rule in rules {
choice_helper(&mut elements, rule);
}
Rule::Choice(elements)
}
pub fn seq(rules: Vec<Rule>) -> Self {
Rule::Seq(rules)
}
}
impl Alias {
pub fn kind(&self) -> VariableType {
if self.is_named {
VariableType::Named
} else {
VariableType::Anonymous
}
}
}
impl Precedence {
pub fn is_none(&self) -> bool {
matches!(self, Precedence::None)
}
}
#[cfg(test)]
impl Rule {
pub fn terminal(index: usize) -> Self {
Rule::Symbol(Symbol::terminal(index))
}
pub fn non_terminal(index: usize) -> Self {
Rule::Symbol(Symbol::non_terminal(index))
}
pub fn external(index: usize) -> Self {
Rule::Symbol(Symbol::external(index))
}
pub fn named(name: &'static str) -> Self {
Rule::NamedSymbol(name.to_string())
}
pub fn string(value: &'static str) -> Self {
Rule::String(value.to_string())
}
pub fn pattern(value: &'static str) -> Self {
Rule::Pattern(value.to_string())
}
}
impl Symbol {
pub fn is_terminal(&self) -> bool {
self.kind == SymbolType::Terminal
}
pub fn is_non_terminal(&self) -> bool {
self.kind == SymbolType::NonTerminal
}
pub fn is_external(&self) -> bool {
self.kind == SymbolType::External
}
pub fn is_eof(&self) -> bool {
self.kind == SymbolType::End
}
pub fn non_terminal(index: usize) -> Self {
Symbol {
kind: SymbolType::NonTerminal,
index,
}
}
pub fn terminal(index: usize) -> Self {
Symbol {
kind: SymbolType::Terminal,
index,
}
}
pub fn external(index: usize) -> Self {
Symbol {
kind: SymbolType::External,
index,
}
}
pub fn end() -> Self {
Symbol {
kind: SymbolType::End,
index: 0,
}
}
pub fn end_of_nonterminal_extra() -> Self {
Symbol {
kind: SymbolType::EndOfNonTerminalExtra,
index: 0,
}
}
}
impl From<Symbol> for Rule {
fn from(symbol: Symbol) -> Self {
Rule::Symbol(symbol)
}
}
impl TokenSet {
pub fn new() -> Self {
Self {
terminal_bits: SmallBitVec::new(),
external_bits: SmallBitVec::new(),
eof: false,
end_of_nonterminal_extra: false,
}
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
.chain(
self.external_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::external(i))
} else {
None
}
}),
)
.chain(if self.eof { Some(Symbol::end()) } else { None })
.chain(if self.end_of_nonterminal_extra {
Some(Symbol::end_of_nonterminal_extra())
} else {
None
})
}
pub fn terminals<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
}
pub fn contains(&self, symbol: &Symbol) -> bool {
match symbol.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
SymbolType::End => self.eof,
SymbolType::EndOfNonTerminalExtra => self.end_of_nonterminal_extra,
}
}
pub fn contains_terminal(&self, index: usize) -> bool {
self.terminal_bits.get(index).unwrap_or(false)
}
pub fn insert(&mut self, other: Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = true;
return;
}
SymbolType::EndOfNonTerminalExtra => {
self.end_of_nonterminal_extra = true;
return;
}
};
if other.index >= vec.len() {
vec.resize(other.index + 1, false);
}
vec.set(other.index, true);
}
pub fn remove(&mut self, other: &Symbol) -> bool {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
return if self.eof {
self.eof = false;
true
} else {
false
}
}
SymbolType::EndOfNonTerminalExtra => {
return if self.end_of_nonterminal_extra {
self.end_of_nonterminal_extra = false;
true
} else {
false
};
}
};
if other.index < vec.len() && vec[other.index] {
vec.set(other.index, false);
return true;
}
false
}
pub fn is_empty(&self) -> bool {
!self.eof
&& !self.end_of_nonterminal_extra
&& !self.terminal_bits.iter().any(|a| a)
&& !self.external_bits.iter().any(|a| a)
}
pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.terminal_bits.len() > self.terminal_bits.len() {
self.terminal_bits.resize(other.terminal_bits.len(), false);
}
for (i, element) in other.terminal_bits.iter().enumerate() {
if element {
result |= !self.terminal_bits[i];
self.terminal_bits.set(i, element);
}
}
result
}
fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.external_bits.len() > self.external_bits.len() {
self.external_bits.resize(other.external_bits.len(), false);
}
for (i, element) in other.external_bits.iter().enumerate() {
if element {
result |= !self.external_bits[i];
self.external_bits.set(i, element);
}
}
result
}
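/// Union another `TokenSet` into this one, returning true if any new symbols were added.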
pub fn insert_all(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.eof {
result |= !self.eof;
self.eof = true;
}
if other.end_of_nonterminal_extra {
result |= !self.end_of_nonterminal_extra;
self.end_of_nonterminal_extra = true;
}
result |= self.insert_all_terminals(other);
result |= self.insert_all_externals(other);
result
}
}
impl FromIterator<Symbol> for TokenSet {
fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
let mut result = Self::new();
for symbol in iter {
result.insert(symbol);
}
result
}
}
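/// Wrap a rule in a `Metadata` node, merging the new params into an existing non-token `Metadata` wrapper when possible.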
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
match input {
Rule::Metadata { rule, mut params } if !params.is_token => {
f(&mut params);
Rule::Metadata { rule, params }
}
_ => {
let mut params = MetadataParams::default();
f(&mut params);
Rule::Metadata {
rule: Box::new(input),
params,
}
}
}
}
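/// Recursively flatten nested `Choice` rules into `result`, skipping duplicate alternatives.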
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
match rule {
Rule::Choice(elements) => {
for element in elements {
choice_helper(result, element);
}
}
_ => {
if !result.contains(&rule) {
result.push(rule);
}
}
}
}
impl fmt::Display for Precedence {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Precedence::Integer(i) => write!(f, "{}", i),
Precedence::Name(s) => write!(f, "'{}'", s),
Precedence::None => write!(f, "none"),
}
}
}
impl Default for Precedence {
fn default() -> Self {
Precedence::None
}
}

View File

@ -0,0 +1,168 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Symbol, TokenSet};
use std::collections::BTreeMap;
pub(crate) type ProductionInfoId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
use std::hash::BuildHasherDefault;
use indexmap::IndexMap;
use rustc_hash::FxHasher;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ParseAction {
Accept,
Shift {
state: ParseStateId,
is_repetition: bool,
},
ShiftExtra,
Recover,
Reduce {
symbol: Symbol,
child_count: usize,
dynamic_precedence: i32,
production_id: ProductionInfoId,
},
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum GotoAction {
Goto(ParseStateId),
ShiftExtra,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct ParseTableEntry {
pub actions: Vec<ParseAction>,
pub reusable: bool,
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct ParseState {
pub id: ParseStateId,
pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
pub lex_state_id: usize,
pub external_lex_state_id: usize,
pub core_id: usize,
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldLocation {
pub index: usize,
pub inherited: bool,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct ProductionInfo {
pub alias_sequence: Vec<Option<Alias>>,
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ParseTable {
pub states: Vec<ParseState>,
pub symbols: Vec<Symbol>,
pub production_infos: Vec<ProductionInfo>,
pub max_aliased_production_length: usize,
pub external_lex_states: Vec<TokenSet>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct AdvanceAction {
pub state: LexStateId,
pub in_main_token: bool,
}
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct LexState {
pub accept_action: Option<Symbol>,
pub eof_action: Option<AdvanceAction>,
pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct LexTable {
pub states: Vec<LexState>,
}
impl ParseTableEntry {
pub fn new() -> Self {
Self {
reusable: true,
actions: Vec::new(),
}
}
}
impl Default for LexTable {
fn default() -> Self {
LexTable { states: Vec::new() }
}
}
impl ParseState {
pub fn is_end_of_non_terminal_extra(&self) -> bool {
self.terminal_entries
.contains_key(&Symbol::end_of_nonterminal_extra())
}
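/// Iterate over the ids of every parse state reachable from this one via a Shift action or a Goto entry.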
pub fn referenced_states<'a>(&'a self) -> impl Iterator<Item = ParseStateId> + 'a {
self.terminal_entries
.iter()
.flat_map(|(_, entry)| {
entry.actions.iter().filter_map(|action| match action {
ParseAction::Shift { state, .. } => Some(*state),
_ => None,
})
})
.chain(self.nonterminal_entries.iter().filter_map(|(_, action)| {
if let GotoAction::Goto(state) = action {
Some(*state)
} else {
None
}
}))
}
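/// Apply `f` to each referenced state id, rewriting any Shift or Goto target whose id changes.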
pub fn update_referenced_states<F>(&mut self, mut f: F)
where
F: FnMut(usize, &ParseState) -> usize,
{
let mut updates = Vec::new();
for (symbol, entry) in &self.terminal_entries {
for (i, action) in entry.actions.iter().enumerate() {
if let ParseAction::Shift { state, .. } = action {
let result = f(*state, self);
if result != *state {
updates.push((*symbol, i, result));
}
}
}
}
for (symbol, action) in &self.nonterminal_entries {
if let GotoAction::Goto(other_state) = action {
let result = f(*other_state, self);
if result != *other_state {
updates.push((*symbol, 0, result));
}
}
}
for (symbol, action_index, new_state) in updates {
if symbol.is_non_terminal() {
self.nonterminal_entries
.insert(symbol, GotoAction::Goto(new_state));
} else {
let entry = self.terminal_entries.get_mut(&symbol).unwrap();
if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {
entry.actions[action_index] = ParseAction::Shift {
state: new_state,
is_repetition,
};
}
}
}
}
}

View File

@ -0,0 +1,28 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
extern "C" TSLanguage * tree_sitter_PARSER_NAME();
namespace {
NAN_METHOD(New) {}
void Init(Local<Object> exports, Local<Object> module) {
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
tpl->SetClassName(Nan::New("Language").ToLocalChecked());
tpl->InstanceTemplate()->SetInternalFieldCount(1);
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_PARSER_NAME());
Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("PARSER_NAME").ToLocalChecked());
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
}
NODE_MODULE(tree_sitter_PARSER_NAME_binding, Init)
} // namespace

View File

@ -0,0 +1,19 @@
{
"targets": [
{
"target_name": "tree_sitter_PARSER_NAME_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
# If your language uses an external scanner, add it here.
],
"cflags_c": [
"-std=c99",
]
}
]
}

View File

@ -0,0 +1,40 @@
fn main() {
let src_dir = std::path::Path::new("src");
let mut c_config = cc::Build::new();
c_config.include(&src_dir);
c_config
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-unused-but-set-variable")
.flag_if_supported("-Wno-trigraphs");
let parser_path = src_dir.join("parser.c");
c_config.file(&parser_path);
// If your language uses an external scanner written in C,
// then include this block of code:
/*
let scanner_path = src_dir.join("scanner.c");
c_config.file(&scanner_path);
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
*/
c_config.compile("parser");
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
// If your language uses an external scanner written in C++,
// then include this block of code:
/*
let mut cpp_config = cc::Build::new();
cpp_config.cpp(true);
cpp_config.include(&src_dir);
cpp_config
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-unused-but-set-variable");
let scanner_path = src_dir.join("scanner.cc");
cpp_config.file(&scanner_path);
cpp_config.compile("scanner");
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
*/
}

View File

@ -0,0 +1,26 @@
[package]
name = "tree-sitter-PARSER_NAME"
description = "PARSER_NAME grammar for the tree-sitter parsing library"
version = "0.0.1"
keywords = ["incremental", "parsing", "PARSER_NAME"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter-PARSER_NAME"
edition = "2018"
license = "MIT"
build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "~RUST_BINDING_VERSION"
[build-dependencies]
cc = "1.0"

View File

@ -0,0 +1,19 @@
try {
module.exports = require("../../build/Release/tree_sitter_PARSER_NAME_binding");
} catch (error1) {
if (error1.code !== 'MODULE_NOT_FOUND') {
throw error1;
}
try {
module.exports = require("../../build/Debug/tree_sitter_PARSER_NAME_binding");
} catch (error2) {
if (error2.code !== 'MODULE_NOT_FOUND') {
throw error2;
}
throw error1;
}
}
try {
module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

View File

@ -0,0 +1,52 @@
//! This crate provides PARSER_NAME language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = "";
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(tree_sitter_PARSER_NAME::language()).expect("Error loading PARSER_NAME grammar");
//! let tree = parser.parse(code, None).unwrap();
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter::Language;
extern "C" {
fn tree_sitter_PARSER_NAME() -> Language;
}
/// Get the tree-sitter [Language][] for this grammar.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language() -> Language {
unsafe { tree_sitter_PARSER_NAME() }
}
/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json");
// Uncomment these to include any queries that this grammar contains
// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
#[test]
fn test_can_load_grammar() {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(super::language())
.expect("Error loading PARSER_NAME language");
}
}

View File

@ -0,0 +1,19 @@
{
"name": "tree-sitter-PARSER_NAME",
"version": "0.0.1",
"description": "PARSER_NAME grammar for tree-sitter",
"main": "bindings/node",
"keywords": [
"parsing",
"incremental"
],
"dependencies": {
"nan": "^2.12.1"
},
"devDependencies": {
"tree-sitter-cli": "^CLI_VERSION"
},
"scripts": {
"test": "tree-sitter test"
}
}

Some files were not shown because too many files have changed in this diff.