more tests

This commit is contained in:
dylwil3 2025-12-12 09:45:53 -06:00
parent d9d941691f
commit a991e003a1
2 changed files with 217 additions and 0 deletions

View File

@ -216,3 +216,54 @@ max_message_id = (
.baz() .baz()
) )
# Note in preview we split at `pl` which some
# folks may dislike. (Similarly with common
# `np` and `pd` invocations).
expr = (
pl.scan_parquet("/data/pypi-parquet/*.parquet")
.filter(
[
pl.col("path").str.contains(
r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0,2}(?:or)?|go)$"
),
~pl.col("path").str.contains(r"(^|/)test(|s|ing)"),
~pl.col("path").str.contains("/site-packages/", literal=True),
]
)
.with_columns(
month=pl.col("uploaded_on").dt.truncate("1mo"),
ext=pl.col("path")
.str.extract(pattern=r"\.([a-z0-9]+)$", group_index=1)
.str.replace_all(pattern=r"cxx|cpp|cc|c|hpp|h", value="C/C++")
.str.replace_all(pattern="^f.*$", value="Fortran")
.str.replace("rs", "Rust", literal=True)
.str.replace("go", "Go", literal=True)
.str.replace("asm", "Assembly", literal=True)
.replace({"": None}),
)
.group_by(["month", "ext"])
.agg(project_count=pl.col("project_name").n_unique())
.drop_nulls(["ext"])
.sort(["month", "project_count"], descending=True)
)
def indentation_matching_for_loop_in_preview():
if make_this:
if more_nested_because_line_length:
identical_hidden_layer_sizes = all(
current_hidden_layer_sizes == first_hidden_layer_sizes
for current_hidden_layer_sizes in self.component_config[
HIDDEN_LAYERS_SIZES
].values().attr
)
def indentation_matching_walrus_in_preview():
if make_this:
if more_nested_because_line_length:
with self.read_ctx(book_type) as cursor:
if (entry_count := len(names := cursor.execute(
'SELECT name FROM address_book WHERE address=?',
(address,),
).fetchall().some_attr)) == 0 or len(set(names)) > 1:
return

View File

@ -222,6 +222,57 @@ max_message_id = (
.baz() .baz()
) )
# Note in preview we split at `pl` which some
# folks may dislike. (Similarly with common
# `np` and `pd` invocations).
expr = (
pl.scan_parquet("/data/pypi-parquet/*.parquet")
.filter(
[
pl.col("path").str.contains(
r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0,2}(?:or)?|go)$"
),
~pl.col("path").str.contains(r"(^|/)test(|s|ing)"),
~pl.col("path").str.contains("/site-packages/", literal=True),
]
)
.with_columns(
month=pl.col("uploaded_on").dt.truncate("1mo"),
ext=pl.col("path")
.str.extract(pattern=r"\.([a-z0-9]+)$", group_index=1)
.str.replace_all(pattern=r"cxx|cpp|cc|c|hpp|h", value="C/C++")
.str.replace_all(pattern="^f.*$", value="Fortran")
.str.replace("rs", "Rust", literal=True)
.str.replace("go", "Go", literal=True)
.str.replace("asm", "Assembly", literal=True)
.replace({"": None}),
)
.group_by(["month", "ext"])
.agg(project_count=pl.col("project_name").n_unique())
.drop_nulls(["ext"])
.sort(["month", "project_count"], descending=True)
)
def indentation_matching_for_loop_in_preview():
if make_this:
if more_nested_because_line_length:
identical_hidden_layer_sizes = all(
current_hidden_layer_sizes == first_hidden_layer_sizes
for current_hidden_layer_sizes in self.component_config[
HIDDEN_LAYERS_SIZES
].values().attr
)
def indentation_matching_walrus_in_preview():
if make_this:
if more_nested_because_line_length:
with self.read_ctx(book_type) as cursor:
if (entry_count := len(names := cursor.execute(
'SELECT name FROM address_book WHERE address=?',
(address,),
).fetchall().some_attr)) == 0 or len(set(names)) > 1:
return
``` ```
## Output ## Output
@ -465,6 +516,68 @@ max_message_id = (
.sum() .sum()
.baz() .baz()
) )
# Note in preview we split at `pl` which some
# folks may dislike. (Similarly with common
# `np` and `pd` invocations).
expr = (
pl.scan_parquet("/data/pypi-parquet/*.parquet")
.filter(
[
pl.col("path").str.contains(
r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0,2}(?:or)?|go)$"
),
~pl.col("path").str.contains(r"(^|/)test(|s|ing)"),
~pl.col("path").str.contains("/site-packages/", literal=True),
]
)
.with_columns(
month=pl.col("uploaded_on").dt.truncate("1mo"),
ext=pl.col("path")
.str.extract(pattern=r"\.([a-z0-9]+)$", group_index=1)
.str.replace_all(pattern=r"cxx|cpp|cc|c|hpp|h", value="C/C++")
.str.replace_all(pattern="^f.*$", value="Fortran")
.str.replace("rs", "Rust", literal=True)
.str.replace("go", "Go", literal=True)
.str.replace("asm", "Assembly", literal=True)
.replace({"": None}),
)
.group_by(["month", "ext"])
.agg(project_count=pl.col("project_name").n_unique())
.drop_nulls(["ext"])
.sort(["month", "project_count"], descending=True)
)
def indentation_matching_for_loop_in_preview():
if make_this:
if more_nested_because_line_length:
identical_hidden_layer_sizes = all(
current_hidden_layer_sizes == first_hidden_layer_sizes
for current_hidden_layer_sizes in self.component_config[
HIDDEN_LAYERS_SIZES
]
.values()
.attr
)
def indentation_matching_walrus_in_preview():
if make_this:
if more_nested_because_line_length:
with self.read_ctx(book_type) as cursor:
if (
entry_count := len(
names := cursor.execute(
"SELECT name FROM address_book WHERE address=?",
(address,),
)
.fetchall()
.some_attr
)
) == 0 or len(set(names)) > 1:
return
``` ```
@ -562,4 +675,57 @@ max_message_id = (
.groupby( .groupby(
1, 1,
) )
@@ -243,19 +252,19 @@
# `np` and `pd` invocations).
expr = (
- pl.scan_parquet("/data/pypi-parquet/*.parquet")
- .filter(
- [
- pl.col("path").str.contains(
- r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0,2}(?:or)?|go)$"
- ),
- ~pl.col("path").str.contains(r"(^|/)test(|s|ing)"),
- ~pl.col("path").str.contains("/site-packages/", literal=True),
- ]
- )
+ pl
+ .scan_parquet("/data/pypi-parquet/*.parquet")
+ .filter([
+ pl.col("path").str.contains(
+ r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0,2}(?:or)?|go)$"
+ ),
+ ~pl.col("path").str.contains(r"(^|/)test(|s|ing)"),
+ ~pl.col("path").str.contains("/site-packages/", literal=True),
+ ])
.with_columns(
month=pl.col("uploaded_on").dt.truncate("1mo"),
- ext=pl.col("path")
+ ext=pl
+ .col("path")
.str.extract(pattern=r"\.([a-z0-9]+)$", group_index=1)
.str.replace_all(pattern=r"cxx|cpp|cc|c|hpp|h", value="C/C++")
.str.replace_all(pattern="^f.*$", value="Fortran")
@@ -276,9 +285,8 @@
if more_nested_because_line_length:
identical_hidden_layer_sizes = all(
current_hidden_layer_sizes == first_hidden_layer_sizes
- for current_hidden_layer_sizes in self.component_config[
- HIDDEN_LAYERS_SIZES
- ]
+ for current_hidden_layer_sizes in self
+ .component_config[HIDDEN_LAYERS_SIZES]
.values()
.attr
)
@@ -290,7 +298,8 @@
with self.read_ctx(book_type) as cursor:
if (
entry_count := len(
- names := cursor.execute(
+ names := cursor
+ .execute(
"SELECT name FROM address_book WHERE address=?",
(address,),
)
``` ```