Spaces:
Running
Running
Pin requirements for reproducibility
Browse files- polars/09_strings.py +36 -9
polars/09_strings.py
CHANGED
|
@@ -1,6 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import marimo
|
| 2 |
|
| 3 |
-
__generated_with = "0.11.
|
| 4 |
app = marimo.App(width="medium")
|
| 5 |
|
| 6 |
|
|
@@ -573,13 +583,30 @@ def _(expressions_df, pl):
|
|
| 573 |
|
| 574 |
@app.cell
def _(mo):
    # Narrative cell: introduces the word-count example built on `split`.
    mo.md(
        r"""As a more practical example, we can use the `split` expression with some aggregation to count the number of times a particular word occurs in member names across all namespaces."""
    )
    return
|
| 578 |
|
| 579 |
|
| 580 |
@app.cell(hide_code=True)
|
| 581 |
-
def _(
|
| 582 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 583 |
expressions_df.select(pl.col("member").str.split("_"))
|
| 584 |
.explode("member")
|
| 585 |
.group_by("member")
|
|
@@ -587,17 +614,17 @@ def _(alt, expressions_df, pl, random):
|
|
| 587 |
# Generating random x and y coordinates to distribute the words in the 2D space
|
| 588 |
.with_columns(
|
| 589 |
x=pl.col("member").map_elements(
|
| 590 |
-
lambda e: random.randint(0,
|
| 591 |
return_dtype=pl.UInt8,
|
| 592 |
),
|
| 593 |
y=pl.col("member").map_elements(
|
| 594 |
-
lambda e: random.randint(0,
|
| 595 |
return_dtype=pl.UInt8,
|
| 596 |
),
|
| 597 |
)
|
| 598 |
)
|
| 599 |
|
| 600 |
-
alt.Chart(
|
| 601 |
x=alt.X("x:O", axis=None),
|
| 602 |
y=alt.Y("y:O", axis=None),
|
| 603 |
text="member:N",
|
|
@@ -605,7 +632,7 @@ def _(alt, expressions_df, pl, random):
|
|
| 605 |
size=alt.Size("len:Q", legend=None),
|
| 606 |
tooltip=["member", "len"],
|
| 607 |
).configure_view(strokeWidth=0)
|
| 608 |
-
return
|
| 609 |
|
| 610 |
|
| 611 |
@app.cell
|
|
@@ -955,7 +982,7 @@ def _():
|
|
| 955 |
import altair as alt
|
| 956 |
import random
|
| 957 |
|
| 958 |
-
random.seed(
|
| 959 |
return alt, mo, pl, random
|
| 960 |
|
| 961 |
|
|
|
|
| 1 |
+
# /// script
|
| 2 |
+
# requires-python = ">=3.12"
|
| 3 |
+
# dependencies = [
|
| 4 |
+
# "altair==5.5.0",
|
| 5 |
+
# "marimo",
|
| 6 |
+
# "numpy==2.2.3",
|
| 7 |
+
# "polars==1.24.0",
|
| 8 |
+
# ]
|
| 9 |
+
# ///
|
| 10 |
+
|
| 11 |
import marimo
|
| 12 |
|
| 13 |
+
__generated_with = "0.11.17"
|
| 14 |
app = marimo.App(width="medium")
|
| 15 |
|
| 16 |
|
|
|
|
| 583 |
|
| 584 |
@app.cell
def _(mo):
    # Narrative cell: introduces the word-count example and the word cloud
    # that is built from it.
    mo.md(
        r"""As a more practical example, we can use the `split` expression with some aggregation to count the number of times a particular word occurs in member names across all namespaces. This enables us to create a word cloud of the API members' constituents!"""
    )
    return
|
| 588 |
|
| 589 |
|
| 590 |
@app.cell(hide_code=True)
def _(mo, wordcloud, wordcloud_height, wordcloud_width):
    # Lay out the width control, the height control and the chart top to
    # bottom. The leading underscore keeps the helper name local to this
    # cell (marimo convention), so no new notebook-level name is defined.
    _stacked_items = [wordcloud_width, wordcloud_height, wordcloud]
    mo.vstack(_stacked_items)
    return
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
@app.cell(hide_code=True)
def _(mo):
    # Sliders for the word cloud's width and height; each defaults to the
    # midpoint of its range.
    wordcloud_width = mo.ui.slider(
        start=0,
        stop=64,
        step=1,
        value=32,
        label="Word Cloud Width",
    )
    wordcloud_height = mo.ui.slider(
        start=0,
        stop=32,
        step=1,
        value=16,
        label="Word Cloud Height",
    )
    return wordcloud_height, wordcloud_width
|
| 605 |
+
|
| 606 |
+
|
| 607 |
+
@app.cell(hide_code=True)
|
| 608 |
+
def _(alt, expressions_df, pl, random, wordcloud_height, wordcloud_width):
|
| 609 |
+
wordcloud_df = (
|
| 610 |
expressions_df.select(pl.col("member").str.split("_"))
|
| 611 |
.explode("member")
|
| 612 |
.group_by("member")
|
|
|
|
| 614 |
# Generating random x and y coordinates to distribute the words in the 2D space
|
| 615 |
.with_columns(
|
| 616 |
x=pl.col("member").map_elements(
|
| 617 |
+
lambda e: random.randint(0, wordcloud_width.value),
|
| 618 |
return_dtype=pl.UInt8,
|
| 619 |
),
|
| 620 |
y=pl.col("member").map_elements(
|
| 621 |
+
lambda e: random.randint(0, wordcloud_height.value),
|
| 622 |
return_dtype=pl.UInt8,
|
| 623 |
),
|
| 624 |
)
|
| 625 |
)
|
| 626 |
|
| 627 |
+
wordcloud = alt.Chart(wordcloud_df).mark_text(baseline="middle").encode(
|
| 628 |
x=alt.X("x:O", axis=None),
|
| 629 |
y=alt.Y("y:O", axis=None),
|
| 630 |
text="member:N",
|
|
|
|
| 632 |
size=alt.Size("len:Q", legend=None),
|
| 633 |
tooltip=["member", "len"],
|
| 634 |
).configure_view(strokeWidth=0)
|
| 635 |
+
return wordcloud, wordcloud_df
|
| 636 |
|
| 637 |
|
| 638 |
@app.cell
|
|
|
|
| 982 |
import altair as alt
|
| 983 |
import random
|
| 984 |
|
| 985 |
+
random.seed(42)
|
| 986 |
return alt, mo, pl, random
|
| 987 |
|
| 988 |
|