Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!

Tips and Traps

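  1. `LazyFrame.filter` filters rows using an `Expr`, while `DataFrame.filter` filters rows using a boolean mask of type `ChunkedArray<BooleanType>`. Only rows whose mask value is `true` are kept; rows whose mask value is `false` or null are dropped.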

:timing
:sccache 1
:dep polars = { version = "0.21.1", features = ["lazy", "parquet"] }
use polars::prelude::*;
use polars::df;
// Build a small demo DataFrame with the `df!` macro: two fully populated
// columns plus `values_nulls`, whose middle entry is `None` (a null).
let frame = df![
    "names" => ["a", "b", "c"],
    "values" => [1, 2, 3],
    "values_nulls" => [Some(1), None, Some(3)]
].unwrap();
// Display the DataFrame. It is bound to `frame`; a bare `df` only names the macro.
frame
shape: (3, 3) ┌───────┬────────┬──────────────┐ │ names ┆ values ┆ values_nulls │ │ --- ┆ --- ┆ --- │ │ str ┆ i32 ┆ i32 │ ╞═══════╪════════╪══════════════╡ │ a ┆ 1 ┆ 1 │ ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ b ┆ 2 ┆ null │ ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ c ┆ 3 ┆ 3 │ └───────┴────────┴──────────────┘
// Boolean mask over the `values_nulls` column: true where the value is not null.
frame["values_nulls"].is_not_null()
shape: (3,) ChunkedArray: 'values_nulls' [bool] [ true false true ]
// Filter the frame with the mask, keeping only rows whose `values_nulls` is not null.
frame.filter(&frame["values_nulls"].is_not_null())
Ok(shape: (2, 3) ┌───────┬────────┬──────────────┐ │ names ┆ values ┆ values_nulls │ │ --- ┆ --- ┆ --- │ │ str ┆ i32 ┆ i32 │ ╞═══════╪════════╪══════════════╡ │ a ┆ 1 ┆ 1 │ ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ c ┆ 3 ┆ 3 │ └───────┴────────┴──────────────┘)
fn predicator(s1: &Series, s2: &Series) -> ChunkedArray<BooleanType> {
    ChunkedArray::new("x", &[
        Some(true),
        Some(false),
        None,
    ])
} 
// Filter with the hand-built mask. The DataFrame is bound to `frame`; `df` only names the macro.
frame.filter(&predicator(&frame["names"], &frame["values"]))
Ok(shape: (1, 3) ┌───────┬────────┬──────────────┐ │ names ┆ values ┆ values_nulls │ │ --- ┆ --- ┆ --- │ │ str ┆ i32 ┆ i32 │ ╞═══════╪════════╪══════════════╡ │ a ┆ 1 ┆ 1 │ └───────┴────────┴──────────────┘)
/// Returns a non-nullable boolean mask named `"x"`, built by testing each of
/// the hard-coded values 10, 5 and 3 against `> 6` (so: true, false, false).
///
/// `s1` and `s2` are accepted but never read; the mask does not depend on them.
fn predicator2(s1: &Series, s2: &Series) -> ChunkedArray<BooleanType> {
    let samples = [10, 5, 3];
    let exceeds_six = samples.iter().map(|v| *v > 6);
    BooleanChunked::from_iter_values("x", exceeds_six)
}
// Filter with the computed mask. The DataFrame is bound to `frame`; `df` only names the macro.
frame.filter(&predicator2(&frame["names"], &frame["values"]))
Ok(shape: (1, 3) ┌───────┬────────┬──────────────┐ │ names ┆ values ┆ values_nulls │ │ --- ┆ --- ┆ --- │ │ str ┆ i32 ┆ i32 │ ╞═══════╪════════╪══════════════╡ │ a ┆ 1 ┆ 1 │ └───────┴────────┴──────────────┘)