Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!
:dep polars = { version = "0.30", features = ["lazy", "round_series"] }fn my_filter_func(col1: &Series, col2: &Series, col2: &Series) -> ReturnType {
let it = (0..n).map(|i| {
let col1 = match col.get(i) {
AnyValue::UInt64(val) => val,
_ => panic!("Wrong type of col1!"),
};
// similar for col2 and col3
// apply user-defined function to col1, col2 and col3
}
// convert it to a collection of the required type
}fn my_black_box_function(a: f32, b: f32) -> f32 {
// do something
a
}
/// Applies `my_black_box_function` element-wise to two `Float32` columns.
///
/// A null on either side produces a null in the output.
///
/// # Panics
///
/// Panics if either column's dtype is not `Float32`.
fn apply_multiples(col_a: &Series, col_b: &Series) -> Float32Chunked {
    match (col_a.dtype(), col_b.dtype()) {
        (DataType::Float32, DataType::Float32) => {
            // The dtype check in the match arm is what justifies these unwraps.
            let a = col_a.f32().unwrap();
            let b = col_b.f32().unwrap();
            a.into_iter()
                .zip(b.into_iter())
                .map(|(opt_a, opt_b)| match (opt_a, opt_b) {
                    (Some(a), Some(b)) => Some(my_black_box_function(a, b)),
                    _ => None,
                })
                .collect()
        }
        _ => panic!("unexpected dtypes"),
    }
}

/// Lazy-API variant: packs `col_a` and `col_b` into one struct column so a
/// single `map` closure can see both, then applies `my_black_box_function`
/// element-wise. A null on either side produces a null in the output.
///
/// This is an alternative version of the eager `apply_multiples` defined
/// before it; in a single compiled file the two definitions would clash, so
/// keep only one of them.
///
/// # Errors
///
/// Propagates any polars error from building the frame, unpacking the struct
/// fields, downcasting them to `f32`, or collecting the lazy query.
fn apply_multiples(lf: LazyFrame) -> Result<DataFrame> {
    // NOTE(review): `lf` is not used; the example builds its own frame.
    df![
        // `f32` literals so that the `.f32()` downcasts in the closure succeed.
        "col_a" => [1.0f32, 2.0, 3.0],
        "col_b" => [3.0f32, 5.1, 0.3]
    ]?
    .lazy()
    // The closure reads the input with `struct_()`, so the columns have to be
    // combined into a struct column (not a list column).
    .select([as_struct(&[col("col_a"), col("col_b")]).map(
        |s| {
            let ca = s.struct_()?;
            let a = ca.field_by_name("col_a")?;
            let b = ca.field_by_name("col_b")?;
            let a = a.f32()?;
            let b = b.f32()?;
            let out: Float32Chunked = a
                .into_iter()
                .zip(b.into_iter())
                .map(|(opt_a, opt_b)| match (opt_a, opt_b) {
                    (Some(a), Some(b)) => Some(my_black_box_function(a, b)),
                    _ => None,
                })
                .collect();
            Ok(out.into_series())
        },
        GetOutput::from_type(DataType::Float32),
    )])
    .collect()
}

use polars::{
    prelude::*,
    datatypes::DataType,
};

// Example frame: two integer columns (one with nulls) and two f64 columns.
let df = df!(
    "column integers" => &[1, 2, 3, 4, 5, 6],
    "column float64 A" => [23.654, 0.319, 10.0049, 89.01999, -3.41501, 52.0766],
    "column options" => [Some(28), Some(300), None, Some(2), Some(-30), None],
    "column float64 B" => [23.6499, 0.399, 10.0061, 89.0105, -3.4331, 52.099999],
).unwrap();
df
shape: (6, 4)
┌─────────────────┬──────────────────┬────────────────┬──────────────────┐
│ column integers ┆ column float64 A ┆ column options ┆ column float64 B │
│ --- ┆ --- ┆ --- ┆ --- │
│ i32 ┆ f64 ┆ i32 ┆ f64 │
╞═════════════════╪══════════════════╪════════════════╪══════════════════╡
│ 1 ┆ 23.654 ┆ 28 ┆ 23.6499 │
│ 2 ┆ 0.319 ┆ 300 ┆ 0.399 │
│ 3 ┆ 10.0049 ┆ null ┆ 10.0061 │
│ 4 ┆ 89.01999 ┆ 2 ┆ 89.0105 │
│ 5 ┆ -3.41501 ┆ -30 ┆ -3.4331 │
│ 6 ┆ 52.0766 ┆ null ┆ 52.099999 │
└─────────────────┴──────────────────┴────────────────┴──────────────────┘
let columns_with_float64 = vec![
"column float64 A",
"column float64 B",
];
columns_with_float64
["column float64 A", "column float64 B"]
lazyframe
.with_columns([
cols(col1, col2, ..., colN)
.apply(|series|
some_function(series),
GetOutput::from_type(DataType::Float64)
)
]);
References
How to apply a function to multiple columns of a polars DataFrame in Rust