The contents of this page are fragmentary, immature notes and thoughts of the author. Please use your own judgement when reading!
In [ ]:
:timing
:sccache 1
:dep polars = { version = "0.26.1", features = ["lazy", "parquet"] }
In [ ]:
use polars::df;
use polars::prelude::*;
use polars::datatypes::DataType;
use std::fs::File;
use std::io::BufWriter;
use std::io::Write;
In [8]:
// Lazily scan a single parquet file and materialize it right away.
// `?` hands any scan/collect error back to the notebook instead of panicking.
// The binding is immutable: nothing in this cell mutates the collected frame,
// so `mut` would only trigger an `unused_mut` warning.
let frame = LazyFrame::scan_parquet(
"part-000.parquet",
ScanArgsParquet::default(),
)?
.collect()?;
// Trailing expression so the notebook displays the result.
frame
Out[8]:
Count the Total Number of Rows of All Parquet Files
In [46]:
LazyFrame::scan_parquet(
"data/test/**/*.parquet",
ScanArgsParquet::default(),
).unwrap().select(
&[count().cast(DataType::UInt64).alias("n")]
).collect().unwrap()["n"].u64().unwrap().get(0).unwrap()
Out[46]:
Out[46]:
In [37]:
// Same row count as a DataFrame: one UInt64 column "n" holding the total.
let frame = LazyFrame::scan_parquet("data/test/**/*.parquet", ScanArgsParquet::default())
    .unwrap();
let df = {
    // Aggregate expression: count rows, cast to UInt64, name the column "n".
    let row_count = count().cast(DataType::UInt64).alias("n");
    // `frame` is consumed here; `collect` runs the lazy plan.
    frame.select(&[row_count]).collect().unwrap()
};
// Trailing expression so the notebook displays the resulting frame.
df
Out[37]:
Out[37]: