Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!
Tips and Traps¶
LazyCsvReader is more limited than CsvReader: CsvReader supports specifying a schema, while LazyCsvReader does not.
An empty field is parsed as `null` instead of an empty string by default,
and there is no way to change this behavior at this time. Please refer to this issue for more discussion. Non-empty values (e.g., the string `NA`) are NOT parsed as `null` by default. However, parsing specific strings into `null` is supported via the API `CsvReader::with_null_values`.
:timing
:sccache 1
:dep polars = { version = "0.26.1", features = ["lazy", "parquet", "dtype-full"] }Timing: true
sccache: true
Loading...
use polars::df;
use polars::prelude::*;
use polars::datatypes::DataType;
use std::fs::File;
use std::io::BufWriter;
use std::io::Write;Loading...
CsvReader and DataFrame¶
let mut s = Schema::new();
s.with_column("column_1".into(), DataType::UInt8);
s.with_column("column_2".into(), DataType::UInt8);
s.with_column("column_3".into(), DataType::UInt8);
s.with_column("column_4".into(), DataType::UInt16);
s.with_column("column_5".into(), DataType::Utf8);
sSchema:
name: column_1, data type: UInt8
name: column_2, data type: UInt8
name: column_3, data type: UInt8
name: column_4, data type: UInt16
name: column_5, data type: Utf8
Loading...
let df = CsvReader::from_path("rank53_j0_j0.csv")?
.has_header(false)
.with_dtypes(Some(&s))
.with_null_values(None)
.finish()?;
dfshape: (10, 5)
┌──────────┬──────────┬──────────┬──────────┬────────────────┐
│ column_1 ┆ column_2 ┆ column_3 ┆ column_4 ┆ column_5 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ u8 ┆ u8 ┆ u8 ┆ u16 ┆ str │
╞══════════╪══════════╪══════════╪══════════╪════════════════╡
│ 0 ┆ 1 ┆ 2 ┆ 0 ┆ 56229711839232 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 57324928499712 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 2 ┆ 37744977903616 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ NA │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ ... ┆ ... ┆ ... ┆ ... ┆ ... │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 6 ┆ 38019855810560 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 7 ┆ null │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 8 ┆ 38157294764032 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 9 ┆ 38226014240768 │
└──────────┴──────────┴──────────┴──────────┴────────────────┘Loading...
df.filter(
&df.column("column_5")?.equal("")?
)?shape: (0, 5)
┌──────────┬──────────┬──────────┬──────────┬──────────┐
│ column_1 ┆ column_2 ┆ column_3 ┆ column_4 ┆ column_5 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ u8 ┆ u8 ┆ u8 ┆ u16 ┆ str │
╞══════════╪══════════╪══════════╪══════════╪══════════╡
└──────────┴──────────┴──────────┴──────────┴──────────┘Loading...
df.filter(
&df.column("column_5")?.equal("NA")?
)?shape: (1, 5)
┌──────────┬──────────┬──────────┬──────────┬──────────┐
│ column_1 ┆ column_2 ┆ column_3 ┆ column_4 ┆ column_5 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ u8 ┆ u8 ┆ u8 ┆ u16 ┆ str │
╞══════════╪══════════╪══════════╪══════════╪══════════╡
│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ NA │
└──────────┴──────────┴──────────┴──────────┴──────────┘Loading...
df.filter(
&df.column("column_5")?.is_null()
)?shape: (1, 5)
┌──────────┬──────────┬──────────┬──────────┬──────────┐
│ column_1 ┆ column_2 ┆ column_3 ┆ column_4 ┆ column_5 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ u8 ┆ u8 ┆ u8 ┆ u16 ┆ str │
╞══════════╪══════════╪══════════╪══════════╪══════════╡
│ 0 ┆ 1 ┆ 2 ┆ 7 ┆ null │
└──────────┴──────────┴──────────┴──────────┴──────────┘Loading...
LazyCsvReader and LazyFrame¶
let df: LazyFrame = LazyCsvReader::new("rank53_j0_j0.csv")
.has_header(false)
.with_null_values(None)
.finish()?;
df.collect()?shape: (10, 5)
┌──────────┬──────────┬──────────┬──────────┬────────────────┐
│ column_1 ┆ column_2 ┆ column_3 ┆ column_4 ┆ column_5 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ str │
╞══════════╪══════════╪══════════╪══════════╪════════════════╡
│ 0 ┆ 1 ┆ 2 ┆ 0 ┆ 56229711839232 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 57324928499712 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 2 ┆ 37744977903616 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 3 ┆ NA │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ ... ┆ ... ┆ ... ┆ ... ┆ ... │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 6 ┆ 38019855810560 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 7 ┆ null │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 8 ┆ 38157294764032 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 0 ┆ 1 ┆ 2 ┆ 9 ┆ 38226014240768 │
└──────────┴──────────┴──────────┴──────────┴────────────────┘Loading...