import pandas as pd
DataFrame from Dictionary¶
By default each key-value is a column in the resulting data frame. You can specify the option
orient = 'index'
to make each key-value a row in the resulting data frame when using the method pandas.DataFrame.from_dict
. Starting from Python 3.7, a dict preserves insertion order. This effectively makes a pandas DataFrame keep the insertion order of columns.
# Dict keys become the column labels, in the dict's insertion order.
df = pd.DataFrame(
    data={
        "x": [1, 2, 3, 4, 5],
        "y": [5, 4, 3, 2, 1],
        "z": [1, 1, 1, 1, 1],
    }
)
df.head()
# orient="columns" is the default: every key-value pair becomes a column.
df = pd.DataFrame.from_dict(
    {"x": [1, 2, 3, 4, 5], "a": [5, 4, 3, 2, 1]},
    orient="columns",
)
df.head()
# orient="index" turns every key-value pair into a row labelled by its key.
df = pd.DataFrame.from_dict(
    data={
        "x": [1, 2, 3, 4, 5],
        "a": [5, 4, 3, 2, 1],
    },
    orient="index",
)
df.head()
# Scalar values with orient="index" give one row per key; `columns` names
# the single value column.
df = pd.DataFrame.from_dict(
    data={"how": 9, "are": 3, "you": 7, "doing": 5, "today": 6},
    columns=["freq"],
    orient="index",
)
df
DataFrame from List of Dictionaries (as Rows)¶
Each dictionary is a row in the resulting data frame.
# One dict per row; a key that is missing from a dict leaves that cell empty
# (NaN) in the corresponding row.
d = [
    {"points": 50, "time": "5:00", "year": 2010},
    {"points": 25, "time": "6:00", "month": "february"},
    {"points": 90, "time": "9:00", "month": "january"},
    {"points_h1": 20, "month": "june"},
]
pd.DataFrame(data=d)
DataFrame from List of Lists/Tuples (as Rows)¶
Each list/tuple is a row in the resulting data frame.
# Every inner list is one row; `columns` supplies the labels a, b, c, d.
df = pd.DataFrame(
    columns=list("abcd"),
    data=[
        ["foo", "one", "small", 1],
        ["foo", "one", "large", 2],
        ["foo", "one", "large", 2],
        ["foo", "two", "small", 3],
        ["foo", "two", "small", 3],
        ["bar", "one", "large", 4],
        ["bar", "one", "small", 5],
        ["bar", "two", "small", 6],
        ["bar", "two", "large", 7],
    ],
)
df.head()
# from_records treats every inner list as one record (row) as well.
df = pd.DataFrame.from_records(
    columns=list("abcd"),
    data=[
        ["foo", "one", "small", 1],
        ["foo", "one", "large", 2],
        ["foo", "one", "large", 2],
        ["foo", "two", "small", 3],
        ["foo", "two", "small", 3],
        ["bar", "one", "large", 4],
        ["bar", "one", "small", 5],
        ["bar", "two", "small", 6],
        ["bar", "two", "large", 7],
    ],
)
df.head()
DataFrame from List of Lists/Tuples (as Columns)¶
Each list/tuple is a column in the resulting data frame.
Note that pd.concat
on a list of Lists/Tuples won't work here.
You have to first create a DataFrame with the list of Lists/Tuples as rows
and then transpose it.
# Build the frame with the lists as rows, then flip it with the `.T`
# accessor so that each original list ends up as a column.
df = pd.DataFrame(
    columns=list("abcd"),
    data=[
        ["foo", "one", "small", 1],
        ["foo", "one", "large", 2],
        ["foo", "one", "large", 2],
        ["foo", "two", "small", 3],
        ["foo", "two", "small", 3],
        ["bar", "one", "large", 4],
        ["bar", "one", "small", 5],
        ["bar", "two", "small", 6],
        ["bar", "two", "large", 7],
    ],
).T
df.head()
DataFrame from One Series (as a Row)¶
The series is a row in the resulting data frame.
# Wrapping a single Series in a list makes it one row: the Series name
# becomes the row label.
# The variable is called `ids` so that the builtin `id()` is not shadowed.
ids = pd.Series([1, 2, 3, 4, 5], name="id")
pd.DataFrame(data=[ids])
# Passing the list positionally is equivalent to using the `data` keyword.
ids = pd.Series([1, 2, 3, 4, 5], name="id")
pd.DataFrame([ids])
DataFrame from Multiple Series (as Rows)¶
The series are rows in the resulting data frame.
# Each Series in the list becomes one row, labelled by the Series name.
# The variable is called `ids` so that the builtin `id()` is not shadowed.
ids = pd.Series([1, 2, 3, 4, 5], name="id")
x = pd.Series(["a", "b", "c", "d", "e"], name="x")
pd.DataFrame([ids, x])
DataFrame from One Series (as a Column)¶
The series is a column in the resulting data frame.
# Series.to_frame() turns the Series into a one-column DataFrame whose
# column label is the Series name.
# The variable is called `ids` so that the builtin `id()` is not shadowed.
ids = pd.Series([1, 2, 3, 4, 5], name="id")
ids.to_frame()
# Passing the Series itself (not a list) to the constructor also yields a
# single column.
ids = pd.Series([1, 2, 3, 4, 5], name="id")
pd.DataFrame(ids)
DataFrame from Multiple Series (as Columns)¶
The series are columns in the resulting data frame.
# Concatenating along axis=1 places the Series side by side as columns,
# each labelled by its Series name.
# The variable is called `ids` so that the builtin `id()` is not shadowed.
ids = pd.Series([1, 2, 3, 4, 5], name="id")
x = pd.Series(["a", "b", "c", "d", "e"], name="x")
pd.concat([ids, x], axis=1)
Series to Underlying Data¶
# Series.tolist() returns the values as a plain Python list.
# The variable is called `ids` so that the builtin `id()` is not shadowed.
ids = pd.Series([1, 2, 3, 4, 5], name="id")
ids.tolist()
DataFrame to Underlying Data¶
# Convert a DataFrame to a nested Python list, one inner list per row.
df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "a": [5, 4, 3, 2, 1]})
print(df.head())
# to_numpy() is the accessor pandas recommends over the legacy `.values`.
df.to_numpy().tolist()
Index¶
An index will always be created. By default, a sequence of integers (starting from 0) is used as the index.
import pandas as pd
# index=None lets pandas generate the default integer index.
df = pd.DataFrame(
    data={"x": [1, 2, 3, 4, 5], "a": [5, 4, 3, 2, 1]},
    index=None,
)
df
Column Names¶
Similar to the index, a sequence of integers (starting from 0) is used as the column names by default.
import pandas as pd
# columns=None lets pandas generate the default integer column labels.
df = pd.DataFrame(
    data=[(1, "a"), (2, "b")],
    columns=None,
)
df
Empty DataFrame¶
Create an empty DataFrame without any column or row.
# An empty dict produces a DataFrame with no columns and no rows.
pd.DataFrame({})
Create an empty (no rows) DataFrame with 1 column named x
.
# A key mapped to an empty list gives a column "x" with zero rows.
df = pd.DataFrame(data={"x": []})
df
Create an empty (no rows) DataFrame with 2 columns x
and y
.
# No row data at all, but the column labels x and y are still declared.
df = pd.DataFrame(
    data=[],
    columns=["x", "y"],
)
df
You can use the property DataFrame.empty
to check whether a DataFrame is empty or not.
# `empty` reports whether the DataFrame is empty.
df.empty
You can operate on columns of an empty (no rows) DataFrame as usual.
# Column operations on a DataFrame that has a column but no rows.
df = pd.DataFrame({"cal_dt": []})
# Assign through bracket indexing: attribute-style assignment only works for
# columns that already exist and whose names are valid identifiers that do
# not clash with DataFrame attributes or methods.
df["cal_dt"] = pd.to_datetime(df["cal_dt"])
df
len(df["cal_dt"].unique())
# With no rows, max() and min() have no value to return, so the difference
# is a missing value as well (checked with pd.isnull below).
d = df["cal_dt"].max() - df["cal_dt"].min()
d
pd.isnull(d)
pd.isnull(None)
len(df["cal_dt"].unique())