Ben Chuanlong Du's Blog

It is never too late to learn.

Manipulate pandas DataFrame

Reshape DataFrame

In [6]:
import pandas as pd
import numpy as np
In [7]:
# Small all-integer frame with two columns, "x" and "y", and three rows.
df1 = pd.DataFrame(dict(x=[1, 2, 3], y=[5, 4, 3]))
df1
Out[7]:
x y
0 1 5
1 2 4
2 3 3
In [8]:
# Second frame with the same "x"/"y" columns as df1 but different values.
df2 = pd.DataFrame(dict(x=[0, 0, 3000], y=[78, 4, 3]))
df2
Out[8]:
x y
0 0 78
1 0 4
2 3000 3

Melt

In [24]:
# Wide-format sample frame: a string "id" column plus three integer columns.
df = pd.DataFrame(
    dict(id=list("abc"), x=[1, 3, 5], y=[2, 4, 6], z=[7, 8, 9])
)
df
Out[24]:
id x y z
0 a 1 2 7
1 b 3 4 8
2 c 5 6 9
In [26]:
# Wide -> long: keep "id" as the identifier and turn the x/y/z columns into
# "variable"/"value" pairs, one row per (id, column) combination.
dfm = df.melt(id_vars="id", value_vars=["x", "y", "z"])
dfm
Out[26]:
id variable value
0 a x 1
1 b x 3
2 c x 5
3 a y 2
4 b y 4
5 c y 6
6 a z 7
7 b z 8
8 c z 9

Stack

In [52]:
# Rebuild the sample frame: a string "id" column plus three integer columns.
df = pd.DataFrame(
    dict(id=list("abc"), x=[1, 3, 5], y=[2, 4, 6], z=[7, 8, 9])
)
df
Out[52]:
id x y z
0 a 1 2 7
1 b 3 4 8
2 c 5 6 9
In [57]:
# stack() moves the column labels into an inner index level, giving one entry
# per (row label, column label) pair. The result holds both the string ids and
# the integers, so its dtype is object.
x = df.stack()
x
Out[57]:
0  id    a
   x     1
   y     2
   z     7
1  id    b
   x     3
   y     4
   z     8
2  id    c
   x     5
   y     6
   z     9
dtype: object
In [58]:
# unstack() pivots the inner index level back into columns, restoring the
# original id/x/y/z layout.
x.unstack()
Out[58]:
id x y z
0 a 1 2 7
1 b 3 4 8
2 c 5 6 9
In [60]:
# Stacking a frame with ordinary (single-level) columns yields a Series,
# not a DataFrame.
type(x)
Out[60]:
pandas.core.series.Series

Insert Columns - pandas.DataFrame.insert

In [3]:
# Rebuild the sample frame: a string "id" column plus three integer columns.
df = pd.DataFrame(
    dict(id=list("abc"), x=[1, 3, 5], y=[2, 4, 6], z=[7, 8, 9])
)
df
Out[3]:
id x y z
0 a 1 2 7
1 b 3 4 8
2 c 5 6 9
In [4]:
# insert(loc, column, value) modifies df in place (nothing is assigned here):
# loc=0 makes "col" the first column and the scalar 0 fills every row.
# NOTE: insert() rejects an already-existing column name by default
# (allow_duplicates=False), so rebuild df before re-running this cell.
df.insert(0, "col", 0)
In [5]:
# Show df after the insert: "col" is now the first column.
df
Out[5]:
col id x y z
0 0 a 1 2 7
1 0 b 3 4 8
2 0 c 5 6 9

reshape

In [ ]:
# Series.reshape was deprecated and later removed from pandas, so reshape the
# column's underlying NumPy array instead. The column must hold exactly
# 100 * 3 = 300 values; otherwise NumPy raises ValueError.
# NOTE(review): `overlap` is not defined in the visible part of this notebook;
# it has to be loaded in an earlier cell before this one can run.
overlap.spot_sample_detail_id.values.reshape(100, 3)

Rename - pandas.DataFrame.rename

In [10]:
# Rebuild the sample frame: a string "id" column plus three integer columns.
df = pd.DataFrame(
    dict(id=list("abc"), x=[1, 3, 5], y=[2, 4, 6], z=[7, 8, 9])
)
df
Out[10]:
id x y z
0 a 1 2 7
1 b 3 4 8
2 c 5 6 9
In [13]:
# Use the first row (a Series indexed by the current column labels) as the
# mapping: each column is renamed to the value it holds in row 0.
# rename() returns a new frame; df itself is unchanged and row 0 stays in the data.
df.rename(columns=df.iloc[0])
Out[13]:
a 1 2 7
0 a 1 2 7
1 b 3 4 8
2 c 5 6 9
In [11]:
# Print the built-in documentation for DataFrame.rename.
help(df.rename)
Help on method rename in module pandas.core.frame:

rename(index=None, columns=None, **kwargs) method of pandas.core.frame.DataFrame instance
    Alter axes input function or functions. Function / dict values must be
    unique (1-to-1). Labels not contained in a dict / Series will be left
    as-is. Extra labels listed don't throw an error. Alternatively, change
    ``Series.name`` with a scalar value (Series only).
    
    Parameters
    ----------
    index, columns : scalar, list-like, dict-like or function, optional
        Scalar or list-like will alter the ``Series.name`` attribute,
        and raise on DataFrame or Panel.
        dict-like or functions are transformations to apply to
        that axis' values
    copy : boolean, default True
        Also copy underlying data
    inplace : boolean, default False
        Whether to return a new DataFrame. If True then value of copy is
        ignored.
    level : int or level name, default None
        In case of a MultiIndex, only rename labels in the specified
        level.
    
    Returns
    -------
    renamed : DataFrame (new object)
    
    See Also
    --------
    pandas.NDFrame.rename_axis
    
    Examples
    --------
    >>> s = pd.Series([1, 2, 3])
    >>> s
    0    1
    1    2
    2    3
    dtype: int64
    >>> s.rename("my_name") # scalar, changes Series.name
    0    1
    1    2
    2    3
    Name: my_name, dtype: int64
    >>> s.rename(lambda x: x ** 2)  # function, changes labels
    0    1
    1    2
    4    3
    dtype: int64
    >>> s.rename({1: 3, 2: 5})  # mapping, changes labels
    0    1
    3    2
    5    3
    dtype: int64
    >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    >>> df.rename(2)
    Traceback (most recent call last):
    ...
    TypeError: 'int' object is not callable
    >>> df.rename(index=str, columns={"A": "a", "B": "c"})
       a  c
    0  1  4
    1  2  5
    2  3  6
    >>> df.rename(index=str, columns={"A": "a", "C": "c"})
       a  B
    0  1  4
    1  2  5
    2  3  6

In [ ]:
 

Comments