[1]:
import pandas as pd
# Truncate DataFrame reprs to at most 5 rows so the example outputs stay short.
pd.options.display.max_rows = 5
# Mutate
[2]:
from siuba import _, group_by, mutate, select
from siuba.data import mtcars
# Work with only the three columns the examples need: all rows, selected columns.
small_cars = mtcars.loc[:, ["mpg", "cyl", "hp"]]
## Assign new column
[3]:
# Add one derived column: `cyl2` is twice each row's `cyl`.
# The result is only displayed; `small_cars` is not reassigned here.
mutate(
    small_cars,
    cyl2=_.cyl * 2,
)
[3]:
| | mpg | cyl | hp | cyl2 |
---|---|---|---|---|
0 | 21.0 | 6 | 110 | 12 |
1 | 21.0 | 6 | 110 | 12 |
... | ... | ... | ... | ... |
30 | 15.0 | 8 | 335 | 16 |
31 | 21.4 | 4 | 109 | 8 |
32 rows × 4 columns
[4]:
# Several columns in one call: `cyl4` refers to `cyl2`, which is defined
# earlier in the same argument list.
mutate(
    small_cars,
    cyl2=_.cyl * 2,
    cyl4=_.cyl2 * 2,
)
[4]:
| | mpg | cyl | hp | cyl2 | cyl4 |
---|---|---|---|---|---|
0 | 21.0 | 6 | 110 | 12 | 24 |
1 | 21.0 | 6 | 110 | 12 | 24 |
... | ... | ... | ... | ... | ... |
30 | 15.0 | 8 | 335 | 16 | 32 |
31 | 21.4 | 4 | 109 | 8 | 16 |
32 rows × 5 columns
[5]:
# A plain value (no `_` expression) is assigned to the new `misc` column.
mutate(
    small_cars,
    misc="hey",
)
[5]:
| | mpg | cyl | hp | misc |
---|---|---|---|---|
0 | 21.0 | 6 | 110 | hey |
1 | 21.0 | 6 | 110 | hey |
... | ... | ... | ... | ... |
30 | 15.0 | 8 | 335 | hey |
31 | 21.4 | 4 | 109 | hey |
32 rows × 4 columns
## Used with group_by
[6]:
# Group by `cyl`, then add the mean horsepower (`hp_mean`) and each row's
# difference from it. `demeaned_hp` reuses the `hp_mean` column defined on
# the line above it.
(
    small_cars
    >> group_by(_.cyl)
    >> mutate(
        hp_mean=_.hp.mean(),
        demeaned_hp=_.hp - _.hp_mean,
    )
)
[6]:
(grouped data frame)
| | mpg | cyl | hp | hp_mean | demeaned_hp |
---|---|---|---|---|---|
0 | 21.0 | 6 | 110 | 122.285714 | -12.285714 |
1 | 21.0 | 6 | 110 | 122.285714 | -12.285714 |
... | ... | ... | ... | ... | ... |
30 | 15.0 | 8 | 335 | 209.214286 | 125.785714 |
31 | 21.4 | 4 | 109 | 82.636364 | 26.363636 |
32 rows × 5 columns
[7]:
# Group by `cyl`, compute horsepower per cylinder, then subtract the value
# shifted down by one row. A row with no preceding value after the shift
# yields NaN in `diff`.
(
    small_cars
    >> group_by(_.cyl)
    >> mutate(
        hp_per_cyl=_.hp / _.cyl,
        diff=_.hp_per_cyl - _.hp_per_cyl.shift(1),
    )
)
[7]:
(grouped data frame)
| | mpg | cyl | hp | hp_per_cyl | diff |
---|---|---|---|---|---|
0 | 21.0 | 6 | 110 | 18.333333 | NaN |
1 | 21.0 | 6 | 110 | 18.333333 | 0.000 |
... | ... | ... | ... | ... | ... |
30 | 15.0 | 8 | 335 | 41.875000 | 8.875 |
31 | 21.4 | 4 | 109 | 27.250000 | -1.000 |
32 rows × 5 columns
Edit page on GitHub here. Interactive version: