[1]:
import pandas as pd
# Limit how many rows pandas renders so displayed frames stay compact.
pd.options.display.max_rows = 20
Gather
[2]:
from siuba import _, gather, spread
from siuba.data import mtcars
[3]:
# Wide-format example table: one row per id, one column per price measure.
costs = pd.DataFrame(
    dict(
        id=[1, 2],
        price_x=[0.1, 0.2],
        price_y=[0.4, 0.5],
        price_z=[0.7, 0.8],
    )
)
# Bare last expression so the notebook renders the frame.
costs
[3]:
id | price_x | price_y | price_z | |
---|---|---|---|---|
0 | 1 | 0.1 | 0.4 | 0.7 |
1 | 2 | 0.2 | 0.5 | 0.8 |
[4]:
# Three alternative ways to pick the columns to gather into
# 'measure' (former column name) / 'value' (cell value) pairs.
# NOTE: only the result of the last expression in the cell is rendered;
# the first two results are computed and then discarded.
# selecting each variable manually
costs >> gather('measure', 'value', _.price_x, _.price_y, _.price_z)
# selecting variables using a slice
costs >> gather('measure', 'value', _["price_x":"price_z"])
# selecting by excluding id
costs >> gather('measure', 'value', -_.id)
[4]:
id | measure | value | |
---|---|---|---|
0 | 1 | price_x | 0.1 |
1 | 2 | price_x | 0.2 |
2 | 1 | price_y | 0.4 |
3 | 2 | price_y | 0.5 |
4 | 1 | price_z | 0.7 |
5 | 2 | price_z | 0.8 |
Spread is the inverse of gather
[5]:
# Round trip: gather every column except id into measure/value pairs,
# then spread those pairs back out into one column per measure.
(costs
>> gather('measure', 'value', -_.id)
>> spread('measure', 'value')
)
[5]:
id | price_x | price_y | price_z | |
---|---|---|---|---|
0 | 1 | 0.1 | 0.4 | 0.7 |
1 | 2 | 0.2 | 0.5 | 0.8 |
Edit page on GitHub here. Interactive version: