[1]:
import pandas as pd
# Cap rendered DataFrame output at 20 rows so displayed tables stay short.
pd.options.display.max_rows = 20

Gather

[2]:
from siuba import _, gather, spread
from siuba.data import mtcars
[3]:
# Wide-format toy data: one row per id, one price_* column per measure.
costs = pd.DataFrame(
    dict(
        id=[1, 2],
        price_x=[0.1, 0.2],
        price_y=[0.4, 0.5],
        price_z=[0.7, 0.8],
    )
)

# Bare last expression so the notebook renders the frame.
costs
[3]:
id price_x price_y price_z
0 1 0.1 0.4 0.7
1 2 0.2 0.5 0.8
[4]:
# gather() lengthens the frame: the selected columns are stacked into a
# 'measure' (former column name) / 'value' (cell content) pair, while the
# unselected column (id) is repeated on every row.

# selecting each variable manually
gathered_by_name = costs >> gather('measure', 'value', _.price_x, _.price_y, _.price_z)

# selecting variables using a slice
gathered_by_slice = costs >> gather('measure', 'value', _["price_x":"price_z"])

# selecting by excluding id
gathered_by_exclusion = costs >> gather('measure', 'value', -_.id)

# The three selections name the same columns, so their results should agree.
# Checking this here keeps the first two results from being computed and then
# silently dropped (a notebook cell only renders its last expression).
pd.testing.assert_frame_equal(gathered_by_name, gathered_by_slice)
pd.testing.assert_frame_equal(gathered_by_name, gathered_by_exclusion)

gathered_by_exclusion
[4]:
id measure value
0 1 price_x 0.1
1 2 price_x 0.2
2 1 price_y 0.4
3 2 price_y 0.5
4 1 price_z 0.7
5 2 price_z 0.8

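Spread is the inverse of gather: it turns the measure/value pairs back into one column per measure, recovering the original wide table.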

[5]:

# Round trip: stack every non-id column into measure/value pairs, then
# widen them back out so each measure becomes its own column again.
(
    costs
    >> gather('measure', 'value', -_.id)
    >> spread('measure', 'value')
)
[5]:
id price_x price_y price_z
0 1 0.1 0.4 0.7
1 2 0.2 0.5 0.8

Edit this page on GitHub here. Interactive version: Binder badge