[1]:

import pandas as pd
# Cap rendered DataFrames at 20 rows so example outputs stay short.
pd.options.display.max_rows = 20

Gather

[2]:

from siuba import _, gather, spread
from siuba.data import mtcars

[3]:

# Small wide-format example table: one row per id, one column per price.
costs = pd.DataFrame(
    data=[
        (1, 0.1, 0.4, 0.7),
        (2, 0.2, 0.5, 0.8),
    ],
    columns=["id", "price_x", "price_y", "price_z"],
)

# Bare last expression so the notebook renders the table.
costs

[3]:

	id	price_x	price_y	price_z
0	1	0.1	0.4	0.7
1	2	0.2	0.5	0.8

[4]:

# Three ways to choose the columns that gather() stacks into the
# 'measure' (former column name) and 'value' (cell value) columns.
# Only the last expression in the cell is rendered as output.

# 1. list every column by name
(costs
    >> gather('measure', 'value', _.price_x, _.price_y, _.price_z)
)

# 2. use a column slice running from price_x to price_z
(costs
    >> gather('measure', 'value', _["price_x":"price_z"])
)

# 3. exclude id and gather whatever remains
(costs
    >> gather('measure', 'value', -_.id)
)

[4]:

	id	measure	value
0	1	price_x	0.1
1	2	price_x	0.2
2	1	price_y	0.4
3	2	price_y	0.5
4	1	price_z	0.7
5	2	price_z	0.8

Spread is the inverse of gather

[5]:

# Round trip: stack the price columns into long format, then spread the
# 'measure'/'value' pair back out into one column per measure.
costs_long = costs >> gather('measure', 'value', -_.id)
costs_long >> spread('measure', 'value')

[5]:

	id	price_x	price_y	price_z
0	1	0.1	0.4	0.7
1	2	0.2	0.5	0.8

Edit page on GitHub here. Interactive version:

Fork me on GitHub