[1]:
import pandas as pd
# Cap DataFrame display at 5 rows so the rendered outputs stay short.
pd.options.display.max_rows = 5

Joins

[2]:
from siuba import _, inner_join, left_join, full_join

# Two small example tables that share an "id" key column.
# df1 holds ids 1 and 2; df2 holds id 2 twice plus id 3, so the joins below
# show unmatched rows on both sides and a one-to-many match on id 2.
df1 = pd.DataFrame(dict(id=[1, 2], x=["a", "b"]))
df2 = pd.DataFrame(dict(id=[2, 2, 3], y=["l", "m", "n"]))
[3]:
df1
[3]:
id x
0 1 a
1 2 b
[4]:
df2
[4]:
id y
0 2 l
1 2 m
2 3 n

⚠️ Note on piping: Currently, when you use a join in a pipe, you must pass _ as the first argument. A join takes two DataFrames, and _ marks where the piped DataFrame goes. For verbs that take a single DataFrame, passing _ is optional.

[5]:
# Pipe form: `_` stands in for the piped DataFrame (df1) as the left table.
df1 >> inner_join(_, df2, on="id")
[5]:
id x y
0 2 b l
1 2 b m

Inner join

[6]:
# Inner join: only ids present in both tables survive. id 2 matches two
# rows of df2, so the df1 row for id 2 appears once per match.
inner_join(df1, df2, on="id")
[6]:
id x y
0 2 b l
1 2 b m

Left join

[7]:
# Left join: every row of df1 is kept. id 1 has no match in df2, so its
# y value is NaN.
left_join(df1, df2, on="id")
[7]:
id x y
0 1 a NaN
1 2 b l
2 2 b m

Full join

[8]:

# Full join: rows from both tables are kept. Columns coming from the side
# without a match (y for id 1, x for id 3) are filled with NaN.
full_join(df1, df2, on="id")
[8]:
id x y
0 1 a NaN
1 2 b l
2 2 b m
3 3 NaN n

Semi and anti join

[9]:
from siuba import semi_join, anti_join

# Semi join: filters df1 to the rows whose id has a match in df2. No df2
# columns are added, and the id 2 row is not repeated for its two matches.
semi_join(df1, df2, on="id")
[9]:
id x
1 2 b
[10]:
# TODO: implement anti_join, then enable the example call below.
#anti_join(df1, df2, on = "id")

Edit page on github here. Interactive version: Binder badge