| | siuba | dplython | pandas |
---|---|---|---|
Column operations are pandas Series methods | ✅ | ✅ | ✅ |
Table verbs support user-defined functions | ✅ | ✅ | ✅ |
pipe syntax (`>>`) | ✅ | ✅ | ❌ |
concise, lazy expressions (`_.a + _.b`) | ✅ | ✅ | ❌ |
No more reset_index | ✅ | ✅ | ❌ |
unified API over (un)grouped data | ✅ | ✅ | ❌ |
generate fast grouped operations | ✅ | ❌ | ✅ |
generate SQL queries | ✅ | ❌ | ❌ |
Abstract syntax trees for transforming operations | ✅ | ❌ | ❌ |
handles nested data | ✅ | ❌ | ⚠️ |
| | siuba | dplython | pandas |
---|---|---|---|
Column operations are pandas Series methods | ✅ | ✅ | ✅ |
Table verbs support user-defined functions | ✅ | ✅ | ✅ |
pipe syntax (`>>`) | ✅ | ✅ | ❌ |
concise, lazy expressions (`_.a + _.b`) | ✅ | ✅ | ❌ |
No more reset_index | ✅ | ✅ | ❌ |
unified API over (un)grouped data | ✅ | ✅ | ❌ |
generate fast grouped operations | ✅ | ❌ | ✅ |
generate SQL queries | ✅ | ❌ | ❌ |
Abstract syntax trees for transforming operations | ✅ | ❌ | ❌ |
handles nested data | ✅ | ❌ | ⚠️ |
| | g | x | avg |
---|---|---|---|
0 | a | 1 | 2.0 |
1 | a | 2 | 2.0 |
2 | b | 3 | 2.0 |
| | g | x | avg |
---|---|---|---|
0 | a | 1 | 2.0 |
1 | a | 2 | 2.0 |
| | g | x | y |
---|---|---|---|
1 | a | 2 | 3 |
0 | a | 1 | 2 |
2 | b | 3 | 4 |
| | a | b | c |
---|---|---|---|
0 | 1 | 2 | 4 |
1 | 2 | 3 | 5 |
2 | 3 | 4 | 6 |
| | hp | mpg |
---|---|---|
cyl | | |
4 | 82.636364 | 26.663636 |
6 | 122.285714 | 19.742857 |
8 | 209.214286 | 15.100000 |
| | cyl | hp | mpg |
---|---|---|---|
0 | 4 | 82.636364 | 26.663636 |
1 | 6 | 122.285714 | 19.742857 |
2 | 8 | 209.214286 | 15.100000 |
| | cyl | hp | mpg |
---|---|---|---|
0 | 4 | 82.636364 | 26.663636 |
1 | 6 | 122.285714 | 19.742857 |
2 | 8 | 209.214286 | 15.100000 |
grouped? | siuba | pandas |
---|---|---|
no | `mutate(mtcars, res = _.hp - _.hp.mean())` | `mtcars.assign(res = lambda d: d.hp - d.hp.mean())` |
yes | `mutate(g_cyl, res = _.hp - _.hp.mean())` | `mtcars.assign(res = mtcars.hp - g_cyl.hp.transform("mean"))` |
(grouped data frame)
| | mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | demeaned |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 | -12.285714 |
1 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 | -12.285714 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
30 | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 | 125.785714 |
31 | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 | 26.363636 |
32 rows × 12 columns
| | mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | demeaned |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 | -12.285714 |
1 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 | -12.285714 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
30 | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 | 125.785714 |
31 | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 | 26.363636 |
32 rows × 12 columns
(grouped data frame)
| | student_id | course_id | score |
---|---|---|---|
0 | 0 | 14 | 38 |
1 | 0 | 3 | 40 |
... | ... | ... | ... |
19998 | 1999 | 11 | 32 |
19999 | 1999 | 17 | 10 |
20000 rows × 3 columns
| | student_id | course_id | score |
---|---|---|---|
2 | 0 | 3 | 17 |
10 | 1 | 10 | 1 |
... | ... | ... | ... |
19987 | 1998 | 17 | 31 |
19997 | 1999 | 3 | 1 |
2117 rows × 3 columns
| | id | tags | split_tags |
---|---|---|---|
0 | 1 | a,b,c | a |
1 | 1 | a,b,c | b |
... | ... | ... | ... |
4 | 2 | d,e | e |
5 | 3 | f | f |
6 rows × 3 columns