groupby

Published by onesixx

import pandas as pd
import plotly.express as px
# Load the iris sample dataset that ships with plotly and preview it.
iris = px.data.iris()
iris.head(3)

# Group the rows by the 'species' column and inspect the group mapping.
g_species = iris.groupby('species')
g_species.groups

# Walk the groups: print each group's name and row count, then a preview.
for nm, grp in g_species:
    print(f"===> {nm} : {len(grp)}", grp.head(3), sep="\n")

# Row count per species as a flat frame with columns 'species' and 'cnt'.
gSummary = g_species.size().reset_index(name='cnt')
gSummary
#       species  cnt
# 0      setosa   50
# 1  versicolor   50
# 2   virginica   50
# https://ponyozzang.tistory.com/291
import pandas as pd
import numpy as np
import seaborn as sns

# Load the Titanic sample dataset through seaborn and keep only the columns
# used in the examples below.
titanic = sns.load_dataset("titanic")
df = titanic.loc[:, ['age', 'sex', 'class', 'fare', 'survived']]
df['class'].unique()
# ['Third', 'First', 'Second']

# NOTE(review): 'class' appears to be a categorical column here. Passing
# observed=False spells out the long-standing default so the result does not
# change (and no FutureWarning is emitted) when pandas flips that default.
grouped = df.groupby('class', observed=False)

grouped.first()              # first non-null value of each column, per group
grouped.get_group('Second')  # rows that belong to the given group
grouped.size()               # number of rows in each group

# aggregation
# 'sex' holds strings, so averaging every column raises TypeError on
# pandas >= 2.0; restrict the mean to the numeric columns. String function
# names ('mean', 'sum') are used instead of NumPy callables, which pandas
# deprecates as aggregation arguments.
numeric_cols = ['age', 'fare', 'survived']
grouped[numeric_cols].agg('mean')
grouped.mean(numeric_only=True)   # same as above
grouped.agg({'age': 'mean'})
grouped.agg({'age': 'mean', 'fare': 'sum'})

df.agg({'age': 'mean'})
# Select only the columns the lambda reads so it never receives the grouping
# column (operating on grouping columns inside apply is deprecated).
grouped[['fare', 'survived']].apply(lambda x: (x.fare * x.survived).sum())

def uf_total_series(x):
    """Return the element-wise product of ``x.fare`` and ``x.survived``."""
    fare = x.fare
    survived = x.survived
    return fare * survived
def uf_total_keepidx(x):
    """Return a DataFrame whose single column ``total`` is ``x.fare * x.survived``."""
    total = x.fare * x.survived
    return pd.DataFrame({'total': total})

# Call uf_total_series on each group of `grouped` and combine the results.
grouped.apply(uf_total_series)
Categories: pandas

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.
()
x