tidyr

Published by onesixx

https://rpubs.com/bradleyboehmke/data_wrangling

gather() vs. spread()

# install.packages("gapminder")
# or
# install.packages("http://cran.r-project.org/src/contrib/Archive/gapminder/gapminder_0.2.0.tar.gz", repos= NULL, type="source")
# About the gapminder data:
# a data set summarising, per country, trends in economic level and
# health-care level, via
# 1. gdpPercap  GDP per capita (economic level)
# 2. lifeExp    average life expectancy (health-care level)
# --------------------------------------------------------------
# Attach the packages whose functions this script calls unqualified, so it
# runs in a fresh session (gapminder itself is always reached via `::`).
library(data.table)  # data.table()
library(dplyr)       # %>%, filter(), select(), arrange(), sample_n()
library(tidyr)       # gather(), spread(), unite(), separate()

# Print the whole gapminder table as a plain data.frame for a first look.
as.data.frame(gapminder::gapminder)

# Build the wide example table: the "Korea, Rep." rows only, reduced to
# year / lifeExp / gdpPercap and sorted by year.
data.kr <- data.table(gapminder::gapminder) %>%
  filter(country == "Korea, Rep.") %>%
  select(year, lifeExp, gdpPercap) %>%
  arrange(year)
#     year lifeExp gdpPercap
#  1: 1952  47.453   1030.59
#  2: 1957  52.681   1487.59
#  3: 1962  55.292   1536.34
#  4: 1967  57.716   2029.23
#  5: 1972  62.612   3030.88
#  6: 1977  64.766   4657.22
#  7: 1982  67.123   5622.94
#  8: 1987  69.810   8533.09
#  9: 1992  72.244  12104.28
# 10: 1997  74.647  15993.53
# 11: 2002  77.045  19233.99
# 12: 2007  78.623  23348.14
# Reference: https://statkclee.github.io/r-novice-gapminder/14-tidyr-kr.html
### gather() -----------------------------------------------------------------
# wide to long (row:obs, column:variable) => pivoting

#df %>% gather("key", "value", x, y, z) 
#df %>% pivot_longer(c(x, y, z), names_to="key", values_to="value")

# gather(data, 
#        key="key", value = "value",  ...,
#   \t\t na.rm=FALSE, convert=FALSE, factor_key=FALSE)
# Wide -> long: stack every column except `year` into a key/value pair of
# columns named "key" and "value".
long.data.kr <- gather(data.kr, key = "key", value = "value", -year)
# Equivalent call: name the columns to gather (lifeExp, gdpPercap)
# explicitly instead of excluding `year`.
long.data.kr <- gather(data.kr, key = key, value = value, lifeExp, gdpPercap)
#data.kr %>% gather( key,  value,  lifeExp, gdpPercap)
#data.kr %>% gather( key,  value,  lifeExp:gdpPercap)
#data.kr %>% gather( key,  value,  -year)
#data.kr %>% gather( key,  value,  2:3)

#    year       key     value
# 1  1952   lifeExp    47.453
# 2  1957   lifeExp    52.681
# 3  1962   lifeExp    55.292
# 4  1967   lifeExp    57.716
# 5  1972   lifeExp    62.612
# 6  1977   lifeExp    64.766
# 7  1982   lifeExp    67.123
# 8  1987   lifeExp    69.810
# 9  1992   lifeExp    72.244
# 10 1997   lifeExp    74.647
# 11 2002   lifeExp    77.045
# 12 2007   lifeExp    78.623
# 13 1952 gdpPercap  1030.592
# 14 1957 gdpPercap  1487.594
# 15 1962 gdpPercap  1536.344
# 16 1967 gdpPercap  2029.228
# 17 1972 gdpPercap  3030.877
# 18 1977 gdpPercap  4657.221
# 19 1982 gdpPercap  5622.942
# 20 1987 gdpPercap  8533.089
# 21 1992 gdpPercap 12104.279
# 22 1997 gdpPercap 15993.528
# 23 2002 gdpPercap 19233.988
# 24 2007 gdpPercap 23348.140
### spread() ------------------------------------------------------------------
# long to wide

# spread(data, key, value, 
#         fill=NA, convert=FALSE, drop=TRUE, sep=NULL)
# Long -> wide: turn each distinct value of `key` back into its own column,
# filled from `value`. The result is printed, not stored.
# NOTE(review): spread() presumably orders the new columns by sorted key
# value (gdpPercap before lifeExp) - confirm against the printed listing.
spread(long.data.kr, key = key, value = value)
# Same call with `key` and `value` passed positionally.
spread(long.data.kr, key, value)
#     year lifeExp gdpPercap
#  1: 1952  47.453   1030.59
#  2: 1957  52.681   1487.59
#  3: 1962  55.292   1536.34
#  4: 1967  57.716   2029.23
#  5: 1972  62.612   3030.88
#  6: 1977  64.766   4657.22
#  7: 1982  67.123   5622.94
#  8: 1987  69.810   8533.09
#  9: 1992  72.244  12104.28
# 10: 1997  74.647  15993.53
# 11: 2002  77.045  19233.99
# 12: 2007  78.623  23348.14

unite() vs. separate()

# Echo the complete gapminder table as a base data.frame.
as.data.frame(gapminder::gapminder)

# Fix the RNG state so the rows drawn below are reproducible.
set.seed(666)

# Draw 10 rows, keep six columns, and sort them by continent, country, year.
# The intermediate steps live inside local() so only `data` is created.
data <- local({
  gap_dt <- data.table(gapminder::gapminder)
  # rownames_to_column('rowNum')   # disabled step, kept for reference
  # NOTE(review): `weight = country` hands the country column to the sampler
  # as row weights; presumably a placeholder rather than a meaningful
  # weighting - confirm intent before reusing.
  drawn <- sample_n(gap_dt, 10, weight = country)
  kept <- select(drawn, continent, country, year, pop, lifeExp, gdpPercap)
  arrange(kept, continent, country, year)
})
data
#     continent                country year      pop lifeExp gdpPercap
#  1:    Africa                  Ghana 1982 11400338  53.744   876.033
#  2:    Africa           Sierra Leone 1992  4260884  38.333  1068.696
#  3:    Africa              Swaziland 1967   420690  46.633  2613.102
#  4:  Americas              Nicaragua 1952  1165790  42.314  3112.364
#  5:      Asia                Lebanon 1972  2680018  65.421  7486.384
#  6:      Asia                  Nepal 1992 20326209  55.727   897.740
#  7:      Asia                  Syria 1987 11242847  66.974  3116.774
#  8:    Europe Bosnia and Herzegovina 2007  4552198  74.852  7446.299
#  9:    Europe                  Spain 1952 28549870  64.940  3834.035
# 10:    Europe            Switzerland 1982  6468126  76.210 28397.715
### unite ---------------------------------------------------------------
# Merge two variables into a single column.
# unite(data, col, ..., sep = "_",
#       remove = TRUE, na.rm = FALSE)
# Here: paste `continent` and `country` together with "." into `newName`.
data.unite <- unite(data, newName, continent, country, sep = ".")
#                           newName year      pop lifeExp gdpPercap
#  1:                  Africa.Ghana 1982 11400338  53.744   876.033
#  2:           Africa.Sierra Leone 1992  4260884  38.333  1068.696
#  3:              Africa.Swaziland 1967   420690  46.633  2613.102
#  4:            Americas.Nicaragua 1952  1165790  42.314  3112.364
#  5:                  Asia.Lebanon 1972  2680018  65.421  7486.384
#  6:                    Asia.Nepal 1992 20326209  55.727   897.740
#  7:                    Asia.Syria 1987 11242847  66.974  3116.774
#  8: Europe.Bosnia and Herzegovina 2007  4552198  74.852  7446.299
#  9:                  Europe.Spain 1952 28549870  64.940  3834.035
# 10:            Europe.Switzerland 1982  6468126  76.210 28397.715
### separate -------------------------------------------------------------
# separate(): split the values of one column into several columns.
# Undo the unite() step by splitting `newName` at the "." separator.
# - sep = "[.]" matches a literal dot; the bracket class avoids any
#   backslash-escaping ambiguity in the regular expression.
# - extra = "merge" splits at most once for the two target columns, so a
#   country name that itself contains dots (e.g. "Korea, Rep.") stays whole
#   in `country` instead of being truncated with a warning.
data.unite %>%
  separate(
    newName,
    into = c("continent", "country"),
    sep = "[.]",
    extra = "merge"
  )
#     continent                country year      pop lifeExp gdpPercap
#  1:    Africa                  Ghana 1982 11400338  53.744   876.033
#  2:    Africa           Sierra Leone 1992  4260884  38.333  1068.696
#  3:    Africa              Swaziland 1967   420690  46.633  2613.102
#  4:  Americas              Nicaragua 1952  1165790  42.314  3112.364
#  5:      Asia                Lebanon 1972  2680018  65.421  7486.384
#  6:      Asia                  Nepal 1992 20326209  55.727   897.740
#  7:      Asia                  Syria 1987 11242847  66.974  3116.774
#  8:    Europe Bosnia and Herzegovina 2007  4552198  74.852  7446.299
#  9:    Europe                  Spain 1952 28549870  64.940  3834.035
# 10:    Europe            Switzerland 1982  6468126  76.210 28397.715
Categories: R Reshaping

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Oldest
Newest Most Voted
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x