# tidyr -- published by onesixx on 20-10-14
# gather() vs. spread() ----
# install.packages("gapminder")
# or
# install.packages("http://cran.r-project.org/src/contrib/Archive/gapminder/gapminder_0.2.0.tar.gz", repos= NULL, type="source")
# Attach the packages whose functions this script calls unqualified:
# data.table(), the dplyr verbs and the %>% pipe, and the tidyr reshaping
# functions. Without dplyr attached, filter() would resolve to stats::filter().
library(data.table)
library(dplyr)
library(tidyr)

# About the gapminder data:
# a data set summarising, per country, trends in economic and health-care
# levels, including
# 1. gdpPercap  GDP per capita (economic level)
# 2. lifeExp    average life expectancy (health-care level)
# --------------------------------------------------------------
# Print the full data set for reference.
as.data.frame(gapminder::gapminder)

# Korea-only subset: keep the year and the two measures, ordered by year.
data.kr <- data.table(gapminder::gapminder) %>%
  filter(country == "Korea, Rep.") %>%
  select(year, lifeExp, gdpPercap) %>%
  arrange(year)
# year lifeExp gdpPercap
# 1: 1952 47.453 1030.59
# 2: 1957 52.681 1487.59
# 3: 1962 55.292 1536.34
# 4: 1967 57.716 2029.23
# 5: 1972 62.612 3030.88
# 6: 1977 64.766 4657.22
# 7: 1982 67.123 5622.94
# 8: 1987 69.810 8533.09
# 9: 1992 72.244 12104.28
# 10: 1997 74.647 15993.53
# 11: 2002 77.045 19233.99
# 12: 2007 78.623 23348.14
### gather() -----------------------------------------------------------------
# wide to long (row:obs, column:variable) => pivoting
#df %>% gather("key", "value", x, y, z)
#df %>% pivot_longer(c(x, y, z), names_to="key", values_to="value")
# gather(data,
# key="key", value = "value", ...,
# \t\t na.rm=FALSE, convert=FALSE, factor_key=FALSE)
# Wide -> long: stack every column except `year` into key/value pairs.
long.data.kr <- gather(data.kr, key = "key", value = "value", -year)
# Equivalent call, naming the columns to stack explicitly.
long.data.kr <- gather(data.kr, key = key, value = value, lifeExp, gdpPercap)
#data.kr %>% gather( key, value, lifeExp, gdpPercap)
#data.kr %>% gather( key, value, lifeExp:gdpPercap)
#data.kr %>% gather( key, value, -year)
#data.kr %>% gather( key, value, 2:3)
# year key value
# 1 1952 lifeExp 47.453
# 2 1957 lifeExp 52.681
# 3 1962 lifeExp 55.292
# 4 1967 lifeExp 57.716
# 5 1972 lifeExp 62.612
# 6 1977 lifeExp 64.766
# 7 1982 lifeExp 67.123
# 8 1987 lifeExp 69.810
# 9 1992 lifeExp 72.244
# 10 1997 lifeExp 74.647
# 11 2002 lifeExp 77.045
# 12 2007 lifeExp 78.623
# 13 1952 gdpPercap 1030.592
# 14 1957 gdpPercap 1487.594
# 15 1962 gdpPercap 1536.344
# 16 1967 gdpPercap 2029.228
# 17 1972 gdpPercap 3030.877
# 18 1977 gdpPercap 4657.221
# 19 1982 gdpPercap 5622.942
# 20 1987 gdpPercap 8533.089
# 21 1992 gdpPercap 12104.279
# 22 1997 gdpPercap 15993.528
# 23 2002 gdpPercap 19233.988
# 24 2007 gdpPercap 23348.140
### spread() ------------------------------------------------------------------
# long to wide
# spread(data, key, value,
# fill=NA, convert=FALSE, drop=TRUE, sep=NULL)
# Long -> wide: spread the `key` column into columns filled from `value`.
spread(long.data.kr, key = key, value = value)
# Same call with `key` and `value` matched by position.
spread(long.data.kr, key, value)
# year lifeExp gdpPercap
# 1: 1952 47.453 1030.59
# 2: 1957 52.681 1487.59
# 3: 1962 55.292 1536.34
# 4: 1967 57.716 2029.23
# 5: 1972 62.612 3030.88
# 6: 1977 64.766 4657.22
# 7: 1982 67.123 5622.94
# 8: 1987 69.810 8533.09
# 9: 1992 72.244 12104.28
# 10: 1997 74.647 15993.53
# 11: 2002 77.045 19233.99
# 12: 2007 78.623 23348.14
# unite() vs. separate() ----
# Print the full data set for reference.
as.data.frame(gapminder::gapminder)

# Fix the RNG seed so the sample drawn below is reproducible.
set.seed(666)

# Draw ten rows, keep the columns of interest, and sort them.
# NOTE(review): `weight = country` uses the country column as sampling
# weights; if that column is a factor or character this looks unintended --
# confirm before relying on the sample.
data <- data.table(gapminder::gapminder) %>%
  # rownames_to_column('rowNum') %>%
  sample_n(size = 10, weight = country) %>%
  select(continent, country, year, pop, lifeExp, gdpPercap) %>%
  arrange(continent, country, year)
data
# continent country year pop lifeExp gdpPercap
# 1: Africa Ghana 1982 11400338 53.744 876.033
# 2: Africa Sierra Leone 1992 4260884 38.333 1068.696
# 3: Africa Swaziland 1967 420690 46.633 2613.102
# 4: Americas Nicaragua 1952 1165790 42.314 3112.364
# 5: Asia Lebanon 1972 2680018 65.421 7486.384
# 6: Asia Nepal 1992 20326209 55.727 897.740
# 7: Asia Syria 1987 11242847 66.974 3116.774
# 8: Europe Bosnia and Herzegovina 2007 4552198 74.852 7446.299
# 9: Europe Spain 1952 28549870 64.940 3834.035
# 10: Europe Switzerland 1982 6468126 76.210 28397.715
### unite ---------------------------------------------------------------
# Merge two variables into a single column.
# unite(data, col, ..., sep = "_",
#       remove = TRUE, na.rm = FALSE)
data.unite <- unite(data, newName, continent, country, sep = ".")
# newName year pop lifeExp gdpPercap
# 1: Africa.Ghana 1982 11400338 53.744 876.033
# 2: Africa.Sierra Leone 1992 4260884 38.333 1068.696
# 3: Africa.Swaziland 1967 420690 46.633 2613.102
# 4: Americas.Nicaragua 1952 1165790 42.314 3112.364
# 5: Asia.Lebanon 1972 2680018 65.421 7486.384
# 6: Asia.Nepal 1992 20326209 55.727 897.740
# 7: Asia.Syria 1987 11242847 66.974 3116.774
# 8: Europe.Bosnia and Herzegovina 2007 4552198 74.852 7446.299
# 9: Europe.Spain 1952 28549870 64.940 3834.035
# 10: Europe.Switzerland 1982 6468126 76.210 28397.715
### separate -------------------------------------------------------------
# Split the values of one column back into several columns.
# `sep` is treated as a regular expression, so the literal dot inserted by
# unite() is written as the character class "[.]" (no backslash escaping).
# `extra = "merge"` splits only at the first dot, so a country name that
# itself contains a dot (e.g. "Korea, Rep.") is kept intact.
data.unite %>%
  separate(
    newName,
    into = c("continent", "country"),
    sep = "[.]",
    extra = "merge"
  )
# continent country year pop lifeExp gdpPercap
# 1: Africa Ghana 1982 11400338 53.744 876.033
# 2: Africa Sierra Leone 1992 4260884 38.333 1068.696
# 3: Africa Swaziland 1967 420690 46.633 2613.102
# 4: Americas Nicaragua 1952 1165790 42.314 3112.364
# 5: Asia Lebanon 1972 2680018 65.421 7486.384
# 6: Asia Nepal 1992 20326209 55.727 897.740
# 7: Asia Syria 1987 11242847 66.974 3116.774
# 8: Europe Bosnia and Herzegovina 2007 4552198 74.852 7446.299
# 9: Europe Spain 1952 28549870 64.940 3834.035
# 10: Europe Switzerland 1982 6468126 76.210 28397.715