Time series data

Published by onesixx on

시계열 데이터 (ts)

co2 {datasets}

1959~1997년(39년간) 월별 CO2 농도 468개 (from Mauna Loa Observatory)

> co2
#         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct    Nov    Dec
# 1959 315.42 316.31 316.50 317.56 318.13 318.00 316.39 314.65 313.68 313.18 314.66 315.43
# 1960 316.27 316.81 317.42 318.87 319.87 319.43 318.01 315.74 314.00 313.68 314.84 316.03
# 1961 316.73 317.54 318.38 319.31 320.42 319.61 318.42 316.63 314.83 315.16 315.94 316.85
# ...

co2 %>% str
##  Time-Series [1:468] from 1959 to 1998: 315 316 316 318 318 ...

co2 %>% attributes()
## $tsp
## [1] 1959.000 1997.917   12.000
## $class
## [1] "ts"

sunspots {datasets}

https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/sunspot.month.html

흑점 개수의 월별 평균값 (1749–1983년)

# Flatten the `sunspots` ts into a two-column data.table: the time index is
# converted to Date through zoo's yearmon, the observations become a plain
# vector.
dt_sunspots <- data.table(
  date  = as.Date(as.yearmon(time(sunspots))),
  value = as.vector(sunspots)
)

# Semi-transparent line chart with an x-axis break every 20 years, labelled
# with the year only.
p <- ggplot(dt_sunspots, aes(x = date, y = value)) +
  geom_line(alpha = 0.66) +
  scale_x_date(date_breaks = "20 years", date_labels = "%Y") +
  theme_ipsum()
p

AirPassengers {datasets}

> AirPassengers
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
1949 112 118 132 129 121 135 148 148 136 119 104 118
1950 115 126 141 135 125 149 170 170 158 133 114 140
...
# ggfortify lets ggplot2::autoplot() draw ts objects directly.
library(ggfortify)
ggplot2::autoplot(AirPassengers)

TS => Data.table

from co2 (using zoo)

https://www.rdocumentation.org/packages/zoo/versions/1.8-6

# Inspect the class produced by the ts time index -> yearmon -> Date chain.
class(as.Date(as.yearmon(time(co2))))

> library(zoo)
> data.table( index= as.Date(as.yearmon(time(co2))),
              value=as.matrix(co2))

from AirPassengers (using broom)

broom::tidy.ts (fortify) 로 data.frame형태로 변경가능

https://cran.r-project.org/web/packages/broom/vignettes/broom.html
https://ggplot2.tidyverse.org/reference/fortify.html

fortify()대신 broom의 tidy()사용

> library(broom)
> tidy(AirPassengers)
# A tibble: 144 x 2
   index value
    
 1 1949    112
 2 1949.   118
 3 1949.   132
 4 1949.   129
 5 1949.   121
 6 1949.   135
 7 1950.   148
 8 1950.   148
 9 1950.   136
10 1950.   119
# … with 134 more rows

ggfortify는 ggplot2::autoplot()을 이용해서 ts object를 그릴 수 있도록 도와준다.

# Plot the tidied series using the `index` and `value` columns that tidy()
# returns for a ts object.
ggplot(tidy(AirPassengers), aes(x = index, y = value)) +
  geom_line()

Data.table => Xts

co2ts (from co2new)

최근 Full data  from Earth System Research Laboratory
library(lubridate)
url <- "ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt"
# Download only when there is no local copy, or the local copy is more than
# four weeks old.
if (!file.exists("co2new.dat") || now() > file.mtime("co2new.dat") + weeks(4)) {
  download.file(url, "co2new.dat")
}

# NOTE(review): the seven names below assume the file has exactly seven
# columns in this order -- confirm against the current file layout.
co2new <- read.table("co2new.dat") %>% data.table()
names(co2new) <- c("year", "month", "decimal_data", "average", "interpolated", "trend", "ndays")

# Monthly ts of the `interpolated` column, starting March 1958.
co2ts <- ts(co2new$interpolated, start = c(1958, 3), frequency = 12)

library(zoo)
co2zoo <- as.zoo(co2ts)

# Three routes to an xts object: from the ts, from the zoo, and directly from
# the values with an explicit yearmon index of one entry per row.
library(xts)
co2xts1 <- as.xts(co2ts)
co2xts2 <- as.xts(co2zoo)
co2xts3 <- xts(
  co2new$interpolated,
  order.by = as.yearmon(
    # `length.out` spelled in full rather than relying on partial matching.
    seq(as.Date("1958-03-01"), by = "months", length.out = nrow(co2new))
  )
)

ts object 만들기

  • index : Index (i.e. date or time) for the "ts" object.
  • series : Name of the series (multivariate "ts" objects only).
  • value : Value of the observation.
# Univariate ts built from the `average` column, monthly from March 1958.
# NOTE(review): the -99.99 entries in the printout below look like
# missing-value sentinels -- confirm before using this series for modelling.
ts_single <- ts(co2new[["average"]], start = c(1958, 3), frequency = 12)
#         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct    Nov    Dec
# 1958               315.71 317.45 317.50 -99.99 315.86 314.93 313.20 -99.99 313.33 314.67
# 1959 315.62 316.38 316.71 317.72 318.29 318.15 316.54 314.80 313.84 313.26 314.80 315.58
# 1960 316.43 316.97 317.58 319.02 320.03 319.59 318.18 315.91 314.16 313.83 315.00 316.19
# ...

# Multivariate ts: three columns sharing the same monthly index.
ts_multi <- ts(
  co2new[, c("decimal_data", "interpolated", "trend"), with = FALSE],
  start = c(1958, 3),
  frequency = 12
)
#          decimal_data interpolated  trend
# Mar 1958     1958.208       315.71 314.62
# Apr 1958     1958.292       317.45 315.29
# May 1958     1958.375       317.50 314.71
# ....

Airline Flights From NYC

nycflights13: 2013년 New York을 출발한 항공 데이터

library("nycflights13")
flight <- data.table(flights)
# One Date per flight, assembled from the year / month / day columns.
dd <- data.table(index = make_date(flight$year, flight$month, flight$day))

# Number of flights (N) per calendar day, in date order.
dd <- dd[order(index), .N, by = .(index)]
# Weekday label for each day. data.table also exports a wday() that has no
# `label` argument, so the lubridate version is named explicitly; TRUE is
# spelled out because T can be reassigned.
dd[, weekdays := lubridate::wday(index, label = TRUE)]
# Week-of-month number from a day of month `d` and a weekday number `w`.
# Vectorised over both arguments.
# NOTE(review): callers pass wday(index) for `w`, presumably 1 = Sunday --
# confirm the week-start convention.
ufunc_mw <- function(d, w) {
  whole_weeks_before <- ceiling((d - w) / 7)
  whole_weeks_before + 1
}
# Week-of-month as a factor whose levels are stored in reverse order.
dd[, monthweeks := {
  week_fct <- factor(ufunc_mw(mday(index), wday(index)))
  factor(week_fct, levels = rev(levels(week_fct)))
}]

numeric => Data.table (with Index)

river ( Index가 없는 Single Numeric)

# Read the bare numeric series from the URL, then attach a running
# 1..n index so it can be handled like the other indexed tables.
river <- scan(file = "https://www.stat.uiowa.edu/~luke/data/river.dat")
riverDT <- data.table(index = seq_len(length(river)), data = river)

from Web Data

GDP Growth Data

# Scrape Web Pages --------------------------------------------------------
library("rvest") # Easily Harvest (Scrape) Web Pages
URL <- "https://www.multpl.com/us-real-gdp-growth-rate/table/by-quarter" # GDP data
gdptbl <- URL %>% read_html() %>% html_table()   # Read the tables from the web page (a list)
gdptbl <- gdptbl[[1]] %>% data.table()           # keep the first table only
names(gdptbl) <- c("index", "value")

# Clean the data ----------------------------------------------------------
library(lubridate)
gdptbl[, index := mdy(index)]
# Drop the percent sign with base sub() (literal match, first occurrence) so
# no string package has to be attached, then convert to numeric.
gdptbl[, value := as.numeric(sub("%", "", value, fixed = TRUE))]
# irregular (unequally spaced)
gdptbl <- gdptbl[index >= make_date(1948, 1, 1), ]
gdptbl <- gdptbl[order(index), ]                   # increasing order
> gdptbl
#           index value
#   1: 1948-03-31  2.60
#   2: 1948-06-30  4.58
#   3: 1948-09-30  5.39
#   4: 1948-12-31  3.89
#   5: 1949-03-31  0.94
# ---                 
# 282: 2018-06-30  3.20
# 283: 2018-09-30  3.13
# 284: 2018-12-31  2.52
# 285: 2019-03-31  2.65
# 286: 2019-06-30  2.28
> gdpts <- ts(gdptbl$value, start=1948, frequency=4)

example

https://www.neonscience.org/dc-convert-date-time-POSIX-r
# https://machinelearningmastery.com/time-series-datasets-for-machine-learning/

# Shampoo Sales Dataset
URL <- "https://raw.githubusercontent.com/jbrownlee/Datasets/master/shampoo.csv"
dd <- fread(URL)
# group = 1 puts every row in one group so a single path is drawn.
ggplot(dd, aes(x = Month, y = Sales, group = 1)) +
  geom_path() +
  themeSixx
# https://machinelearningmastery.com/time-series-datasets-for-machine-learning/

# Minimum Daily Temperatures Dataset
URL <- "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"
dd <- fread(URL)
# group = 1 puts every row in one group so a single path is drawn.
ggplot(dd, aes(x = Date, y = Temp, group = 1)) +
  geom_path()
# https://stackoverflow.com/questions/16596811/display-the-x-axis-on-ggplot-as-month-only-in-r

url <- "http://nwis.waterdata.usgs.gov/usa/nwis/uv/?cb_00060=on&cb_00065=on&format=rdb&period=&begin_date=2009-01-01&end_date=2012-12-31&site_no=02428400"
dd <- fread(url, header = TRUE)

# Quick look at the column classes as read.
sapply(dd, class)
# Drop the first data row.
# NOTE(review): presumably the rdb column-format line -- confirm.
dd <- dd[-1, ]
names(dd) <- c("Agency", "SiteNo", "Datetime", "TZ", "Discharge", "Status", "Gageheight", "gstatus")

# Parse the timestamp to a Date once, then derive the numeric columns.
# Year is computed in a separate step so it reads the parsed `date` column.
dd[, date := anydate(Datetime)]
dd[, `:=`(
  Discharge = as.numeric(Discharge),
  Year      = as.numeric(format(date, "%Y"))
)]

# One panel per year, each with its own x-range; monthly breaks labelled
# with the abbreviated month name.
dd %>% ggplot(aes(x = date, y = Discharge)) +
  geom_line() +
  facet_wrap(~Year, scales = "free_x") +
  scale_x_date(
    labels = scales::date_format("%b"),
    breaks = scales::date_breaks("month")
  ) +
  themeSixx
Categories: tsR Basic

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x