Datetime format in r

Published by onesixx on

Date or Datetime 변환 개요

http://statkclee.github.io/data-science/ds-date-basics.html
https://www.cyclismo.org/tutorial/R/time.html#time-data-types

날짜 와 시간 차이

R에서 날짜와 시간은 각각  다른 클래스를 통해 구현되는데,
날짜(Date)는 Date 클래스이고, (R 내부적으로) 1970-01-01을 기준으로 한 일(day) 수가 숫자로 저장되고,
시간(Time)은 POSIXct, POSIXlt 클래스이고, POSIXct는 (R 내부적으로) 1970-01-01 00:00:00 UTC를 기준으로 한 초(second) 수가 숫자로 저장된다. (POSIXlt는 년, 월, 일, 시 등의 구성요소를 리스트로 저장한다.)

KST (Korea Standard Time),
GMT (Greenwich Mean Time) :  time zone
= UTC (Coordinated Universal Time ) : time standard

### 현재시간 
> Sys.time()
[1] "2017-09-20 16:56:08 KST"
> Sys.time() %>% class
[1] "POSIXct" "POSIXt"
> Sys.time() %>% as.numeric()
[1] 1578494887

### 현재 TimeZone 
> Sys.timezone(location = T)
[1] "Asia/Seoul"

### 연산을 위해, 1970년1월1일을 기준으로 초단위로 환산
### (날짜, 연도단위로 환산할수 있지만, 반대연산은 불가)
> dtime <- "1970-01-01 00:00:06" %>% as.POSIXct(tz="GMT")
> dtime %>% as.numeric()
[1] 6
# Asia/Seoul = GMT+9
> dtime <- "1970-01-01 00:00:00" %>% as.POSIXct(tz="Asia/Seoul")
> dtime %>% as.numeric()
[1] -32400

> dday <- "1970-01-02" %>% as.Date()
> dday %>% as.numeric()
[1] 1

# hour*min*sec   24*60*60=86400
> dtime <- "1970-01-02 00:00:00" %>% as.POSIXct(tz="GMT")
> dtime %>% as.numeric()
[1] 86400

00:00:00 disappears 

> dtime <- "2017-09-20 00:00:00"

> dtime %>% as.POSIXct(tz=Sys.timezone())
[1] "2017-09-20 KST"
> 
> print.POSIXct <- function(x,...)print(format(x,"%Y-%m-%d %H:%M:%S"))
> dtime %>% as.POSIXct(tz=Sys.timezone())
[1] "2017-09-20 00:00:00"

변환 작업 순서

  • ISO 8601  표준문자열(“1970-01-01”) 에 근거하여 표현된 날짜/시간은 R에서는 String으로 인식하고,
  • 이런 String 데이터는 날짜/시간 변환 함수(as.Date, as.POSIXct, as.POSIXlt, strptime)를 통해 DateTime 데이터로 변환한 후,
  • 날짜/시간 패키지(zoo, xts, lubridate)를 통해 작업을 진행한다.

String (character) -> Date (Date)

lubridate::ymd()
lubridate::as_date()

"2001-10-06" %>% ymd()   
"2001-10-06 00:00:00" %>% as_date()  #[1] "2001-10-06"

as.Date(format="")

> "06-07-19" %>% as.Date(format="%y-%m-%d")
[1] "2006-07-19"
"1970-01-02" %>% as.Date()                  # "1970-01-02"
"1970-01-02" %>% as.Date() %>% class        # "Date"
"1970-01-02" %>% as.Date() %>% as.numeric() # 1
"1970-01-02" %>% as.Date() %>% typeof()     # "double"
Sys.time() %>% format("%B%d, %a, %H:%M")
[1] "July07, Tue, 13:49"

String (character) -> Datetime (POSIXct, POSIXt)

POSIX :  Portable Operating System Interface for Unix (application programming interface)
– ct  : continuous time  (the number of seconds)
POSIXct 클래스는 시간정보를 하나의 큰 숫자(초 단위)로 저장하므로, 데이터프레임의 열로 저장할 때 유용하다.

– lt :  list time,  (keeps the date as a list of time attributes (such as “hour” and “mon”))
POSIXlt 클래스는 요일, 년, 월, 일 등의 정보를 리스트의 원소로 나누어 저장하므로, 각 구성요소를 꺼내 쓸 때 유용하다.

lubridate::mdy_hms
lubridate::parse_date_time
lubridate::as_datetime()

https://lubridate.tidyverse.org/
https://rawgit.com/rstudio/cheatsheets/master/lubridate.pdf
"01/01/2010 00:00:00" %>% mdy_hms(tz="Asia/Seoul") 
"01/01/2010 00:00:00" %>% parse_date_time(orders="%m/%d/%Y %H:%M:%S", tz="Asia/Seoul")
"2001-10-06 00:00:00" %>% as_datetime()   # [1] "2001-10-06 00:00:00"

hour단위 time series 만들기

https://stackoverflow.com/questions/33782218/how-to-create-a-time-series-of-hourly-data
library("lubridate")
# Build an hourly sequence of date-times from startDateTime to endDateTime.
startDateTime <- ymd_hms("2020-02-28 23:00:00")
endDateTime   <- ymd_hms("2020-03-01 13:00:00")
# Ask difftime() for hours explicitly. Subtracting two date-times lets R pick
# the unit automatically (days here, but hours or minutes for shorter spans),
# so multiplying the bare difference by 24 is only right when the unit happens
# to be days.
elapsedHours <- as.numeric(
  difftime(endDateTime, startDateTime, units = "hours")
)

# One element per whole hour, both endpoints included.
rlt <- startDateTime + hours(0:elapsedHours)
rlt

#  [1] "2020-02-28 23:00:00" "2020-02-29 00:00:00" "2020-02-29 01:00:00" "2020-02-29 02:00:00" "2020-02-29 03:00:00" "2020-02-29 04:00:00"
#  [7] "2020-02-29 05:00:00" "2020-02-29 06:00:00" "2020-02-29 07:00:00" "2020-02-29 08:00:00" "2020-02-29 09:00:00" "2020-02-29 10:00:00"
# ...
# [37] "2020-03-01 11:00:00" "2020-03-01 12:00:00" "2020-03-01 13:00:00"

base:: strptime()

문자열 (character 벡터)를 POSIXlt 클래스로 변환한다.

> "1970-01-01 00:00:00" %>% strptime(format="%Y-%m-%d %H:%M:%S")

> "1970-01-01 00:00:00" %>% strptime(format="%Y-%m-%d %H:%M:%S") %>% typeof()      # [1] "list"
> "1970-01-01 00:00:00" %>% strptime(format="%Y-%m-%d %H:%M:%S") %>% attributes()
  $names
  [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"   "yday"   "isdst"  "zone"   "gmtoff"
  $class
  [1] "POSIXlt" "POSIXt" 
> strptime("1970-01-01 00:00:00", format="%Y-%m-%d %H:%M:%S") %>% class
[1] "POSIXlt" "POSIXt" 
> strptime("1970-01-01 00:00:00", format="%Y-%m-%d %H:%M:%S") %>% attributes()
$names
 [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"   "yday"   "isdst"  "zone"   "gmtoff"
$class
[1] "POSIXlt" "POSIXt" 
> strptime("1970-01-01 00:00:00", format="%Y-%m-%d %H:%M:%S") %>% typeof()
[1] "list"
> strptime("1970-01-01 00:00:00", format="%Y-%m-%d %H:%M:%S") %>% as.numeric()
[1] -32400

> strptime("1970-01-01 00:00:00.975", format="%Y-%m-%d %H:%M:%OS")
[1] "1970-01-01 00:00:00.975 KST"

as.POSIXct()

(tz="")

"1970-01-01 00:00:06" %>% as.POSIXct(tz="GMT")                  # "1970-01-01 00:00:06"
"1970-01-01 00:00:06" %>% as.POSIXct(tz="GMT") %>% class        # "POSIXct" "POSIXt"
"1970-01-01 00:00:06" %>% as.POSIXct(tz="GMT") %>% as.numeric() # 6
# Asia/Seoul = GMT+9
# hour*min*sec   24*60*60=86400

(format="")

dtime1 <- "2018-11-01 \\xbf\\xc0\\xc0\\xfc 10:36:07" %>% str_replace("\\xbf\\xc0\\xc0\\xfc", "AM")
#dtime2 <- "2018-11-01 \\xbf\\xc0\\xc8\\xc4 10:02:14" %>% str_replace("\\xbf\\xc0\\xc8\\xc4", "PM")
dtime1 %>% as.POSIXct(format="%Y-%m-%d %p %I:%M:%S")

연도별 평균값, 월별 평균, 일별 평균 등을 구하기 위해 list형태로 시간을 나누어, year, mon, wday등을 활용한다.

> as.POSIXlt("1970-01-01 00:00:00") %>% attributes()
$names
 [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"   "yday"   "isdst"  "zone"   "gmtoff"
$class
[1] "POSIXlt" "POSIXt" 

> as.POSIXlt("1970-01-01 00:00:00") %>% typeof()
[1] "list"

> as.POSIXlt("1970-01-01 00:00:00") %>% unlist
   sec    min   hour   mday    mon   year   wday   yday  isdst   zone gmtoff 
   "0"    "0"    "0"    "1"    "0"   "70"    "4"    "0"    "0"  "KST"     NA 

> as.POSIXlt("1970-01-01 00:00:00") %>% unclass()
$sec
[1] 0
$min
[1] 0
$hour
[1] 0
$mday
[1] 1
$mon
[1] 0
$year
[1] 70
$wday
[1] 4
$yday
[1] 0
$isdst
[1] 0
$zone
[1] "KST"
$gmtoff
[1] NA

as.POSIXlt()

연도별 평균값, 월별 평균, 일별 평균 등을 구하기 위해 list형태로 시간을 나누어, year, mon, wday등을 활용한다.

> as.POSIXlt("1970-01-01 00:00:00") %>% attributes()
$names
 [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"   "yday"   "isdst"  "zone"   "gmtoff"
$class
[1] "POSIXlt" "POSIXt" 
> as.POSIXlt("1970-01-01 00:00:00") %>% typeof()
[1] "list"
> as.POSIXlt("1970-01-01 00:00:00") %>% as.numeric()
[1] -32400

> as.POSIXlt("1970-01-01 00:00:00") %>% unlist
   sec    min   hour   mday    mon   year   wday   yday  isdst   zone gmtoff 
   "0"    "0"    "0"    "1"    "0"   "70"    "4"    "0"    "0"  "KST"     NA 
> as.POSIXlt("1970-01-01 00:00:00") %>% unclass()
$sec
[1] 0
$min
[1] 0
$hour
[1] 0
$mday
[1] 1
$mon
[1] 0
$year
[1] 70
$wday
[1] 4
$yday
[1] 0
$isdst
[1] 0
$zone
[1] "KST"
$gmtoff
[1] NA

Date, DateTime (Date, POSIXct) -> String (character)

format(), strftime()

### Date -> String
> as.POSIXlt("1970-01-01 01:02:03") %>% format("%Y-%m-%d")   # [1] "1970-01-01"
> as.POSIXlt("1970-01-01 01:02:03") %>% strftime("%Y-%m-%d") # [1] "1970-01-01"

### DateTime -> String
> as.POSIXlt("1970-01-01 01:02:03") %>% format("%Y-%m-%d %H:%M:%S")    # [1] "1970-01-01 01:02:03"
> as.POSIXlt("1970-01-01 01:02:03") %>% format("%y-%m-%d %I:%M:%S %p") # [1] "70-01-01 01:02:03 AM"
# strftime is a wrapper for format.POSIXlt; it and format.POSIXct first convert to class "POSIXlt" by calling as.POSIXlt
> as.POSIXlt("1970-01-01 01:02:03") %>% strftime("%y-%m-%d %I:%M:%S %p") #[1] "70-01-01 01:02:03 AM"

stringr:: str_to_sentence()

ymd_hms("1970-01-01 01:02:03 KST") %>% str_to_sentence()

Date -> DateTime

> tt <- c("09:12", "17:01")
[1] "09:12" "17:01"
> tt %>% class
[1] "character"

> library(chron)
> times(str_c(tt, ":00.100"))
[1] 09:12:00 17:01:00
> times(str_c(tt, ":00.100")) %>% class
[1] "times"

String-> time

dtime1 <- "2018-11-01 \\xbf\\xc0\\xc0\\xfc 10:36:07" %>% str_replace("\\xbf\\xc0\\xc0\\xfc", "AM")
dtime2 <- "2018-11-01 \\xbf\\xc0\\xc8\\xc4 10:02:14" %>% str_replace("\\xbf\\xc0\\xc8\\xc4", "PM")

dtime1 %>% as.POSIXct(format="%Y-%m-%d %p %I:%M:%S")

String-> 날짜형식String (ISO8601표준에 맞게)

dtime1 <- "2018-11-01 \\xbf\\xc0\\xc0\\xfc 10:36:07" %>% str_replace("\\xbf\\xc0\\xc0\\xfc", "AM")
dtime2 <- "2018-11-01 \\xbf\\xc0\\xc8\\xc4 10:02:14" %>% str_replace("\\xbf\\xc0\\xc8\\xc4", "PM")

dtime1 %>% as.POSIXct(format="%Y-%m-%d %p %I:%M:%S")

여러 포맷의 날짜데이터를 (format 정의 없이) POSIX나 Date 형식으로 변환해준다.

> anytime::anydate(c("19760811","1976-08-12","08-13-1976"))
[1] "1976-08-11" "1976-08-12" "1976-08-13"
> anytime::anydate(c("1976-Aug-14","1976aug15","16aug1976","Aug-17-1976","18 Aug 1976"))
[1] "1976-08-14" "1976-08-15" "1976-08-16" "1976-08-17" "1976-08-18"
> anytime::anydate(c("19th August 1976"))
[1] NA
http://dirk.eddelbuettel.com/code/anytime.html
# Convert each entry of dd$tot_time into a date-time anchored on 2018-07-18.
# Each entry is split on "d "; when that yields more than one piece, the first
# piece is used as a day count and the second as the clock time (presumably
# entries look like "<n>d HH:MM:SS" -- TODO confirm against the data).
# Otherwise the whole entry is used as the clock time.
result <- dd$tot_time %>% lapply(function(x) {
  parts <- str_split(x, "d ")[[1]]
  if (length(parts) > 1) {
    stamp <- str_c("2018-07-18", " ", parts[2])
    # "%m" parses the month. The previous "%Y-%d-%d" read "07" as a day and
    # never parsed a month at all.
    # The day count is converted to a number before being handed to days().
    stamp <- strptime(stamp, "%Y-%m-%d %H:%M:%OS") + days(as.numeric(parts[1]))
  } else {
    # Separate date and time with a space, matching the branch above.
    stamp <- str_c("2018-07-18", " ", parts)
    stamp <- strptime(stamp, "%Y-%m-%d %H:%M:%OS")
  }
  stamp
})
# Combine the per-entry results into a single date-time vector.
result <- do.call("c", result)

library(anytime)
dd[ , tot_time0:= anytime(result)]

<참고> as.POSIXct 에 관해

POSIXct 타입은 내부적으로는 숫자를 저장하고 있기 때문에,
Plotting할 때 숫자처럼 순차적으로 활용할 수 있으며, ggplot에서 scale_x_date / scale_x_time / scale_x_datetime 를 활용하여 표현 가능하다.

DT <- data.table(
  date = Sys.Date() - 0:29,
  price = runif(30)
)
p <- DT %>% ggplot(aes(date, price)) + geom_line()
p + scale_x_date(date_breaks="1 week", date_labels="%W",date_minor_breaks="1 day")
> ymd_hms("2020-01-22 13:01:59.23") %>% ceiling_date("day")  #[1] "2020-01-23 00:00:00"
> ymd_hms("2020-01-22 13:01:59.23") %>% round_date("day")    #[1] "2020-01-23 00:00:00"
> ymd_hms("2020-01-22 13:01:59.23") %>% floor_date("day")    #[1] "2020-01-22 00:00:00"
## strings into POSIXct date-time.
library("lubridate")

"01/01/2010 00:00:00" %>% parse_date_time("%m/%d/%Y %H:%M:%S", tz="Asia/Seoul")
"01/01/2010 00:00:00" %>% mdy_hms(tz="Asia/Seoul") %>% attributes()
20101215 %>%  ymd()

## POSIXct date-time into strings/ numeric
ymd_hms("2010-12-13 15:30:30 KST") %>% force_tz("America/Chicago") %>% with_tz()
ymd_hms("2010-12-13 15:30:30 KST", tz="Asia/Seoul")  %>% with_tz()

time <- ymd_hms("2010-12-13 15:30:30", tz="Asia/Seoul")
time %>% with_tz()

# 연산
> dmy("14/10/1979") %>% month()
[1] 10
> diffdd <- (ymd_hms("2010-12-13 16:30:30") - ymd_hms("2010-12-13 15:10:20"))
> diffdd %>% as.period 
[1] "1H 20M 10S"
> diffdd %>% as.period %>% as.numeric(unit="hours")
[1] 1.336111

3. 날짜/시간 패키지 활용

String에서 변환된 날짜/시간 객체는 여러 패키지에서 활용 가능하다.

zoo

xts

Categories: R Reshaping

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Oldest
Newest Most Voted
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x