scan vs. readLines
http://www.ats.ucla.edu/stat/r/modules/raw_data.htm
http://www.clemson.edu/economics/faculty/wilson/R-tutorial/data.html
scan() 같은 type 일때나 같은 type으로 읽기=>what으로 type설정
라인별 separator 설정가능(sep="\n"), 파일을 읽는 가장 빠른 방법
readLines() 헤더에 숫자와 문자 섞여 있는 경우 유용
scan(file="", what=double(), sep="", n=-1, nmax=-1, encoding="unknown", skipNul=FALSE, quote=if(identical(sep, "\n")) "" else "'\"", dec = ".", skip=0, nlines=0, na.strings="NA", flush=F, fill=F, strip.white=F, quiet=F, blank.lines.skip=T, multi.line=T, comment.char="", allowEscapes=F, fileEncoding="", text)
readLines(con=stdin(), n=-1L, encoding="unknown", skipNul=F, ok=T, warn=T)
- 데이터 문자일 경우, 꼭 what="character" 지정
- n=읽어들일 데이터 최대갯수, nlines=읽어들일 라인의 갯수, skip=지정한 갯수만큼 라인 건너뜀
# Remote sample file used to compare the two readers.
URL <- "https://www.stat.uiowa.edu/~luke/data/river.dat"

# scan() parses the file into a vector of values (what = double() by default).
river <- scan(URL)

# readLines() returns one character string per line. This overwrites the
# scan() result above, so the table below is built from the raw text lines.
river <- readLines(URL)

# Pair every line with its 1-based position in the file.
riverData <- data.table(index = seq_along(river), data = river)
# Small whitespace-separated sample (four tokens) used by the scan() example.
myData <- " abc 123 6 y "

# write() emits the string followed by a newline into raw.txt in the
# current working directory, replacing any existing file of that name.
write(myData, "raw.txt")
scan()을 사용해 행단위로 읽기
matrix( , byrow=) 행렬의 원소를 행단위로, 파일내 행렬
# Read raw.txt token by token; what = "character" keeps every item a string.
rowVec <- scan("raw.txt", what = "character")
# Read 4 items
# [1] "abc" "123" "6"   "y"

# Reshape the flat vector into 2 rows. Without byrow = TRUE, matrix() fills
# column by column, which is why "123" ends up in row 2 rather than row 1.
rowVec %>% matrix(nrow = 2)
#      [,1]  [,2]
# [1,] "abc" "6"
# [2,] "123" "y"
ex)
https://stackoverflow.com/questions/29803117/in-r-how-to-read-file-with-custom-end-of-line-eol
11,Walking,1787072368000,-2.56,8.58,1.879608; 11,Walking,1867172313000,4.4,4.4, 11,Walking,1867222270000,5.48,8.43,9.724928; 13,Downstairs,3498582255000,0.72,10.12,3.173541; 13,Downstairs,3498732310000,5.13,10.42,4.671779;15,Jogging,328982249000,5.37,-3.36,7.3141265; 15,Jogging,329032176000,-1.42,2.26,-5.6252036; 19,Sitting,131583611665000,9.04,-0.46,2.53; 19,Sitting,131583691469000,8.85,-0.46,2.41;
# Sample sensor records whose end-of-record marker is ";" instead of a
# newline. The second record is truncated (it ends in "4.4,4.4," with no
# ";"), so it runs straight into the record that follows it.
myText <- " 11,Walking,1787072368000,-2.56,8.58,1.879608; 11,Walking,1867172313000,4.4,4.4, 11,Walking,1867222270000,5.48,8.43,9.724928; 13,Downstairs,3498582255000,0.72,10.12,3.173541; 13,Downstairs,3498732310000,5.13,10.42,4.671779;15,Jogging,328982249000,5.37,-3.36,7.3141265; 15,Jogging,329032176000,-1.42,2.26,-5.6252036; 19,Sitting,131583611665000,9.04,-0.46,2.53; 19,Sitting,131583691469000,8.85,-0.46,2.41; "

# Overwrites raw.txt in the current working directory with the sample text.
write(myText, "raw.txt")
# Version 1: split on the custom record terminator, then on the commas.
# sep = ";" makes scan() return one string per ";"-terminated record.
rowVec <- scan("raw.txt", what = character(), sep = ";")

# Optional clean-up, left disabled as in the original notes:
# rowVec <- rowVec[rowVec != ""]

# strsplit() is vectorised over rowVec and yields one character vector of
# fields per record.
rowList <- strsplit(rowVec, ",")

# Stack the records into a matrix and convert it to a data.table.
# There is no length check here: a record whose field count differs from the
# others makes rbind() recycle values (with a warning) instead of failing.
dd <- do.call(rbind, rowList) %>% data.table()
# Version 2: same parse, but malformed records are dropped before binding.
# sep = ";" makes scan() return one string per ";"-terminated record.
rowVec <- scan("raw.txt", what = character(), sep = ";")

# One character vector of comma-separated fields per record.
fieldList <- strsplit(rowVec, ",")

# Keep only records that have exactly 6 fields; anything else (truncated or
# run-together records, blank tails) is discarded.
rowList <- fieldList[lengths(fieldList) == 6L]

# Stack the surviving records into a matrix and convert it to a data.table.
dd <- do.call(rbind, rowList) %>% data.table()