Data Parsing

Creating a tibble

tibble(
  x = 1:5, 
  y = 1, 
  z = x ^ 2 + y
)

#    A tibble: 5 x 3
#       x     y     z
#    <int> <dbl> <dbl>
# 1     1     1     2
# 2     2     1     5
# 3     3     1    10
# 4     4     1    17
# 5     5     1    26

# in tribble(), column names are given in the first row, each written as a formula (e.g. ~x)
tribble(
  ~x, ~y, ~z,
  "a", 2, 3.6,
  "b", 1, 8.5
)

# tribble() builds the tibble row by row, keeping each value in the position where it is written in the code

tibble--some tips

1.

df <- data.frame(abc = 1, xyz = "a")
df$x
#> [1] "a"

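On a data.frame, the $ operator will match any column name that starts with the name following it. Since there is a column named xyz, the expression df$x will be expanded to df$xyz. A tibble never does partial matching: the same expression on a tibble returns NULL with a warning.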

2.

enframe() converts named vectors to a data frame with names and values

enframe(c(a = 1, b = 2, c = 3))
#> # A tibble: 3 x 2
#>   name  value
#>   <chr> <dbl>
#> 1 a         1
#> 2 b         2
#> 3 c         3

 

Subsetting 

# Extract by name
df$x
df[["x"]]

# Extract by position
df[[1]]

#use a pipe
df %>% .$x
df %>% .[["x"]]

readr

`read_csv()` reads comma delimited files

`read_delim()` reads in files with any delimiter

# the first row is treated as the names of the variables
read_csv(
"a,b,c
1,2,3
4,5,6")

read_csv(
"a,b,c
1,2,3
4,5,6",skip=2)
#add `skip = n` to skip the first `n` lines; or add `comment = "#"` to drop all lines that start with (e.g.) `#`
#add `col_names = FALSE` to stop treating the first row as headings and instead label the columns sequentially from `X1` to `Xn`; or add `col_names = c("x", "y", "z")` to name the columns yourself
#add na = "." to treat `.` in the data as NA

read_delim("a;b\n1;3",delim=";")

#a special example about quotes
x <- "x,y\n1,'a,b'"
read_delim(x, delim=",", quote = "'")
#> # A tibble: 1 x 2
#>       x y    
#>   <dbl> <chr>
#> 1     1 a,b

parse_* 

1.parse_logical(c("TRUE", "FALSE", "NA"))  

2.parse_integer(c("1", "2", "3"))

3.parse_number()

parse_number("$100")
parse_number("20%")
parse_number("It costs $123.45")
#[1] 100
#[1] 20
#[1] 123.45

parse_number("123.456.789", locale = locale(grouping_mark = "."))
#[1] 123456789
parse_number("123.456.789", locale = locale(decimal_mark = "."))
#[1] 123.456
grouping_mark is the character that separates groups of digits within a number (e.g. thousands);
decimal_mark is the character used as the decimal point

4.parse_factor()

fruit <- c("apple", "banana")
parse_factor(c("apple", "banana", "bananana"), levels = fruit)
parse_factor(c("apple", "banana", "banana"), levels = fruit)

#[1] apple  banana <NA>  
#[1] apple  banana banana

5.parse_datetime()

parse_datetime("2010-10-01T2010")
parse_datetime("20101010")
#[1] "2010-10-01 20:10:00 UTC"
#[1] "2010-10-10 UTC"

parse_datetime("01/02/15", "%m/%d/%y")
#[1] "2015-01-02 UTC"

6.parse_time() #the same goes for parse_date()

parse_time("01:10 pm")
parse_time("20:10:01")
#13:10:00
#20:10:01

Settings about date and time:

Year:

%Y (4 digits)

%y (2 digits)

Month:

%m (2 digits)

%b (abbreviated name, like Jan)

%B (full name, like January)

Day:

%d

Hour:

%H(0-23)

%I (0-12, must be used together with %p)

%p (a.m. or p.m. indicator)  # e.g. "%I:%M:%OS %p"

Minute:

%M

Second:

%OS (real seconds, may include a fractional part; use %S for integer seconds)

#Exercise
d1 <- "January 1, 2010"
d2 <- "2015-Mar-07"
d3 <- "06-Jun-2017"
d4 <- c("August 19 (2015)", "July 1 (2015)")
d5 <- "12/30/14" # Dec 30, 2014
t1 <- "1705"
t2 <- "11:15:10.12 PM"

parse_date(d1, "%B %d, %Y")
#> [1] "2010-01-01"
parse_date(d2, "%Y-%b-%d")
#> [1] "2015-03-07"
parse_date(d3, "%d-%b-%Y")
#> [1] "2017-06-06"
parse_date(d4, "%B %d (%Y)")
#> [1] "2015-08-19" "2015-07-01"
parse_date(d5, "%m/%d/%y")
#> [1] "2014-12-30"
parse_time(t1, "%H%M")
#> 17:05:00
parse_time(t2, "%H:%M:%OS %p")
#> 23:15:10.12

 

Guess you like

Origin blog.csdn.net/weixin_51674826/article/details/116695749