# A string with stray spaces at both ends and an embedded line break.
my_string <- " A cat is a small
and furry animal. "
# str_squish() trims the ends and collapses internal whitespace (the line
# break included) down to single spaces.
my_string2 <- my_string |> str_squish()
my_string2
## [1] "A cat is a small and furry animal."
# Case conversions applied to the cleaned-up string.
my_string2 |> str_to_lower()
## [1] "a cat is a small and furry animal."
my_string2 |> str_to_upper()
## [1] "A CAT IS A SMALL AND FURRY ANIMAL."
my_string2 |> str_to_sentence()
## [1] "A cat is a small and furry animal."
my_string2 |> str_to_title()
## [1] "A Cat Is A Small And Furry Animal."
# Character column mixing a name, a whole number, decimals and "Inf".
text_and_numbers <- tibble(
  text = c("Andrew", "33", "12.45", "-1.00", "Inf")
)
# Coerce the text to double and to integer side by side; entries that cannot
# be represented in the target type come back as NA with a coercion warning.
text_and_numbers |>
  mutate(
    numbers = as.numeric(text),
    integers = as.integer(text)
  ) |>
  kable()
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `numbers = as.numeric(text)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
text | numbers | integers |
---|---|---|
Andrew | NA | NA |
33 | 33.00 | 33 |
12.45 | 12.45 | 12 |
-1.00 | -1.00 | -1 |
Inf | Inf | NA |
# Labels made of one capital letter followed by one digit.
sets <- c("A1", "A2", "B1", "B4", "C5")
# Pull the first digit out of each label.
sets |> str_extract("[0-9]")
## [1] "1" "2" "1" "4" "5"
# Pull the first capital letter out of each label.
sets |> str_extract("[A-Z]")
## [1] "A" "A" "B" "B" "C"
library(glue)
library(unglue)
# Values interpolated into the template below.
# NOTE: the variable `c` shares its name with base::c(); function calls such
# as c(1, 2) still resolve to the function, but the name is easy to misread.
a <- 1
b <- 6
c <- 15.63
# glue() evaluates every {expression} and splices the result into the text.
my_string3 <- glue(
  "The numbers a, b, and c are {a}, {b}, and {c}, respectively. Their sum is {a+b+c}."
)
my_string3
## The numbers a, b, and c are 1, 6, and 15.63, respectively. Their sum is 22.63.
# unglue() runs the template in reverse: each {name} placeholder captures the
# matching piece of the input string.
unglue(
  my_string3,
  "The numbers a, b, and c are {a}, {b}, and {c}, respectively. Their sum is {d}."
)
## $`1`
## a b c d
## 1 1 6 15.63 22.63
# One greeting per row, all following the same sentence pattern.
my_strings1 <- tibble(
  greeting = c(
    "My name is Andrew.",
    "My name is Li.",
    "My name is Emily."
  )
)
# Extract the {name} part of each greeting into its own column;
# remove = FALSE keeps the original `greeting` column alongside it.
my_strings1 |>
  unglue_unnest(
    greeting,
    "My name is {name}.",
    remove = FALSE
  ) |>
  kable()
greeting | name |
---|---|
My name is Andrew. | Andrew |
My name is Li. | Li |
My name is Emily. | Emily |
# City mileage by transmission type, transmissions in their default order.
ggplot(mpg, aes(x = cty, y = trans)) +
  geom_boxplot()
# Same plot, with transmissions ordered by their minimum city mileage
# (descending) via fct_reorder().
ggplot(
  mpg,
  aes(
    x = cty,
    y = fct_reorder(trans, cty, min, .desc = TRUE)
  )
) +
  geom_boxplot()
Simplify the number of categories
# Keep the 4 most frequent transmission types and pool the rest into "Other",
# then order the resulting levels by median city mileage.
# fct_lump_n() is the explicit form of fct_lump(f, n): forcats keeps
# fct_lump() mainly for historical reasons and recommends the specific
# fct_lump_*() variants in new code. The default axis title, which ggplot2
# builds from the expression text, now shows fct_lump_n accordingly.

# Vertical boxplots: lumped transmission on x, city mileage on y.
mpg |>
  ggplot(
    aes(
      y = cty,
      x = fct_lump_n(trans, n = 4) |> fct_reorder(cty, median)
    )
  ) +
  geom_boxplot()
# Horizontal boxplots: same data with the axes swapped.
mpg |>
  ggplot(
    aes(
      x = cty,
      y = fct_lump_n(trans, n = 4) |> fct_reorder(cty, median)
    )
  ) +
  geom_boxplot()
Using the lubridate package
# Current date, then the current date-time in the session's time zone.
today()
## [1] "2024-03-26"
now()
## [1] "2024-03-26 11:17:39 ADT"
# The same instant shown in other time zones. now()'s argument is named
# `tzone`; it is spelled out in full here rather than abbreviated to `tz`,
# which only works through R's partial argument matching.
now(tzone = "UTC")
## [1] "2024-03-26 14:17:39 UTC"
now(tzone = "America/Toronto")
## [1] "2024-03-26 10:17:39 EDT"
now(tzone = "Asia/Shanghai")
## [1] "2024-03-26 22:17:39 CST"
List of timezones: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# Year-month-day strings parsed into Date values with ymd(). The last entry,
# 2021-02-29, is not a real calendar date, so it fails to parse and becomes NA.
dt1 <- tibble(
  text_date = c(
    "1999-01-31", "2000-02-28", "2010-06-28",
    "2024-03-14", "2021-02-29"
  ),
  date = ymd(text_date)
)
## Warning: 1 failed to parse.
# Sort chronologically; the unparsed NA row ends up last.
dt1 |>
  arrange(date)
## # A tibble: 5 × 2
## text_date date
## <chr> <date>
## 1 1999-01-31 1999-01-31
## 2 2000-02-28 2000-02-28
## 3 2010-06-28 2010-06-28
## 4 2024-03-14 2024-03-14
## 5 2021-02-29 NA
Crazy formats!
# Assorted month-day-year spellings (month names, weekday prefix, mixed
# separators, two-digit years), all handed to mdy() for parsing.
tibble(
  date = c(
    "Jan 5, 1999", "Saturday May 16, 70", "8-8-88",
    "December 31/99", "Jan 1, 01"
  ),
  decoded = mdy(date)
) |>
  kable()
date | decoded |
---|---|
Jan 5, 1999 | 1999-01-05 |
Saturday May 16, 70 | 1970-05-16 |
8-8-88 | 1988-08-08 |
December 31/99 | 1999-12-31 |
Jan 1, 01 | 2001-01-01 |
With times
# Date-time strings (24-hour and AM/PM spellings) parsed with ymd_hm() and
# interpreted in the America/Halifax time zone.
# NOTE(review): the four consecutive early-morning times on 2023-03-10 look
# like a probe of the spring daylight-saving gap, but in 2023 that change fell
# on March 12 in North America -- confirm the intended date.
dt2 <- tibble(
  text_date = c(
    "1999-01-31 09:14",
    "2000-02-28 12:15",
    "2010-06-28 23:45",
    "2023-03-10 00:15",
    "2023-03-10 01:15",
    "2023-03-10 02:15",
    "2023-03-10 03:15",
    "2024-03-14 07:00 AM",
    "2021-03-01 6:16 PM"
  ),
  date_time = ymd_hm(text_date, tz = "America/Halifax")
)
dt2 |>
  kable()
text_date | date_time |
---|---|
1999-01-31 09:14 | 1999-01-31 09:14:00 |
2000-02-28 12:15 | 2000-02-28 12:15:00 |
2010-06-28 23:45 | 2010-06-28 23:45:00 |
2023-03-10 00:15 | 2023-03-10 00:15:00 |
2023-03-10 01:15 | 2023-03-10 01:15:00 |
2023-03-10 02:15 | 2023-03-10 02:15:00 |
2023-03-10 03:15 | 2023-03-10 03:15:00 |
2024-03-14 07:00 AM | 2024-03-14 07:00:00 |
2021-03-01 6:16 PM | 2021-03-01 18:16:00 |
# Capture the current instant once, then read individual components from it.
t1 <- now()
t1 |> year()
## [1] 2024
t1 |> day()
## [1] 26
t1 |> hour()
## [1] 11
# The instant expressed as a fractional year.
t1 |> decimal_date()
## [1] 2024.233
# Day number within the year.
t1 |> yday()
## [1] 86
# Reverse direction: turn a fractional year back into a date-time.
date_decimal(2022.95)
## [1] "2022-12-13 18:00:00 UTC"
# Scatter plot of random values against the parsed date-times in dt2.
# The r column is drawn fresh from rnorm() on every run (no seed is set here),
# so the points differ between runs.
dt2 |>
  mutate(r = rnorm(n(), 20, 3)) |>
  ggplot(aes(x = date_time, y = r)) +
  geom_point() +
  # Alternative, more detailed axis labels (year, zone, month-day, time):
  # scale_x_datetime(date_labels = "%Y %Z\n%b-%d %H:%M:%S")
  scale_x_datetime(date_labels = "%Y-%b-%d")