In this live demonstration I’ll review many important features of R markdown reports and give tips for how to work productively while writing reports you can reproduce later.
library(palmerpenguins)
# Keep only the Adelie penguins, then draw a histogram of their bill depths.
peng_subset <- filter(penguins, species == "Adelie")
ggplot(peng_subset, aes(x = bill_depth_mm)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
A paragraph is a sequence of lines; paragraphs are separated from one another by a blank line.
This is a second paragraph.
# Histogram of bill length across the whole penguins data set.
ggplot(penguins, aes(x = bill_length_mm)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
A histogram of bill length from three species of penguins on three islands in the Antarctic peninsula near Palmer Station.
Contrast with ‘source’ mode
Figure 1. A histogram of penguin bill lengths (in mm).
library(tidyverse)
Locating files with the `here` package (the `here::here()` function):
read_csv("../tasks/test-data.csv")
## Rows: 3 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Name, Species
## dbl (1): Mass_kg
## lgl (1): Friendly
## date (1): Birthdate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 3 × 5
## Name Birthdate Mass_kg Species Friendly
## <chr> <date> <dbl> <chr> <lgl>
## 1 A nonymouse 2001-01-01 14.5 <NA> FALSE
## 2 Frank 2008-09-01 4.1 Cat TRUE
## 3 Boojum 1982-07-11 7.2 Dog TRUE
## read_csv("/Users/airwin/git-repos/Stat2430/data-viz-course/data-viz/tasks/test-data.csv")
# Build the file path with here::here() and read the CSV from it.
here::here("data-viz", "tasks", "test-data.csv") |>
  read_csv()
## Rows: 3 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Name, Species
## dbl (1): Mass_kg
## lgl (1): Friendly
## date (1): Birthdate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 3 × 5
## Name Birthdate Mass_kg Species Friendly
## <chr> <date> <dbl> <chr> <lgl>
## 1 A nonymouse 2001-01-01 14.5 <NA> FALSE
## 2 Frank 2008-09-01 4.1 Cat TRUE
## 3 Boojum 1982-07-11 7.2 Dog TRUE
# Fit body mass as a function of species, then print the model summary.
model1 <- lm(formula = body_mass_g ~ species, data = penguins) # delete bracket to see red X
summary(model1)
##
## Call:
## lm(formula = body_mass_g ~ species, data = penguins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1126.02 -333.09 -33.09 316.91 1223.98
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3700.66 37.62 98.37 <2e-16 ***
## speciesChinstrap 32.43 67.51 0.48 0.631
## speciesGentoo 1375.35 56.15 24.50 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 462.3 on 339 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.6697, Adjusted R-squared: 0.6677
## F-statistic: 343.6 on 2 and 339 DF, p-value: < 2.2e-16
# Store the body-mass histogram in p1; assigning it does not display the plot.
p1 <- ggplot(penguins, aes(x = body_mass_g)) +
  geom_histogram()
Pipes (`|>` or `%>%`) vs. assigning objects to temporary names to break down computations into component parts
Do a calculation two ways: with just pipes, and then in a series of steps with named objects I’ll create.
# Pipe-only version: drop rows with missing values, group by species,
# then compute one mean body mass per species.
penguins |>
  na.omit() |>
  group_by(species) |>
  summarise(mean_body_mass = mean(body_mass_g))
## # A tibble: 3 × 2
## species mean_body_mass
## <fct> <dbl>
## 1 Adelie 3706.
## 2 Chinstrap 3733.
## 3 Gentoo 5092.
# Pipe-only version of the overall mean flipper length.
# Rows with missing values are dropped first: mean() returns NA when any
# input is NA, so summarising the raw data would report NA, not a number.
penguins |>
  na.omit() |>
  summarize(mean_flipper_length = mean(flipper_length_mm))
## # A tibble: 1 × 1
## mean_flipper_length
## <dbl>
## 1 201.
“step-by-step”
# Step 1: drop every row that contains a missing value.
penguins_no_na <- na.omit(penguins)
# Step 2: group the remaining rows by species.
penguins_group <- group_by(penguins_no_na, species)
# Step 3: compute one mean body mass per species.
summarise(penguins_group, mean_body_mass = mean(body_mass_g))
## # A tibble: 3 × 2
## species mean_body_mass
## <fct> <dbl>
## 1 Adelie 3706.
## 2 Chinstrap 3733.
## 3 Gentoo 5092.
# Overall mean flipper length, computed from the NA-free table
# penguins_no_na created in the earlier step.
penguins_no_na |>
  summarize(mean_flipper_length = mean(flipper_length_mm))
## # A tibble: 1 × 1
## mean_flipper_length
## <dbl>
## 1 201.