Practice making plots of these data: gss_sm, gss_lon, penguins, penguins_raw, gapminder, and many others in the datasets package.
Examine the data using View or glimpse.
# Compact overview of ToothGrowth: one line per column showing its type and
# first few values.
ToothGrowth |> glimpse()
## Rows: 60
## Columns: 3
## $ len <dbl> 4.2, 11.5, 7.3, 5.8, 6.4, 10.0, 11.2, 11.2, 5.2, 7.0, 16.5, 16.5,…
## $ supp <fct> VC, VC, VC, VC, VC, VC, VC, VC, VC, VC, VC, VC, VC, VC, VC, VC, V…
## $ dose <dbl> 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0, …
x, y, color, fill, shape, size, group
Which should you use with categorical data? quantitative data? A mixture? What happens if you use the wrong kind of data?
Take a look at the help for each geom using the Help panel in RStudio or on the ggplot website.
As we practice, we will describe each geom here and provide some examples.
# Count of rows per region; mapping region to y makes the bars horizontal.
gss_sm |>
  ggplot(aes(y = region)) +
  geom_bar()

# Same chart with sex mapped to color (for bars this is the outline).
gss_sm |>
  ggplot(aes(y = region, color = sex)) +
  geom_bar()

# Same chart with sex mapped to fill (the interior of the bars).
gss_sm |>
  ggplot(aes(y = region, fill = sex)) +
  geom_bar()
# Inspect the penguins data before plotting it.
glimpse(penguins)
## Rows: 344
## Columns: 8
## $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel…
## $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse…
## $ bill_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, …
## $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, …
## $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186…
## $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, …
## $ sex <fct> male, female, female, NA, female, male, female, male…
## $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007…

# Histogram of flipper length in 5 mm bins.
penguins |>
  ggplot(aes(x = flipper_length_mm)) +
  geom_histogram(binwidth = 5)
## Warning: Removed 2 rows containing non-finite values (`stat_bin()`).

# Same histogram, with the bars filled by species.
penguins |>
  ggplot(aes(x = flipper_length_mm, fill = species)) +
  geom_histogram(binwidth = 5)
## Warning: Removed 2 rows containing non-finite values (`stat_bin()`).

# Same histogram, with the bars filled by island.
penguins |>
  ggplot(aes(x = flipper_length_mm, fill = island)) +
  geom_histogram(binwidth = 5)
## Warning: Removed 2 rows containing non-finite values (`stat_bin()`).
Can we show two categorical variables on one plot?
# Point size shows how many rows fall on each species/island combination.
penguins |>
  ggplot(aes(x = species, y = island)) +
  geom_count()

# One randomly displaced point per row.
penguins |>
  ggplot(aes(x = species, y = island)) +
  geom_jitter()

# a bit random for my taste
penguins |>
  ggplot(aes(x = species, y = island)) +
  geom_jitter(aes(color = sex))

# not sure I understand this one
penguins |>
  ggplot(aes(x = species, y = island)) +
  geom_count(aes(color = sex))
labs(x = "...", y = "...", title = "...", color = "...", fill = "...")
# Jittered species-by-island plot colored by sex, with readable axis and
# legend titles supplied through labs().
penguins |>
  ggplot(aes(species, island, color = sex)) +
  geom_jitter() +
  labs(x = "Species", y = "Island", color = "Sex")
Make the text bigger with theme(text = element_text(size = 15)).
Turn off the grey shading with theme_bw()
Always use + to “add” parts of a ggplot together.
# Inspect the structure of the gapminder data before plotting it.
str(gapminder)
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
## $ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num [1:1704] 779 821 853 836 740 ...

# Life expectancy against GDP per capita, one point per row.
gapminder |>
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  labs(x = "GDP per capita ($)", y = "Life expectancy (years)")

# Same scatter plot with year mapped to color and continent to point shape.
gapminder |>
  ggplot(aes(x = gdpPercap, y = lifeExp, color = year, shape = continent)) +
  geom_point() +
  labs(x = "GDP per capita ($)", y = "Life expectancy (years)")

# The x values are log10-transformed here, so the axis label says so; the
# tick marks are powers of ten of dollars, not dollars.
gapminder |>
  ggplot(aes(
    x = log10(gdpPercap), y = lifeExp,
    color = year, shape = continent
  )) +
  geom_point() +
  labs(x = "log10 of GDP per capita ($)", y = "Life expectancy (years)")
Can we see how GDP per capita varies with year? And maybe continent too?
# log10 GDP per capita against year, one point per row.
gapminder |>
  ggplot(aes(x = year, y = log10(gdpPercap))) +
  geom_point()

# ugh!! year is continuous, so the boxplot is not split by year.
gapminder |>
  ggplot(aes(x = year, y = log10(gdpPercap))) +
  geom_boxplot()
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?

# Converting year to a factor gives one box per year.
gapminder |>
  ggplot(aes(x = factor(year), y = log10(gdpPercap))) +
  geom_boxplot()

# One box per year and continent, filled by continent.
gapminder |>
  ggplot(aes(x = factor(year), y = log10(gdpPercap), fill = continent)) +
  geom_boxplot()

# Roles swapped: continent on the x axis, boxes filled by year.
gapminder |>
  ggplot(aes(x = continent, y = log10(gdpPercap), fill = factor(year))) +
  geom_boxplot()
Left here as more examples.
# Number of rows for each value of sex.
penguins |>
  ggplot(aes(x = sex)) +
  geom_bar()

# species mapped to color (bar outlines).
penguins |>
  ggplot(aes(x = sex, color = species)) +
  geom_bar()

# species mapped to fill (bar interiors).
penguins |>
  ggplot(aes(x = sex, fill = species)) +
  geom_bar()

# Roles swapped: one bar per species, filled by sex.
penguins |>
  ggplot(aes(x = species, fill = sex)) +
  geom_bar()

# Bars for each sex placed side by side instead of stacked.
penguins |>
  ggplot(aes(x = species, fill = sex)) +
  geom_bar(position = "dodge")

# Point size shows the number of rows for each species/sex combination.
penguins |>
  ggplot(aes(x = species, y = sex)) +
  geom_count()

# penguins |> ggplot(aes(x = species)) + geom_count() # fails: can't draw with only one of x or y

# terrible, but fun!
penguins |>
  ggplot(aes(x = species, y = island, color = sex)) +
  geom_count(position = "jitter")
# Bill length against flipper length.
penguins |>
  ggplot(aes(x = flipper_length_mm, y = bill_length_mm)) +
  geom_point()
## Warning: Removed 2 rows containing missing values (`geom_point()`).

# Points colored by species.
penguins |>
  ggplot(aes(x = flipper_length_mm, y = bill_length_mm, color = species)) +
  geom_point()
## Warning: Removed 2 rows containing missing values (`geom_point()`).

# Points colored by sex.
penguins |>
  ggplot(aes(x = flipper_length_mm, y = bill_length_mm, color = sex)) +
  geom_point()
## Warning: Removed 2 rows containing missing values (`geom_point()`).

# Rows with any missing value are dropped first; species sets the color and
# sex sets the point size.
penguins |>
  na.omit() |>
  ggplot(aes(
    x = flipper_length_mm, y = bill_length_mm,
    color = species, size = sex
  )) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.

# works, but silly
penguins |>
  ggplot(aes(x = body_mass_g, y = bill_length_mm)) +
  geom_line()
## Warning: Removed 2 rows containing missing values (`geom_line()`).
# Life expectancy over time for six countries. Without a grouping aesthetic
# all rows are joined into a single line.
# The country is spelled "Libya" in gapminder; a misspelled name is silently
# dropped by %in%.
gapminder |>
  filter(country %in% c("China", "Canada", "India", "France", "Argentina", "Libya")) |>
  ggplot(aes(year, lifeExp)) +
  geom_line()

# Mapping country to color also groups the rows, giving one line per country.
gapminder |>
  filter(country %in% c("China", "Canada", "India", "France", "Argentina", "Libya")) |>
  ggplot(aes(year, lifeExp, color = country)) +
  geom_line()

# All countries: group = country draws one line per country without a legend.
gapminder |>
  ggplot(aes(year, lifeExp, group = country)) +
  geom_line()
# Distribution of life expectancy within each continent.
gapminder |>
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot()

# doesn't work the way I hoped! year is continuous, so it is not split by year.
gapminder |>
  ggplot(aes(x = year, y = lifeExp)) +
  geom_boxplot()
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?

# Converting year to a factor gives one box per year.
gapminder |>
  ggplot(aes(x = factor(year), y = lifeExp)) +
  geom_boxplot()

# One box per year and continent, outlined by continent.
gapminder |>
  ggplot(aes(x = factor(year), y = lifeExp, color = continent)) +
  geom_boxplot()

# Histogram of life expectancy, filled by continent (default bin count).
gapminder |>
  ggplot(aes(x = lifeExp, fill = continent)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# One density curve of life expectancy per continent.
gapminder |>
  ggplot(aes(x = lifeExp, color = continent)) +
  geom_density()

# Summary of life expectancy per continent, colored by year, using
# stat_summary()'s default summary function.
gapminder |>
  ggplot(aes(x = continent, y = lifeExp, color = factor(year))) +
  stat_summary() +
  labs(x = "Continent", y = "Life Expectancy (years)", color = "Year") +
  theme_bw()
## No summary function supplied, defaulting to `mean_se()`