In this demonstration, we will practice adding smooths (regression lines) to plots, including using colour and facets to create multiple lines.
Plot life expectancy by year, facet on continent, add regression line by continent, show a thin line for each country. Possibly show a regression line for each country instead of data.
gapminder |> ggplot(aes(x = year, y = lifeExp)) +
# geom_point() +
facet_wrap(~ continent) +
geom_smooth(method = "lm", formula = y ~ x, aes(group = country), se = FALSE,
linewidth = 0.5, color = "lightgray", alpha = 0.1) +
geom_smooth(method = "lm", formula = y ~ x) +
theme_bw()
Use geom_quantile
and quantiles = ...
to do
quantile regression. Try a polynomial instead of a straight line.
gapminder |> ggplot(aes(x = year, y = lifeExp)) +
# geom_point() +
facet_wrap(~ continent) +
geom_quantile(method = "rqss", formula = y ~ x, quantiles = c(0.25, 0.75)) +
theme_bw()
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "ndiMatrix" of class "replValueSp"; definition not updated
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
## Warning in sparse.model.matrix(object, data = data, contrasts.arg =
## contrasts.arg, : non-list contrasts argument ignored
Show increase in life expectancy as a function of GDP per capita. Add an appropriate regression line.
gapminder |> ggplot(aes(gdpPercap, lifeExp)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x) +
scale_x_log10()
gapminder |> ggplot(aes(gdpPercap, lifeExp)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ log(x))
Compare regression on the mean (ordinary least squares, linear model, lm) with quantile regression on the median (geom_quantile, quantiles = 0.5).
Plot full data and add a smooth.
co2 <- read_table("https://www.esrl.noaa.gov/gmd/webdata/ccgg/trends/co2/co2_mm_mlo.txt",
comment = "#", col_names = FALSE) |>
rename(year = X1,
decimal_year = X3,
co2 = X4)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## X2 = col_double(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double()
## )
co2 |> ggplot(aes(decimal_year, co2)) + geom_line()
Show interannual cycle for CO2 after subtracting annual mean, plot as a function of the fraction of the year elapsed, smooth with a LOESS or GAM.
co2 |> group_by(year) |>
mutate(annual_mean = mean(co2),
co2_season = co2 - annual_mean) |>
ggplot(aes(decimal_year, co2_season)) +
geom_line()
co2 |> group_by(year) |>
mutate(annual_mean = mean(co2),
co2_season = co2 - annual_mean) |>
ggplot(aes(factor(decimal_year - year) , co2_season)) +
geom_boxplot()
co2 |> group_by(year) |>
mutate(annual_mean = mean(co2),
co2_season = co2 - annual_mean) |>
ggplot(aes(factor(X2) , co2_season)) +
geom_boxplot()
Try some boxplots, facet by decade, other variations.
Build up a complex plot piece by piece.
gapminder |> ggplot(aes(year, lifeExp)) + geom_point()
gapminder |> ggplot(aes(year, lifeExp)) + geom_point() +
facet_grid(continent ~ .)
gapminder |> ggplot(aes(year, lifeExp)) +
geom_smooth() +
facet_grid(continent ~ .)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
gapminder |> ggplot(aes(year, lifeExp)) +
geom_smooth(method = "lm") +
facet_grid(continent ~ .)
## `geom_smooth()` using formula = 'y ~ x'
gapminder |> ggplot(aes(year, lifeExp)) +
geom_smooth(aes(group = country), se = FALSE, color = "#00000040", size = 0.4) +
geom_smooth(color = "blue") +
facet_grid(continent ~ .) +
theme_bw()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Quantile regression
gapminder |> ggplot(aes(year, lifeExp)) +
geom_quantile() +
facet_grid(continent ~ .)
## Smoothing formula not specified. Using: y ~ x
## Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
gapminder |> ggplot(aes(year, lifeExp)) +
geom_quantile(quantiles = c(0.1, 0.5, 0.9), formula = y ~ poly(x,2)) +
facet_grid(continent ~ .)
## Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
## Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
## Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
Compare regression on the mean (ordinary least squares, linear model, lm) with quantile regression on the median (tau = 0.5).
gapminder |>
# filter(continent %in% c("Africa", "Americas", "Asia")) |>
ggplot(aes(gdpPercap, lifeExp)) +
scale_x_log10() +
geom_quantile(quantiles = 0.5, formula = y ~ x) +
geom_smooth(method = "lm", formula = y ~ x, color = "green") +
facet_grid(continent ~ .)
# facet_wrap(continent ~ ., ncol=1)
Get data from previous lesson (reading data):
co2 <- read_table("https://www.esrl.noaa.gov/gmd/webdata/ccgg/trends/co2/co2_mm_mlo.txt",
comment = "#", col_names = FALSE)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## X2 = col_double(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double()
## )
co2 |> ggplot(aes(X3, X4)) + geom_point(size = 0.4) +
labs(x = "Year", y = "Atmospheric pCO2 (ppmv)")
co2 |> ggplot(aes(X3, X4)) + geom_point(size = 0.4) +
geom_smooth() +
labs(x = "Year", y = "Atmospheric pCO2 (ppmv)")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
co2 |> ggplot(aes(X3, X4)) +
geom_smooth() +
labs(x = "Year", y = "Atmospheric pCO2 (ppmv)")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
What’s the annual cycle? Subtract the mean CO2 computed from each year. Compute the fractional part of th e year.
co2 |> group_by(X1) |>
mutate(fractional_year = X3 - X1,
mean_co2 = mean(X4),
co2_deviation = X4 - mean_co2) |>
ungroup() |>
ggplot(aes(fractional_year, co2_deviation)) +
geom_point()
co2_modified <- co2 |> group_by(X1) |>
mutate(fractional_year = X3 - X1,
mean_co2 = mean(X4),
co2_deviation = X4 - mean_co2) |>
ungroup()
co2_modified |>
ggplot(aes(factor(X2), co2_deviation)) +
geom_boxplot()
co2_modified |>
ggplot(aes(fractional_year, co2_deviation)) +
geom_smooth() + theme_bw()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
lubridate::date_decimal(2022.10)
## [1] "2022-02-06 11:59:59 UTC"
lubridate::date_decimal(2022.77)
## [1] "2022-10-09 01:11:59 UTC"
lubridate::decimal_date(ymd("2022-10-11"))
## [1] 2022.775
Make a decade by decade plot
co2 |> mutate(decade = 10*floor(X1/10),
decimal_decade = X3 - decade) |>
ggplot(aes(decimal_decade, X4)) +
geom_point() +
facet_grid(decade ~ . , scale = "free_y")
co2 |> mutate(decade = 10*floor(X1/10),
decimal_decade = X3 - decade) |>
ggplot(aes(decimal_decade, X4)) +
geom_smooth(method = "loess", span = 0.1) +
geom_point(size = 0.2) +
facet_grid(decade ~ . , scale = "free_y")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 8.1939
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.093768
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 0.0082751
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small. fewer
## data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 8.1939
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.093768
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 0.0082751
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
co2 |> mutate(decade = 10*floor(X1/10),
decimal_decade = X3 - decade) |>
ggplot(aes(decimal_decade, X4)) +
geom_quantile(quantiles = c(0.1, 0.9)) +
geom_point(size = 0.2) +
facet_grid(decade ~ . , scale = "free_y")
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
## Smoothing formula not specified. Using: y ~ x
co2 |> filter(X1 > 2019) |>
ggplot(aes(X3, X4)) +
geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
geom_point()
gam1 <- mgcv::gam(X4 ~ s(dd, bs = "cc") + s(X1),
data = co2 |> filter(X1 > 2010) |> mutate(dd = X3 - X1) )
broom::augment(gam1,
co2 |> filter(X1 > 2010) ) |>
ggplot() +
geom_line(aes(X3, .fitted)) +
geom_point(aes(X3, X4))