dat309-wk7-m

library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.3.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the course data set from the class web server. The file carries a .csv
# extension but is read with tab as the field separator (sep = "\t").
# Column names are then cleaned with janitor::clean_names().
nmu <-
  "http://euclid.nmu.edu/~joshthom/Teaching/DAT309/Week6/10%20BOS%2010%20Year%20History%20Deidentified%2010.3.23.csv" |>
  read.csv(sep = "\t") |>
  janitor::clean_names()

Compare ACT scores across a few majors

library(ggridges)
# Ridge plot of ACT score distributions for three majors, with each ridge
# shaded by quartile.
#
# `v` holds the distinct values of `major`, sorted. The three majors plotted
# are chosen by position in that sorted list (1, 158 and 200), so the
# selection shifts if the set of majors in the data changes.
v <- nmu |>
  distinct(major) |>
  arrange(major)

nmu |>
  filter(major %in% v$major[c(1, 158, 200)]) |>
  # after_stat() replaces stat(), which was deprecated in ggplot2 3.4.0.
  ggplot(aes(x = act, y = major, fill = factor(after_stat(quantile)))) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE, # makes the stat compute the `quantile` variable
    quantiles = 4, # four equal-probability bins, i.e. quartiles
    quantile_lines = TRUE
  ) +
  scale_fill_viridis_d(name = "Quartiles")
Warning: `stat(quantile)` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(quantile)` instead.
Picking joint bandwidth of 1.17
Warning: Removed 227 rows containing non-finite outside the scale range
(`stat_density_ridges()`).
Warning: Using the `size` aesthetic with geom_segment was deprecated in ggplot2 3.4.0.
ℹ Please use the `linewidth` aesthetic instead.

Compare ACT scores for the majors of 10 randomly sampled records, plus Mathematics

# Ridge plot of ACT score distributions for a random handful of majors plus
# Mathematics, ordered by ACT score and shaded by quartile.
#
# `some` is built by sampling 10 *rows* of `nmu` and keeping their `major`
# column, so it may contain repeated majors (fewer than 10 distinct ones) and
# majors with more rows are more likely to be picked. No seed is set, so the
# selection differs from run to run.
some <- nmu |>
  slice_sample(n = 10) |>
  select(major)

nmu |>
  # `%in%` takes a plain vector; Mathematics is always included.
  filter(major %in% c(some$major, "Mathematics")) |>
  ggplot(
    aes(
      x = act,
      # Order ridges by each major's ACT summary (fct_reorder's default is the
      # median); .na_rm = TRUE drops missing ACT values explicitly instead of
      # relying on the default, which drops them with a warning.
      y = fct_reorder(major, act, .na_rm = TRUE),
      # after_stat() replaces stat(), which was deprecated in ggplot2 3.4.0.
      fill = factor(after_stat(quantile))
    )
  ) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE, # makes the stat compute the `quantile` variable
    quantiles = 4, # four equal-probability bins, i.e. quartiles
    quantile_lines = TRUE
  ) +
  scale_fill_viridis_d(name = "Quartiles")
Warning: `fct_reorder()` removing 1895 missing values.
ℹ Use `.na_rm = TRUE` to silence this message.
ℹ Use `.na_rm = FALSE` to preserve NAs.
Picking joint bandwidth of 0.971
Warning: Removed 1895 rows containing non-finite outside the scale range
(`stat_density_ridges()`).