# First (naive) attempt at a grouped mean.
#
# data  : a data frame.
# group : the column to group by.
# var   : the column to average.
#
# `group` and `var` are handed to group_by() / summarize() without being
# embraced in `{{ }}`, so the column names supplied by the caller are not
# forwarded to these data-masking verbs. The accompanying text uses this
# version as the example that does not work.
# Note that `na.rm = TRUE` is written as an argument of summarize() here,
# not of mean().
grouped_mean <- function(data, group, var) {
  data |>
    group_by(group) |>
    summarize(mean(var), na.rm = TRUE)
}
Advanced Functions in R
functions that summarize & plot
Fun with Functions
We want a function to compute a “grouped mean” of a given dataset. Try this, and see if it works:
It doesn’t.
We use the curly-curly operator (see `?"{{}}"`).
The embrace operator `{{` is used to create functions that call other data-masking functions. It transports a data-masked argument (an argument that can refer to columns of a data frame) from one function to another.
It’s useful when passing an argument that has to be substituted in place before being evaluated in another context.
library(tidyverse)
library(nycflights13)
# Grouped mean using the embrace operator.
#
# .data : a data frame.
# group : unquoted column to group by.
# var   : unquoted column to average.
#
# `{{ group }}` and `{{ var }}` forward the caller's column expressions to
# group_by() and summarize(). The "{{var}}" := form names the result column
# after the column that was passed in. Missing values are dropped by mean().
grouped_mean <- function(.data, group, var) {
  .data |>
    group_by({{ group }}) |>
    summarize("{{var}}" := mean({{ var }}, na.rm = TRUE))
}
now a grouped function, as opposed to a grouped mean
# Apply an arbitrary summary function to one column, by group.
#
# .data : a data frame.
# group : unquoted column to group by.
# .var  : unquoted column to summarise.
# fn    : the summary function (e.g. mean); it is called with na.rm = TRUE.
#
# The summarised column is renamed to "<fn>_<var>". The function name and
# the new column name are printed along the way as sanity checks.
grouped_f <- function(.data, group, .var, fn) {
  # getting a string to match our 4th parameter
  fname <- as.character(substitute(fn))
  # compare output with:
  # fname <- as.character(quote(fn))
  print(fname)

  # now get R-function that matches our 4th parameter
  my_fun <- match.fun(fn)

  # group_by is a data-masking function
  df <- .data |>
    group_by({{ group }}) |>
    summarize("{{.var}}" := my_fun({{ .var }}, na.rm = TRUE))

  # vector of strings
  var_names <- df |> names()

  # this summarize creates two variables, one is the "group"
  # the other is the ".var" and we
  # glue the 2nd variable together with fname
  new_name <- str_c(fname, "_", var_names[2])

  # sanity check
  print(new_name)

  df |> rename({{ new_name }} := {{ .var }})
}
plotting function
# Bar chart with `color` from the diamonds data on the x axis.
diamonds |>
  ggplot(aes(x = color)) +
  geom_bar()

# Bar chart with `dest` from the flights data on the x axis.
flights |>
  ggplot(aes(x = dest)) +
  geom_bar()
# Draw a bar chart of one column, with the column name in the title.
#
# .data : a data frame.
# .var  : unquoted column to put on the x axis.
#
# The column name is captured as a string with substitute(), printed, and
# used to build the title "My cool <name> plot". The ggplot object is the
# value of the function.
barplot_f <- function(.data, .var) {
  s <- as.character(substitute(.var))
  print(s)

  .data |>
    ggplot(aes(x = {{ .var }})) +
    geom_bar() +
    labs(title = str_c("My cool ", s, " plot"))
}
A big summary function
# Summarise one column of a data frame.
#
# data : a data frame.
# var  : unquoted column to summarise.
#
# Computes min, mean, median and max of `var` (ignoring missing values),
# the row count `n`, and the number of missing values `n_miss`.
# `.groups = "drop"` is passed to summarize().
my_summary <- function(data, var) {
  data |>
    summarize(
      min = min({{ var }}, na.rm = TRUE),
      mean = mean({{ var }}, na.rm = TRUE),
      median = median({{ var }}, na.rm = TRUE),
      max = max({{ var }}, na.rm = TRUE),
      n = n(),
      n_miss = sum(is.na({{ var }})),
      .groups = "drop"
    )
}
diamonds |> my_summary(carat)
# A tibble: 1 × 6
min mean median max n n_miss
<dbl> <dbl> <dbl> <dbl> <int> <int>
1 0.2 0.798 0.7 5.01 53940 0