f(g(x)) = g(x) %>% f()
f(g(x)) = g(x) %>% f()
f(g(x)) = x %>% g() %>% f()
f(g(x)) = g(x) %>% f()
f(g(x)) = x %>% g() %>% f()
f(x, y) = x %>% f(y)
f(g(x)) = g(x) %>% f()
f(g(x)) = x %>% g() %>% f()
f(x, y) = x %>% f(y)
f(x, y) = x |> f(y)
# Nested-call version of the recipe: it has to be read inside-out, starting
# from mix(). Illustrative only -- mix(), bake(), sandwich() and sprinkle() are
# not defined here, and `in` is a reserved word in R, so a runnable version
# would need backticks around that argument name.
sprinkle(
  sandwich(
    bake(
      mix(
        what = ingredients,
        in = "bowl"
      ),
      in = "oven",
      at = 180
    ),
    between = "jam"
  ),
  with = "sugar"
)
# Same recipe written with a named intermediate result for every step.
# Illustrative only -- the recipe functions are not defined here, and `in` is
# a reserved word in R, so a runnable version would need backticks around that
# argument name.
batter <- mix(what = ingredients, in = "bowl")
sponge <- bake(batter, in = "oven", at = 180)
assembled_cake <- sandwich(sponge, between = "jam")
decorated_cake <- sprinkle(assembled_cake, with = "sugar")
# Piped version of the recipe: each step receives the previous result as its
# first argument, so the steps read top to bottom in the order they happen.
# Illustrative only -- `in` is a reserved word in R and would need backticks
# in runnable code.
mix(what = ingredients, in = "bowl") %>%
  bake(in = "oven", at = 180) %>%
  sandwich(between = "jam") %>%
  sprinkle(with = "sugar")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
##     chisq.test, fisher.test
read.csv("my_data.csv")
## id_column group fruit rating sampled## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
read_csv("my_data.csv")
## Rows: 8 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): group, fruit
## dbl  (2): id_column, rating
## date (1): sampled
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Read the ratings CSV, declaring the type of each of the five columns
# explicitly instead of leaving them to be guessed.
ratings <- read_csv(
  "my_data.csv",
  col_types = cols(
    id_column = col_double(),
    group = col_character(),
    fruit = col_character(),
    rating = col_double(),
    sampled = col_date(format = "")
  )
)
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Same import as `ratings`, except that `fruit` is declared as col_double()
# although the file holds fruit names in that column.
# NOTE(review): looks like a deliberate demonstration of a mismatched column
# type -- confirm.
ratings_bad <- read_csv(
  "my_data.csv",
  col_types = cols(
    id_column = col_double(),
    group = col_character(),
    fruit = col_double(),
    rating = col_double(),
    sampled = col_date(format = "")
  )
)
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
glimpse(ratings)
## Rows: 8## Columns: 5## $ id_column <dbl> 1, 2, 3, 4, 5, 6, 7, 8## $ group <chr> "A", "B", "A", "B", "A", "B", "A", "B"## $ fruit <chr> "apple", "apricot", "avocado", "banana", "bell pepper", "bil…## $ rating <dbl> 4, 3, 5, 1, 4, 1, 5, 1## $ sampled <date> 2021-07-05, 2021-07-13, 2021-07-09, 2021-07-26, 2021-07-22, …
data frame
ratings %>% select(id_column)
## # A tibble: 8 × 1## id_column## <dbl>## 1 1## 2 2## 3 3## 4 4## 5 5## 6 6## 7 7## 8 8
ratings %>% select(id_column, group)
## # A tibble: 8 × 2## id_column group## <dbl> <chr>## 1 1 A ## 2 2 B ## 3 3 A ## 4 4 B ## 5 5 A ## 6 6 B ## 7 7 A ## 8 8 B
ratings %>% select(id_column:rating)
## # A tibble: 8 × 4## id_column group fruit rating## <dbl> <chr> <chr> <dbl>## 1 1 A apple 4## 2 2 B apricot 3## 3 3 A avocado 5## 4 4 B banana 1## 5 5 A bell pepper 4## 6 6 B bilberry 1## 7 7 A blackberry 5## 8 8 B blackcurrant 1
ratings %>% select(-id_column)
## # A tibble: 8 × 4## group fruit rating sampled ## <chr> <chr> <dbl> <date> ## 1 A apple 4 2021-07-05## 2 B apricot 3 2021-07-13## 3 A avocado 5 2021-07-09## 4 B banana 1 2021-07-26## 5 A bell pepper 4 2021-07-22## 6 B bilberry 1 2021-07-31## 7 A blackberry 5 2021-07-01## 8 B blackcurrant 1 2021-07-18
ratings %>% select(id_column)
ratings %>% select("id_column")
data frame
ratings %>% filter(group == "A")
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
ratings %>% filter(rating >= 3)
## # A tibble: 5 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 5 A bell pepper 4 2021-07-22## 5 7 A blackberry 5 2021-07-01
ratings %>% filter(sampled > as.Date("2021-07-15"))
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 4 B banana 1 2021-07-26## 2 5 A bell pepper 4 2021-07-22## 3 6 B bilberry 1 2021-07-31## 4 8 B blackcurrant 1 2021-07-18
data frame
ratings %>% arrange(rating)
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 4 B banana 1 2021-07-26## 2 6 B bilberry 1 2021-07-31## 3 8 B blackcurrant 1 2021-07-18## 4 2 B apricot 3 2021-07-13## 5 1 A apple 4 2021-07-05## 6 5 A bell pepper 4 2021-07-22## 7 3 A avocado 5 2021-07-09## 8 7 A blackberry 5 2021-07-01
ratings %>% arrange(desc(rating))
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 3 A avocado 5 2021-07-09## 2 7 A blackberry 5 2021-07-01## 3 1 A apple 4 2021-07-05## 4 5 A bell pepper 4 2021-07-22## 5 2 B apricot 3 2021-07-13## 6 4 B banana 1 2021-07-26## 7 6 B bilberry 1 2021-07-31## 8 8 B blackcurrant 1 2021-07-18
ratings %>% arrange(group, sampled)
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 7 A blackberry 5 2021-07-01## 2 1 A apple 4 2021-07-05## 3 3 A avocado 5 2021-07-09## 4 5 A bell pepper 4 2021-07-22## 5 2 B apricot 3 2021-07-13## 6 8 B blackcurrant 1 2021-07-18## 7 4 B banana 1 2021-07-26## 8 6 B bilberry 1 2021-07-31
data frame
ratings %>% mutate(rating = rating * 20)
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 80 2021-07-05## 2 2 B apricot 60 2021-07-13## 3 3 A avocado 100 2021-07-09## 4 4 B banana 20 2021-07-26## 5 5 A bell pepper 80 2021-07-22## 6 6 B bilberry 20 2021-07-31## 7 7 A blackberry 100 2021-07-01## 8 8 B blackcurrant 20 2021-07-18
ratings %>% mutate(fruit = str_to_upper(fruit))
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A APPLE 4 2021-07-05## 2 2 B APRICOT 3 2021-07-13## 3 3 A AVOCADO 5 2021-07-09## 4 4 B BANANA 1 2021-07-26## 5 5 A BELL PEPPER 4 2021-07-22## 6 6 B BILBERRY 1 2021-07-31## 7 7 A BLACKBERRY 5 2021-07-01## 8 8 B BLACKCURRANT 1 2021-07-18
ratings %>% mutate(sampled = format(sampled, "%b %d"), rating = rating * 20)
## # A tibble: 8 × 5## id_column group fruit rating sampled## <dbl> <chr> <chr> <dbl> <chr> ## 1 1 A apple 80 Jul 05 ## 2 2 B apricot 60 Jul 13 ## 3 3 A avocado 100 Jul 09 ## 4 4 B banana 20 Jul 26 ## 5 5 A bell pepper 80 Jul 22 ## 6 6 B bilberry 20 Jul 31 ## 7 7 A blackberry 100 Jul 01 ## 8 8 B blackcurrant 20 Jul 18
ratings %>% mutate(new_col = 1:8)
## # A tibble: 8 × 6## id_column group fruit rating sampled new_col## <dbl> <chr> <chr> <dbl> <date> <int>## 1 1 A apple 4 2021-07-05 1## 2 2 B apricot 3 2021-07-13 2## 3 3 A avocado 5 2021-07-09 3## 4 4 B banana 1 2021-07-26 4## 5 5 A bell pepper 4 2021-07-22 5## 6 6 B bilberry 1 2021-07-31 6## 7 7 A blackberry 5 2021-07-01 7## 8 8 B blackcurrant 1 2021-07-18 8
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
ratings %>% filter(group == "A")
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
ratings %>% filter(group == "A") %>% select(fruit, sampled)
## # A tibble: 4 × 2## fruit sampled ## <chr> <date> ## 1 apple 2021-07-05## 2 avocado 2021-07-09## 3 bell pepper 2021-07-22## 4 blackberry 2021-07-01
# Group "A" only, reduced to fruit and sampling date, with the abbreviated
# month name of the sampling date added as text.
ratings %>%
  filter(group == "A") %>%
  select(fruit, sampled) %>%
  mutate(month_name = format(sampled, "%b"))
## # A tibble: 4 × 3## fruit sampled month_name## <chr> <date> <chr> ## 1 apple 2021-07-05 Jul ## 2 avocado 2021-07-09 Jul ## 3 bell pepper 2021-07-22 Jul ## 4 blackberry 2021-07-01 Jul
# Group "A" only, reduced to fruit and sampling date, with the abbreviated
# month name added, then sorted by fruit name in descending order.
ratings %>%
  filter(group == "A") %>%
  select(fruit, sampled) %>%
  mutate(month_name = format(sampled, "%b")) %>%
  arrange(desc(fruit))
## # A tibble: 4 × 3## fruit sampled month_name## <chr> <date> <chr> ## 1 blackberry 2021-07-01 Jul ## 2 bell pepper 2021-07-22 Jul ## 3 avocado 2021-07-09 Jul ## 4 apple 2021-07-05 Jul
data frame
data frame
ratings %>% head(n = 2)
## # A tibble: 2 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13
ratings %>% group_by(group) %>% head(n = 2)
## # A tibble: 2 × 5## # Groups: group [2]## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13
ratings %>% summarise(mean = mean(rating))
## # A tibble: 1 × 1## mean## <dbl>## 1 3
ratings %>% group_by(group) %>% summarise(mean = mean(rating))
## # A tibble: 2 × 2## group mean## <chr> <dbl>## 1 A 4.5## 2 B 1.5
ratings %>% group_by(group) %>% summarise(count = n())
## # A tibble: 2 × 2## group count## <chr> <int>## 1 A 4## 2 B 4
ratings %>% count(group, name = "count")
## # A tibble: 2 × 2## group count## <chr> <int>## 1 A 4## 2 B 4
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
ratings %>% group_by(group)
## # A tibble: 8 × 5## # Groups: group [2]## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
ratings %>% group_by(group) %>% nest()
## # A tibble: 2 × 2## # Groups: group [2]## group data ## <chr> <list> ## 1 A <tibble [4 × 4]>## 2 B <tibble [4 × 4]>
(subset <- ratings %>% filter(group == "A"))
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
# Keep only group "A"; the outer parentheses print the result as well as
# assigning it.
(subset <- ratings %>% filter(group == "A"))

# Build a sentence naming the highest-rated fruit in `df`.
#
# `df` needs `rating` and `fruit` columns. When several rows share the maximum
# rating, slice(1) keeps the first of them.
my_func <- function(df) {
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)

  paste("The highest rated fruit is", f)
}
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
# Keep only group "A"; the outer parentheses print the result as well as
# assigning it.
(subset <- ratings %>% filter(group == "A"))

# Build a sentence naming the highest-rated fruit in `df`.
#
# `df` needs `rating` and `fruit` columns. When several rows share the maximum
# rating, slice(1) keeps the first of them.
my_func <- function(df) {
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)

  paste("The highest rated fruit is", f)
}

# Try the function on the single-group subset.
my_func(subset)
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
## [1] "The highest rated fruit is avocado"
# Build a sentence naming the highest-rated fruit in `df`.
#
# `df` needs `rating` and `fruit` columns. When several rows share the maximum
# rating, slice(1) keeps the first of them.
my_func <- function(df) {
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)

  paste("The highest rated fruit is", f)
}
# Build a sentence naming the highest-rated fruit in `df`.
#
# `df` needs `rating` and `fruit` columns. When several rows share the maximum
# rating, slice(1) keeps the first of them.
my_func <- function(df) {
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)

  paste("The highest rated fruit is", f)
}

# One row per group, with that group's remaining columns packed into a
# list-column of tibbles.
ratings %>%
  group_by(group) %>%
  nest()
## # A tibble: 2 × 2## # Groups: group [2]## group data ## <chr> <list> ## 1 A <tibble [4 × 4]>## 2 B <tibble [4 × 4]>
# Build a sentence naming the highest-rated fruit in `df`.
#
# `df` needs `rating` and `fruit` columns. When several rows share the maximum
# rating, slice(1) keeps the first of them.
my_func <- function(df) {
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)

  paste("The highest rated fruit is", f)
}

# Nest each group's rows, then call my_func() on every nested tibble.
# map() returns a list, so `description` is a list-column at this point.
ratings %>%
  group_by(group) %>%
  nest() %>%
  mutate(description = map(data, my_func))
## # A tibble: 2 × 3## # Groups: group [2]## group data description## <chr> <list> <list> ## 1 A <tibble [4 × 4]> <chr [1]> ## 2 B <tibble [4 × 4]> <chr [1]>
# Build a sentence naming the highest-rated fruit in `df`.
#
# `df` needs `rating` and `fruit` columns. When several rows share the maximum
# rating, slice(1) keeps the first of them.
my_func <- function(df) {
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)

  paste("The highest rated fruit is", f)
}

# Nest each group's rows, call my_func() on every nested tibble, then unnest
# the resulting list-column so `description` becomes a plain character column.
ratings %>%
  group_by(group) %>%
  nest() %>%
  mutate(description = map(data, my_func)) %>%
  unnest(description)
## # A tibble: 2 × 3## # Groups: group [2]## group data description ## <chr> <list> <chr> ## 1 A <tibble [4 × 4]> The highest rated fruit is avocado## 2 B <tibble [4 × 4]> The highest rated fruit is apricot
library(gapminder)
library(gapminder)

# Fit a linear model of `lifeExp` on `year` using the rows of `df`.
# Returns the fitted lm object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}
library(gapminder)

# Fit a linear model of `lifeExp` on `year` using the rows of `df`.
# Returns the fitted lm object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}

# Scatter plot of `lifeExp` against `year` for the rows of `df`, with a
# linear-model smoother drawn on top. Returns the ggplot object.
plot_graph <- function(df) {
  ggplot(df, aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm")
}
library(gapminder)

# Fit a linear model of `lifeExp` on `year` using the rows of `df`.
# Returns the fitted lm object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}

# Scatter plot of `lifeExp` against `year` for the rows of `df`, with a
# linear-model smoother drawn on top. Returns the ggplot object.
plot_graph <- function(df) {
  ggplot(df, aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm")
}

# Print the full data set the two helpers will be applied to.
gapminder
## # A tibble: 1,704 × 6## country continent year lifeExp pop gdpPercap## <fct> <fct> <int> <dbl> <int> <dbl>## 1 Afghanistan Asia 1952 28.8 8425333 779.## 2 Afghanistan Asia 1957 30.3 9240934 821.## 3 Afghanistan Asia 1962 32.0 10267083 853.## 4 Afghanistan Asia 1967 34.0 11537966 836.## 5 Afghanistan Asia 1972 36.1 13079460 740.## 6 Afghanistan Asia 1977 38.4 14880372 786.## 7 Afghanistan Asia 1982 39.9 12881816 978.## 8 Afghanistan Asia 1987 40.8 13867957 852.## 9 Afghanistan Asia 1992 41.7 16317921 649.## 10 Afghanistan Asia 1997 41.8 22227415 635.## # … with 1,694 more rows
library(gapminder)

# Fit a linear model of `lifeExp` on `year` using the rows of `df`.
# Returns the fitted lm object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}

# Scatter plot of `lifeExp` against `year` for the rows of `df`, with a
# linear-model smoother drawn on top. Returns the ggplot object.
plot_graph <- function(df) {
  ggplot(df, aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm")
}

# One row per country, with that country's remaining columns packed into a
# list-column of tibbles.
gapminder %>%
  group_by(country) %>%
  nest()
## # A tibble: 142 × 2## # Groups: country [142]## country data ## <fct> <list> ## 1 Afghanistan <tibble [12 × 5]>## 2 Albania <tibble [12 × 5]>## 3 Algeria <tibble [12 × 5]>## 4 Angola <tibble [12 × 5]>## 5 Argentina <tibble [12 × 5]>## 6 Australia <tibble [12 × 5]>## 7 Austria <tibble [12 × 5]>## 8 Bahrain <tibble [12 × 5]>## 9 Bangladesh <tibble [12 × 5]>## 10 Belgium <tibble [12 × 5]>## # … with 132 more rows
library(gapminder)

# Fit a linear model of `lifeExp` on `year` using the rows of `df`.
# Returns the fitted lm object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}

# Scatter plot of `lifeExp` against `year` for the rows of `df`, with a
# linear-model smoother drawn on top. Returns the ggplot object.
plot_graph <- function(df) {
  ggplot(df, aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm")
}

# Nest the data by country, then store one fitted model and one plot per
# country in list-columns. The result is bound to `gapminder_modified` because
# later snippets filter that object; the outer parentheses still print it.
(gapminder_modified <- gapminder %>%
  group_by(country) %>%
  nest() %>%
  mutate(
    model = map(data, lm_fit),
    plot = map(data, plot_graph)
  ))
## # A tibble: 142 × 4## # Groups: country [142]## country data model plot ## <fct> <list> <list> <list>## 1 Afghanistan <tibble [12 × 5]> <lm> <gg> ## 2 Albania <tibble [12 × 5]> <lm> <gg> ## 3 Algeria <tibble [12 × 5]> <lm> <gg> ## 4 Angola <tibble [12 × 5]> <lm> <gg> ## 5 Argentina <tibble [12 × 5]> <lm> <gg> ## 6 Australia <tibble [12 × 5]> <lm> <gg> ## 7 Austria <tibble [12 × 5]> <lm> <gg> ## 8 Bahrain <tibble [12 × 5]> <lm> <gg> ## 9 Bangladesh <tibble [12 × 5]> <lm> <gg> ## 10 Belgium <tibble [12 × 5]> <lm> <gg> ## # … with 132 more rows
gapminder_modified %>% filter(country == "United Kingdom") %>% pull(model)
## [[1]]## ## Call:## lm(formula = lifeExp ~ year, data = df)## ## Coefficients:## (Intercept) year ## -294.197 0.186
gapminder_modified %>% filter(country == "United Kingdom") %>% pull(plot)
## [[1]]
## `geom_smooth()` using formula 'y ~ x'
gapminder %>% select(-c(continent))
## # A tibble: 1,704 × 5## country year lifeExp pop gdpPercap## <fct> <int> <dbl> <int> <dbl>## 1 Afghanistan 1952 28.8 8425333 779.## 2 Afghanistan 1957 30.3 9240934 821.## 3 Afghanistan 1962 32.0 10267083 853.## 4 Afghanistan 1967 34.0 11537966 836.## 5 Afghanistan 1972 36.1 13079460 740.## 6 Afghanistan 1977 38.4 14880372 786.## 7 Afghanistan 1982 39.9 12881816 978.## 8 Afghanistan 1987 40.8 13867957 852.## 9 Afghanistan 1992 41.7 16317921 649.## 10 Afghanistan 1997 41.8 22227415 635.## # … with 1,694 more rows
# Drop `continent`, then stack every column other than `country` and `year`
# into name/value pairs: the former column name goes to `stat`, its value to
# `value`.
gapminder %>%
  select(-c(continent)) %>%
  pivot_longer(
    cols = -c(country, year),
    names_to = "stat",
    values_to = "value"
  )
## # A tibble: 5,112 × 4## country year stat value## <fct> <int> <chr> <dbl>## 1 Afghanistan 1952 lifeExp 28.8## 2 Afghanistan 1952 pop 8425333 ## 3 Afghanistan 1952 gdpPercap 779. ## 4 Afghanistan 1957 lifeExp 30.3## 5 Afghanistan 1957 pop 9240934 ## 6 Afghanistan 1957 gdpPercap 821. ## 7 Afghanistan 1962 lifeExp 32.0## 8 Afghanistan 1962 pop 10267083 ## 9 Afghanistan 1962 gdpPercap 853. ## 10 Afghanistan 1967 lifeExp 34.0## # … with 5,102 more rows
gapminder %>% select(country, year, pop)
## # A tibble: 1,704 × 3## country year pop## <fct> <int> <int>## 1 Afghanistan 1952 8425333## 2 Afghanistan 1957 9240934## 3 Afghanistan 1962 10267083## 4 Afghanistan 1967 11537966## 5 Afghanistan 1972 13079460## 6 Afghanistan 1977 14880372## 7 Afghanistan 1982 12881816## 8 Afghanistan 1987 13867957## 9 Afghanistan 1992 16317921## 10 Afghanistan 1997 22227415## # … with 1,694 more rows
# Keep country, year and population, then spread the years across columns so
# each country occupies a single row of population values.
gapminder %>%
  select(country, year, pop) %>%
  pivot_wider(names_from = "year", values_from = "pop")
## # A tibble: 142 × 13## country `1952` `1957` `1962` `1967` `1972` `1977` `1982` `1987` `1992` `1997`## <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>## 1 Afghan… 8.43e6 9.24e6 1.03e7 1.15e7 1.31e7 1.49e7 1.29e7 1.39e7 1.63e7 2.22e7## 2 Albania 1.28e6 1.48e6 1.73e6 1.98e6 2.26e6 2.51e6 2.78e6 3.08e6 3.33e6 3.43e6## 3 Algeria 9.28e6 1.03e7 1.10e7 1.28e7 1.48e7 1.72e7 2.00e7 2.33e7 2.63e7 2.91e7## 4 Angola 4.23e6 4.56e6 4.83e6 5.25e6 5.89e6 6.16e6 7.02e6 7.87e6 8.74e6 9.88e6## 5 Argent… 1.79e7 1.96e7 2.13e7 2.29e7 2.48e7 2.70e7 2.93e7 3.16e7 3.40e7 3.62e7## 6 Austra… 8.69e6 9.71e6 1.08e7 1.19e7 1.32e7 1.41e7 1.52e7 1.63e7 1.75e7 1.86e7## 7 Austria 6.93e6 6.97e6 7.13e6 7.38e6 7.54e6 7.57e6 7.57e6 7.58e6 7.91e6 8.07e6## 8 Bahrain 1.20e5 1.39e5 1.72e5 2.02e5 2.31e5 2.97e5 3.78e5 4.55e5 5.29e5 5.99e5## 9 Bangla… 4.69e7 5.14e7 5.68e7 6.28e7 7.08e7 8.04e7 9.31e7 1.04e8 1.14e8 1.23e8## 10 Belgium 8.73e6 8.99e6 9.22e6 9.56e6 9.71e6 9.82e6 9.86e6 9.87e6 1.00e7 1.02e7## # … with 132 more rows, and 2 more variables: 2002 <int>, 2007 <int>
group_details
## # A tibble: 4 × 2## group time ## <chr> <chr> ## 1 A morning ## 2 B lunchtime## 3 C afternoon## 4 D evening
More groups than in ratings
fruit_details
## # A tibble: 3 × 2## fruit price## <chr> <dbl>## 1 apple 2## 2 banana 4## 3 blackberry 6
Fewer fruits than in ratings
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
ratings %>% left_join(group_details, by = "group")
## # A tibble: 8 × 6## id_column group fruit rating sampled time ## <dbl> <chr> <chr> <dbl> <date> <chr> ## 1 1 A apple 4 2021-07-05 morning ## 2 2 B apricot 3 2021-07-13 lunchtime## 3 3 A avocado 5 2021-07-09 morning ## 4 4 B banana 1 2021-07-26 lunchtime## 5 5 A bell pepper 4 2021-07-22 morning ## 6 6 B bilberry 1 2021-07-31 lunchtime## 7 7 A blackberry 5 2021-07-01 morning ## 8 8 B blackcurrant 1 2021-07-18 lunchtime
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
ratings %>% left_join(fruit_details, by = "fruit")
## # A tibble: 8 × 6## id_column group fruit rating sampled price## <dbl> <chr> <chr> <dbl> <date> <dbl>## 1 1 A apple 4 2021-07-05 2## 2 2 B apricot 3 2021-07-13 NA## 3 3 A avocado 5 2021-07-09 NA## 4 4 B banana 1 2021-07-26 4## 5 5 A bell pepper 4 2021-07-22 NA## 6 6 B bilberry 1 2021-07-31 NA## 7 7 A blackberry 5 2021-07-01 6## 8 8 B blackcurrant 1 2021-07-18 NA
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
ratings %>% inner_join(fruit_details, by = "fruit")
## # A tibble: 3 × 6## id_column group fruit rating sampled price## <dbl> <chr> <chr> <dbl> <date> <dbl>## 1 1 A apple 4 2021-07-05 2## 2 4 B banana 1 2021-07-26 4## 3 7 A blackberry 5 2021-07-01 6
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
ratings %>% full_join(group_details, by = "group")
## # A tibble: 10 × 6## id_column group fruit rating sampled time ## <dbl> <chr> <chr> <dbl> <date> <chr> ## 1 1 A apple 4 2021-07-05 morning ## 2 2 B apricot 3 2021-07-13 lunchtime## 3 3 A avocado 5 2021-07-09 morning ## 4 4 B banana 1 2021-07-26 lunchtime## 5 5 A bell pepper 4 2021-07-22 morning ## 6 6 B bilberry 1 2021-07-31 lunchtime## 7 7 A blackberry 5 2021-07-01 morning ## 8 8 B blackcurrant 1 2021-07-18 lunchtime## 9 NA C <NA> NA NA afternoon## 10 NA D <NA> NA NA evening
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
o | Tile View: Overview of Slides |
Esc | Back to slideshow |
f(g(x)) = g(x) %>% f()
f(g(x)) = g(x) %>% f()
f(g(x)) = x %>% g() %>% f()
f(g(x)) = g(x) %>% f()
f(g(x)) = x %>% g() %>% f()
f(x, y) = x %>% f(y)
f(g(x)) = g(x) %>% f()
f(g(x)) = x %>% g() %>% f()
f(x, y) = x %>% f(y)
f(x, y) = x |> f(y)
# Nested-call version of the recipe: it has to be read inside-out, starting
# from mix(). Illustrative only -- mix(), bake(), sandwich() and sprinkle() are
# not defined here, and `in` is a reserved word in R, so a runnable version
# would need backticks around that argument name.
sprinkle(
  sandwich(
    bake(
      mix(
        what = ingredients,
        in = "bowl"
      ),
      in = "oven",
      at = 180
    ),
    between = "jam"
  ),
  with = "sugar"
)
# Same recipe written with a named intermediate result for every step.
# Illustrative only -- the recipe functions are not defined here, and `in` is
# a reserved word in R, so a runnable version would need backticks around that
# argument name.
batter <- mix(what = ingredients, in = "bowl")
sponge <- bake(batter, in = "oven", at = 180)
assembled_cake <- sandwich(sponge, between = "jam")
decorated_cake <- sprinkle(assembled_cake, with = "sugar")
# Piped version of the recipe: each step receives the previous result as its
# first argument, so the steps read top to bottom in the order they happen.
# Illustrative only -- `in` is a reserved word in R and would need backticks
# in runnable code.
mix(what = ingredients, in = "bowl") %>%
  bake(in = "oven", at = 180) %>%
  sandwich(between = "jam") %>%
  sprinkle(with = "sugar")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
##     chisq.test, fisher.test
read.csv("my_data.csv")
## id_column group fruit rating sampled## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
read_csv("my_data.csv")
## Rows: 8 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): group, fruit
## dbl  (2): id_column, rating
## date (1): sampled
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Read the ratings CSV, declaring the type of each of the five columns
# explicitly instead of leaving them to be guessed.
ratings <- read_csv(
  "my_data.csv",
  col_types = cols(
    id_column = col_double(),
    group = col_character(),
    fruit = col_character(),
    rating = col_double(),
    sampled = col_date(format = "")
  )
)
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Same import as `ratings`, except that `fruit` is declared as col_double()
# although the file holds fruit names in that column.
# NOTE(review): looks like a deliberate demonstration of a mismatched column
# type -- confirm.
ratings_bad <- read_csv(
  "my_data.csv",
  col_types = cols(
    id_column = col_double(),
    group = col_character(),
    fruit = col_double(),
    rating = col_double(),
    sampled = col_date(format = "")
  )
)
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
glimpse(ratings)
## Rows: 8## Columns: 5## $ id_column <dbl> 1, 2, 3, 4, 5, 6, 7, 8## $ group <chr> "A", "B", "A", "B", "A", "B", "A", "B"## $ fruit <chr> "apple", "apricot", "avocado", "banana", "bell pepper", "bil…## $ rating <dbl> 4, 3, 5, 1, 4, 1, 5, 1## $ sampled <date> 2021-07-05, 2021-07-13, 2021-07-09, 2021-07-26, 2021-07-22, …
data frame
ratings %>% select(id_column)
## # A tibble: 8 × 1## id_column## <dbl>## 1 1## 2 2## 3 3## 4 4## 5 5## 6 6## 7 7## 8 8
ratings %>% select(id_column, group)
## # A tibble: 8 × 2## id_column group## <dbl> <chr>## 1 1 A ## 2 2 B ## 3 3 A ## 4 4 B ## 5 5 A ## 6 6 B ## 7 7 A ## 8 8 B
ratings %>% select(id_column:rating)
## # A tibble: 8 × 4## id_column group fruit rating## <dbl> <chr> <chr> <dbl>## 1 1 A apple 4## 2 2 B apricot 3## 3 3 A avocado 5## 4 4 B banana 1## 5 5 A bell pepper 4## 6 6 B bilberry 1## 7 7 A blackberry 5## 8 8 B blackcurrant 1
ratings %>% select(-id_column)
## # A tibble: 8 × 4## group fruit rating sampled ## <chr> <chr> <dbl> <date> ## 1 A apple 4 2021-07-05## 2 B apricot 3 2021-07-13## 3 A avocado 5 2021-07-09## 4 B banana 1 2021-07-26## 5 A bell pepper 4 2021-07-22## 6 B bilberry 1 2021-07-31## 7 A blackberry 5 2021-07-01## 8 B blackcurrant 1 2021-07-18
ratings %>% select(id_column)
ratings %>% select("id_column")
data frame
ratings %>% filter(group == "A")
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
ratings %>% filter(rating >= 3)
## # A tibble: 5 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 5 A bell pepper 4 2021-07-22## 5 7 A blackberry 5 2021-07-01
ratings %>% filter(sampled > as.Date("2021-07-15"))
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 4 B banana 1 2021-07-26## 2 5 A bell pepper 4 2021-07-22## 3 6 B bilberry 1 2021-07-31## 4 8 B blackcurrant 1 2021-07-18
data frame
ratings %>% arrange(rating)
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 4 B banana 1 2021-07-26## 2 6 B bilberry 1 2021-07-31## 3 8 B blackcurrant 1 2021-07-18## 4 2 B apricot 3 2021-07-13## 5 1 A apple 4 2021-07-05## 6 5 A bell pepper 4 2021-07-22## 7 3 A avocado 5 2021-07-09## 8 7 A blackberry 5 2021-07-01
ratings %>% arrange(desc(rating))
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 3 A avocado 5 2021-07-09## 2 7 A blackberry 5 2021-07-01## 3 1 A apple 4 2021-07-05## 4 5 A bell pepper 4 2021-07-22## 5 2 B apricot 3 2021-07-13## 6 4 B banana 1 2021-07-26## 7 6 B bilberry 1 2021-07-31## 8 8 B blackcurrant 1 2021-07-18
ratings %>% arrange(group, sampled)
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 7 A blackberry 5 2021-07-01## 2 1 A apple 4 2021-07-05## 3 3 A avocado 5 2021-07-09## 4 5 A bell pepper 4 2021-07-22## 5 2 B apricot 3 2021-07-13## 6 8 B blackcurrant 1 2021-07-18## 7 4 B banana 1 2021-07-26## 8 6 B bilberry 1 2021-07-31
data frame
ratings %>% mutate(rating = rating * 20)
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 80 2021-07-05## 2 2 B apricot 60 2021-07-13## 3 3 A avocado 100 2021-07-09## 4 4 B banana 20 2021-07-26## 5 5 A bell pepper 80 2021-07-22## 6 6 B bilberry 20 2021-07-31## 7 7 A blackberry 100 2021-07-01## 8 8 B blackcurrant 20 2021-07-18
ratings %>% mutate(fruit = str_to_upper(fruit))
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A APPLE 4 2021-07-05## 2 2 B APRICOT 3 2021-07-13## 3 3 A AVOCADO 5 2021-07-09## 4 4 B BANANA 1 2021-07-26## 5 5 A BELL PEPPER 4 2021-07-22## 6 6 B BILBERRY 1 2021-07-31## 7 7 A BLACKBERRY 5 2021-07-01## 8 8 B BLACKCURRANT 1 2021-07-18
ratings %>% mutate(sampled = format(sampled, "%b %d"), rating = rating * 20)
## # A tibble: 8 × 5## id_column group fruit rating sampled## <dbl> <chr> <chr> <dbl> <chr> ## 1 1 A apple 80 Jul 05 ## 2 2 B apricot 60 Jul 13 ## 3 3 A avocado 100 Jul 09 ## 4 4 B banana 20 Jul 26 ## 5 5 A bell pepper 80 Jul 22 ## 6 6 B bilberry 20 Jul 31 ## 7 7 A blackberry 100 Jul 01 ## 8 8 B blackcurrant 20 Jul 18
ratings %>% mutate(new_col = 1:8)
## # A tibble: 8 × 6## id_column group fruit rating sampled new_col## <dbl> <chr> <chr> <dbl> <date> <int>## 1 1 A apple 4 2021-07-05 1## 2 2 B apricot 3 2021-07-13 2## 3 3 A avocado 5 2021-07-09 3## 4 4 B banana 1 2021-07-26 4## 5 5 A bell pepper 4 2021-07-22 5## 6 6 B bilberry 1 2021-07-31 6## 7 7 A blackberry 5 2021-07-01 7## 8 8 B blackcurrant 1 2021-07-18 8
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
ratings %>% filter(group == "A")
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
ratings %>% filter(group == "A") %>% select(fruit, sampled)
## # A tibble: 4 × 2## fruit sampled ## <chr> <date> ## 1 apple 2021-07-05## 2 avocado 2021-07-09## 3 bell pepper 2021-07-22## 4 blackberry 2021-07-01
# Group "A" only, reduced to fruit and sampling date, with the abbreviated
# month name of the sampling date added as text.
ratings %>%
  filter(group == "A") %>%
  select(fruit, sampled) %>%
  mutate(month_name = format(sampled, "%b"))
## # A tibble: 4 × 3## fruit sampled month_name## <chr> <date> <chr> ## 1 apple 2021-07-05 Jul ## 2 avocado 2021-07-09 Jul ## 3 bell pepper 2021-07-22 Jul ## 4 blackberry 2021-07-01 Jul
# Full pipeline: filter rows, select columns, add a derived column and
# finally sort by fruit name in descending order.
ratings %>%
  filter(group == "A") %>%
  select(fruit, sampled) %>%
  mutate(month_name = format(sampled, "%b")) %>%
  arrange(desc(fruit))
## # A tibble: 4 × 3## fruit sampled month_name## <chr> <date> <chr> ## 1 blackberry 2021-07-01 Jul ## 2 bell pepper 2021-07-22 Jul ## 3 avocado 2021-07-09 Jul ## 4 apple 2021-07-05 Jul
data frame
data frame
# Show the first two rows of the (ungrouped) tibble.
ratings %>%
  head(n = 2)
## # A tibble: 2 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13
# Same call on a grouped tibble. As the printed result shows, head() still
# returns just the first two rows overall; it is not applied per group.
ratings %>%
  group_by(group) %>%
  head(n = 2)
## # A tibble: 2 × 5## # Groups: group [2]## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13
# Collapse the whole table to a single row holding the mean rating.
ratings %>%
  summarise(mean = mean(rating))
## # A tibble: 1 × 1## mean## <dbl>## 1 3
# With a grouping in place, summarise() produces one mean per group.
ratings %>%
  group_by(group) %>%
  summarise(mean = mean(rating))
## # A tibble: 2 × 2## group mean## <chr> <dbl>## 1 A 4.5## 2 B 1.5
# Count the rows in each group via n() inside summarise().
ratings %>%
  group_by(group) %>%
  summarise(count = n())
## # A tibble: 2 × 2## group count## <chr> <int>## 1 A 4## 2 B 4
# Shorthand for the group_by() + summarise(n()) pattern: count() tallies
# rows per `group` and `name` sets the name of the resulting column.
ratings %>%
  count(group, name = "count")
## # A tibble: 2 × 2## group count## <chr> <int>## 1 A 4## 2 B 4
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Attach a grouping on `group`. The rows are unchanged; the printed header
# gains a "# Groups:" line.
ratings %>%
  group_by(group)
## # A tibble: 8 × 5## # Groups: group [2]## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Nest the grouped tibble: one row per group, with that group's remaining
# columns packed into the list-column `data`.
ratings %>%
  group_by(group) %>%
  nest()
## # A tibble: 2 × 2## # Groups: group [2]## group data ## <chr> <list> ## 1 A <tibble [4 × 4]>## 2 B <tibble [4 × 4]>
# Store the group "A" rows in `subset`. The outer parentheses make R print
# the assigned value as well.
(
  subset <- ratings %>%
    filter(group == "A")
)
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
# Store the group "A" rows in `subset`; the outer parentheses also print it.
(subset <- ratings %>%
  filter(group == "A"))

# Describe the top-rated fruit in a data frame.
#
# df: a data frame with (at least) `rating` and `fruit` columns.
# Returns: the text "The highest rated fruit is <fruit>".
my_func <- function(df) {
  # Keep the rows with the maximum rating; slice(1) takes the first row when
  # several rows tie, and pull() extracts its fruit name as a plain vector.
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)
  paste("The highest rated fruit is", f)
}
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
# Store the group "A" rows in `subset`; the outer parentheses also print it.
(subset <- ratings %>%
  filter(group == "A"))

# Describe the top-rated fruit in a data frame.
#
# df: a data frame with (at least) `rating` and `fruit` columns.
# Returns: the text "The highest rated fruit is <fruit>".
my_func <- function(df) {
  # Keep the rows with the maximum rating; slice(1) takes the first row when
  # several rows tie, and pull() extracts its fruit name as a plain vector.
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)
  paste("The highest rated fruit is", f)
}

# Try the function on the single-group subset.
my_func(subset)
## # A tibble: 4 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 3 A avocado 5 2021-07-09## 3 5 A bell pepper 4 2021-07-22## 4 7 A blackberry 5 2021-07-01
## [1] "The highest rated fruit is avocado"
# Describe the top-rated fruit in a data frame.
#
# df: a data frame with (at least) `rating` and `fruit` columns.
# Returns: the text "The highest rated fruit is <fruit>".
my_func <- function(df) {
  # Keep the rows with the maximum rating; slice(1) takes the first row when
  # several rows tie, and pull() extracts its fruit name as a plain vector.
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)
  paste("The highest rated fruit is", f)
}
# Describe the top-rated fruit in a data frame.
#
# df: a data frame with (at least) `rating` and `fruit` columns.
# Returns: the text "The highest rated fruit is <fruit>".
my_func <- function(df) {
  # Keep the rows with the maximum rating; slice(1) takes the first row when
  # several rows tie, and pull() extracts its fruit name as a plain vector.
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)
  paste("The highest rated fruit is", f)
}

# One row per group, with each group's rows nested in the `data` list-column.
ratings %>%
  group_by(group) %>%
  nest()
## # A tibble: 2 × 2## # Groups: group [2]## group data ## <chr> <list> ## 1 A <tibble [4 × 4]>## 2 B <tibble [4 × 4]>
# Describe the top-rated fruit in a data frame.
#
# df: a data frame with (at least) `rating` and `fruit` columns.
# Returns: the text "The highest rated fruit is <fruit>".
my_func <- function(df) {
  # Keep the rows with the maximum rating; slice(1) takes the first row when
  # several rows tie, and pull() extracts its fruit name as a plain vector.
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)
  paste("The highest rated fruit is", f)
}

# Nest by group, then call my_func() once per nested tibble. map() returns a
# list, so `description` is a list-column at this stage.
ratings %>%
  group_by(group) %>%
  nest() %>%
  mutate(description = map(data, my_func))
## # A tibble: 2 × 3## # Groups: group [2]## group data description## <chr> <list> <list> ## 1 A <tibble [4 × 4]> <chr [1]> ## 2 B <tibble [4 × 4]> <chr [1]>
# Describe the top-rated fruit in a data frame.
#
# df: a data frame with (at least) `rating` and `fruit` columns.
# Returns: the text "The highest rated fruit is <fruit>".
my_func <- function(df) {
  # Keep the rows with the maximum rating; slice(1) takes the first row when
  # several rows tie, and pull() extracts its fruit name as a plain vector.
  f <- df %>%
    filter(rating == max(rating)) %>%
    slice(1) %>%
    pull(fruit)
  paste("The highest rated fruit is", f)
}

# Nest by group, call my_func() once per nested tibble, then unnest the
# resulting list-column so `description` becomes an ordinary character column.
ratings %>%
  group_by(group) %>%
  nest() %>%
  mutate(description = map(data, my_func)) %>%
  unnest(description)
## # A tibble: 2 × 3## # Groups: group [2]## group data description ## <chr> <list> <chr> ## 1 A <tibble [4 × 4]> The highest rated fruit is avocado## 2 B <tibble [4 × 4]> The highest rated fruit is apricot
library(gapminder)
library(gapminder)

# Fit a linear model of life expectancy on year.
#
# df: a data frame with `lifeExp` and `year` columns.
# Returns: the fitted `lm` object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}
library(gapminder)

# Fit a linear model of life expectancy on year.
#
# df: a data frame with `lifeExp` and `year` columns.
# Returns: the fitted `lm` object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}

# Plot life expectancy against year with a linear-model smoother on top.
#
# df: a data frame with `year` and `lifeExp` columns.
# Returns: a ggplot object (points plus a geom_smooth(method = "lm") layer).
plot_graph <- function(df) {
  ggplot(df, aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm")
}
library(gapminder)

# Fit a linear model of life expectancy on year.
#
# df: a data frame with `lifeExp` and `year` columns.
# Returns: the fitted `lm` object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}

# Plot life expectancy against year with a linear-model smoother on top.
#
# df: a data frame with `year` and `lifeExp` columns.
# Returns: a ggplot object (points plus a geom_smooth(method = "lm") layer).
plot_graph <- function(df) {
  ggplot(df, aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm")
}

# Print the raw gapminder data set.
gapminder
## # A tibble: 1,704 × 6## country continent year lifeExp pop gdpPercap## <fct> <fct> <int> <dbl> <int> <dbl>## 1 Afghanistan Asia 1952 28.8 8425333 779.## 2 Afghanistan Asia 1957 30.3 9240934 821.## 3 Afghanistan Asia 1962 32.0 10267083 853.## 4 Afghanistan Asia 1967 34.0 11537966 836.## 5 Afghanistan Asia 1972 36.1 13079460 740.## 6 Afghanistan Asia 1977 38.4 14880372 786.## 7 Afghanistan Asia 1982 39.9 12881816 978.## 8 Afghanistan Asia 1987 40.8 13867957 852.## 9 Afghanistan Asia 1992 41.7 16317921 649.## 10 Afghanistan Asia 1997 41.8 22227415 635.## # … with 1,694 more rows
library(gapminder)

# Fit a linear model of life expectancy on year.
#
# df: a data frame with `lifeExp` and `year` columns.
# Returns: the fitted `lm` object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}

# Plot life expectancy against year with a linear-model smoother on top.
#
# df: a data frame with `year` and `lifeExp` columns.
# Returns: a ggplot object (points plus a geom_smooth(method = "lm") layer).
plot_graph <- function(df) {
  ggplot(df, aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm")
}

# One row per country, with that country's rows nested in `data`.
gapminder %>%
  group_by(country) %>%
  nest()
## # A tibble: 142 × 2## # Groups: country [142]## country data ## <fct> <list> ## 1 Afghanistan <tibble [12 × 5]>## 2 Albania <tibble [12 × 5]>## 3 Algeria <tibble [12 × 5]>## 4 Angola <tibble [12 × 5]>## 5 Argentina <tibble [12 × 5]>## 6 Australia <tibble [12 × 5]>## 7 Austria <tibble [12 × 5]>## 8 Bahrain <tibble [12 × 5]>## 9 Bangladesh <tibble [12 × 5]>## 10 Belgium <tibble [12 × 5]>## # … with 132 more rows
library(gapminder)

# Fit a linear model of life expectancy on year.
#
# df: a data frame with `lifeExp` and `year` columns.
# Returns: the fitted `lm` object.
lm_fit <- function(df) {
  lm(lifeExp ~ year, data = df)
}

# Plot life expectancy against year with a linear-model smoother on top.
#
# df: a data frame with `year` and `lifeExp` columns.
# Returns: a ggplot object (points plus a geom_smooth(method = "lm") layer).
plot_graph <- function(df) {
  ggplot(df, aes(x = year, y = lifeExp)) +
    geom_point() +
    geom_smooth(method = "lm")
}

# Nest by country, then fit one model and draw one plot per nested tibble;
# map() stores the results in the list-columns `model` and `plot`.
# The result is kept as `gapminder_modified` because the later snippets read
# that object; the outer parentheses still print it.
(gapminder_modified <- gapminder %>%
  group_by(country) %>%
  nest() %>%
  mutate(
    model = map(data, lm_fit),
    plot = map(data, plot_graph)
  ))
## # A tibble: 142 × 4## # Groups: country [142]## country data model plot ## <fct> <list> <list> <list>## 1 Afghanistan <tibble [12 × 5]> <lm> <gg> ## 2 Albania <tibble [12 × 5]> <lm> <gg> ## 3 Algeria <tibble [12 × 5]> <lm> <gg> ## 4 Angola <tibble [12 × 5]> <lm> <gg> ## 5 Argentina <tibble [12 × 5]> <lm> <gg> ## 6 Australia <tibble [12 × 5]> <lm> <gg> ## 7 Austria <tibble [12 × 5]> <lm> <gg> ## 8 Bahrain <tibble [12 × 5]> <lm> <gg> ## 9 Bangladesh <tibble [12 × 5]> <lm> <gg> ## 10 Belgium <tibble [12 × 5]> <lm> <gg> ## # … with 132 more rows
# Pick one country's row and extract its `model` list-column entry.
# Assumes `gapminder_modified` holds the nested tibble with `model` and
# `plot` list-columns built by the nest()/mutate() pipeline.
gapminder_modified %>%
  filter(country == "United Kingdom") %>%
  pull(model)
## [[1]]## ## Call:## lm(formula = lifeExp ~ year, data = df)## ## Coefficients:## (Intercept) year ## -294.197 0.186
# Pick one country's row and extract its `plot` list-column entry.
# Assumes `gapminder_modified` holds the nested tibble with `model` and
# `plot` list-columns built by the nest()/mutate() pipeline.
gapminder_modified %>%
  filter(country == "United Kingdom") %>%
  pull(plot)
## [[1]]
## `geom_smooth()` using formula 'y ~ x'
# Drop the `continent` column, keeping everything else.
gapminder %>%
  select(-c(continent))
## # A tibble: 1,704 × 5## country year lifeExp pop gdpPercap## <fct> <int> <dbl> <int> <dbl>## 1 Afghanistan 1952 28.8 8425333 779.## 2 Afghanistan 1957 30.3 9240934 821.## 3 Afghanistan 1962 32.0 10267083 853.## 4 Afghanistan 1967 34.0 11537966 836.## 5 Afghanistan 1972 36.1 13079460 740.## 6 Afghanistan 1977 38.4 14880372 786.## 7 Afghanistan 1982 39.9 12881816 978.## 8 Afghanistan 1987 40.8 13867957 852.## 9 Afghanistan 1992 41.7 16317921 649.## 10 Afghanistan 1997 41.8 22227415 635.## # … with 1,694 more rows
# Reshape to long form: every column except `country` and `year` is stacked
# into name/value pairs (`stat` holds the old column name, `value` its value).
gapminder %>%
  select(-c(continent)) %>%
  pivot_longer(
    cols = -c(country, year),
    names_to = "stat",
    values_to = "value"
  )
## # A tibble: 5,112 × 4## country year stat value## <fct> <int> <chr> <dbl>## 1 Afghanistan 1952 lifeExp 28.8## 2 Afghanistan 1952 pop 8425333 ## 3 Afghanistan 1952 gdpPercap 779. ## 4 Afghanistan 1957 lifeExp 30.3## 5 Afghanistan 1957 pop 9240934 ## 6 Afghanistan 1957 gdpPercap 821. ## 7 Afghanistan 1962 lifeExp 32.0## 8 Afghanistan 1962 pop 10267083 ## 9 Afghanistan 1962 gdpPercap 853. ## 10 Afghanistan 1967 lifeExp 34.0## # … with 5,102 more rows
# Keep only the three columns needed for the wide reshape.
gapminder %>%
  select(country, year, pop)
## # A tibble: 1,704 × 3## country year pop## <fct> <int> <int>## 1 Afghanistan 1952 8425333## 2 Afghanistan 1957 9240934## 3 Afghanistan 1962 10267083## 4 Afghanistan 1967 11537966## 5 Afghanistan 1972 13079460## 6 Afghanistan 1977 14880372## 7 Afghanistan 1982 12881816## 8 Afghanistan 1987 13867957## 9 Afghanistan 1992 16317921## 10 Afghanistan 1997 22227415## # … with 1,694 more rows
# Reshape to wide form: one row per country, one column per year, with the
# population as the cell value.
gapminder %>%
  select(country, year, pop) %>%
  pivot_wider(
    names_from = "year",
    values_from = "pop"
  )
## # A tibble: 142 × 13## country `1952` `1957` `1962` `1967` `1972` `1977` `1982` `1987` `1992` `1997`## <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>## 1 Afghan… 8.43e6 9.24e6 1.03e7 1.15e7 1.31e7 1.49e7 1.29e7 1.39e7 1.63e7 2.22e7## 2 Albania 1.28e6 1.48e6 1.73e6 1.98e6 2.26e6 2.51e6 2.78e6 3.08e6 3.33e6 3.43e6## 3 Algeria 9.28e6 1.03e7 1.10e7 1.28e7 1.48e7 1.72e7 2.00e7 2.33e7 2.63e7 2.91e7## 4 Angola 4.23e6 4.56e6 4.83e6 5.25e6 5.89e6 6.16e6 7.02e6 7.87e6 8.74e6 9.88e6## 5 Argent… 1.79e7 1.96e7 2.13e7 2.29e7 2.48e7 2.70e7 2.93e7 3.16e7 3.40e7 3.62e7## 6 Austra… 8.69e6 9.71e6 1.08e7 1.19e7 1.32e7 1.41e7 1.52e7 1.63e7 1.75e7 1.86e7## 7 Austria 6.93e6 6.97e6 7.13e6 7.38e6 7.54e6 7.57e6 7.57e6 7.58e6 7.91e6 8.07e6## 8 Bahrain 1.20e5 1.39e5 1.72e5 2.02e5 2.31e5 2.97e5 3.78e5 4.55e5 5.29e5 5.99e5## 9 Bangla… 4.69e7 5.14e7 5.68e7 6.28e7 7.08e7 8.04e7 9.31e7 1.04e8 1.14e8 1.23e8## 10 Belgium 8.73e6 8.99e6 9.22e6 9.56e6 9.71e6 9.82e6 9.86e6 9.87e6 1.00e7 1.02e7## # … with 132 more rows, and 2 more variables: 2002 <int>, 2007 <int>
group_details
## # A tibble: 4 × 2## group time ## <chr> <chr> ## 1 A morning ## 2 B lunchtime## 3 C afternoon## 4 D evening
# `group_details` lists more groups (A-D) than appear in `ratings` (A and B only).
fruit_details
## # A tibble: 3 × 2## fruit price## <chr> <dbl>## 1 apple 2## 2 banana 4## 3 blackberry 6
# `fruit_details` lists fewer fruits (apple, banana, blackberry) than appear in `ratings`.
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Left join: keep every row of `ratings` and add the matching `time` from
# `group_details`, matched on `group`.
ratings %>%
  left_join(
    group_details,
    by = "group"
  )
## # A tibble: 8 × 6## id_column group fruit rating sampled time ## <dbl> <chr> <chr> <dbl> <date> <chr> ## 1 1 A apple 4 2021-07-05 morning ## 2 2 B apricot 3 2021-07-13 lunchtime## 3 3 A avocado 5 2021-07-09 morning ## 4 4 B banana 1 2021-07-26 lunchtime## 5 5 A bell pepper 4 2021-07-22 morning ## 6 6 B bilberry 1 2021-07-31 lunchtime## 7 7 A blackberry 5 2021-07-01 morning ## 8 8 B blackcurrant 1 2021-07-18 lunchtime
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Left join against the smaller lookup table: every `ratings` row is kept,
# and fruits without a match in `fruit_details` get NA for `price`.
ratings %>%
  left_join(
    fruit_details,
    by = "fruit"
  )
## # A tibble: 8 × 6## id_column group fruit rating sampled price## <dbl> <chr> <chr> <dbl> <date> <dbl>## 1 1 A apple 4 2021-07-05 2## 2 2 B apricot 3 2021-07-13 NA## 3 3 A avocado 5 2021-07-09 NA## 4 4 B banana 1 2021-07-26 4## 5 5 A bell pepper 4 2021-07-22 NA## 6 6 B bilberry 1 2021-07-31 NA## 7 7 A blackberry 5 2021-07-01 6## 8 8 B blackcurrant 1 2021-07-18 NA
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Inner join: keep only the rows whose `fruit` appears in both tables.
ratings %>%
  inner_join(
    fruit_details,
    by = "fruit"
  )
## # A tibble: 3 × 6## id_column group fruit rating sampled price## <dbl> <chr> <chr> <dbl> <date> <dbl>## 1 1 A apple 4 2021-07-05 2## 2 4 B banana 1 2021-07-26 4## 3 7 A blackberry 5 2021-07-01 6
ratings
## # A tibble: 8 × 5## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05## 2 2 B apricot 3 2021-07-13## 3 3 A avocado 5 2021-07-09## 4 4 B banana 1 2021-07-26## 5 5 A bell pepper 4 2021-07-22## 6 6 B bilberry 1 2021-07-31## 7 7 A blackberry 5 2021-07-01## 8 8 B blackcurrant 1 2021-07-18
# Full join: keep all rows from both tables. Groups that exist only in
# `group_details` appear as extra rows with NA in the `ratings` columns.
ratings %>%
  full_join(
    group_details,
    by = "group"
  )
## # A tibble: 10 × 6## id_column group fruit rating sampled time ## <dbl> <chr> <chr> <dbl> <date> <chr> ## 1 1 A apple 4 2021-07-05 morning ## 2 2 B apricot 3 2021-07-13 lunchtime## 3 3 A avocado 5 2021-07-09 morning ## 4 4 B banana 1 2021-07-26 lunchtime## 5 5 A bell pepper 4 2021-07-22 morning ## 6 6 B bilberry 1 2021-07-31 lunchtime## 7 7 A blackberry 5 2021-07-01 morning ## 8 8 B blackcurrant 1 2021-07-18 lunchtime## 9 NA C <NA> NA NA afternoon## 10 NA D <NA> NA NA evening