gapminder
library(tidyverse)library(gapminder)glimpse(gapminder)
## Observations: 1,704## Variables: 6## $ country <fctr> Afghanistan, Afghanistan, Afghanistan, Afghanistan,...## $ continent <fctr> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asi...## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992...## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.8...## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 1488...## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 78...
ggplot(data=gapminder, aes(x=year, y=lifeExp, group=country)) +geom_line(alpha=0.5)
year
gapminder2 <- gapminder %>% mutate(year1950 = year-1950)
head(gapminder2)
## # A tibble: 6 x 7## country continent year lifeExp pop gdpPercap year1950## <fctr> <fctr> <int> <dbl> <int> <dbl> <dbl>## 1 Afghanistan Asia 1952 28.801 8425333 779.4453 2## 2 Afghanistan Asia 1957 30.332 9240934 820.8530 7## 3 Afghanistan Asia 1962 31.997 10267083 853.1007 12## 4 Afghanistan Asia 1967 34.020 11537966 836.1971 17## 5 Afghanistan Asia 1972 36.088 13079460 739.9811 22## 6 Afghanistan Asia 1977 38.438 14880372 786.1134 27
oz <- gapminder2 %>% filter(country=="Australia")head(oz)
## # A tibble: 6 x 7## country continent year lifeExp pop gdpPercap year1950## <fctr> <fctr> <int> <dbl> <int> <dbl> <dbl>## 1 Australia Oceania 1952 69.12 8691212 10039.60 2## 2 Australia Oceania 1957 70.33 9712569 10949.65 7## 3 Australia Oceania 1962 70.93 10794968 12217.23 12## 4 Australia Oceania 1967 71.10 11872264 14526.12 17## 5 Australia Oceania 1972 71.93 13177000 16788.63 22## 6 Australia Oceania 1977 73.49 14074100 18334.20 27
ggplot(data=oz, aes(x=year, y=lifeExp)) +geom_point() +geom_smooth(method="lm", se=FALSE)
oz_lm <- lm(lifeExp~year1950, data=oz)oz_lm
## ## Call:## lm(formula = lifeExp ~ year1950, data = oz)## ## Coefficients:## (Intercept) year1950 ## 67.9451 0.2277
summary(oz_lm)
## ## Call:## lm(formula = lifeExp ~ year1950, data = oz)## ## Residuals:## Min 1Q Median 3Q Max ## -1.0250 -0.5201 0.1162 0.3781 0.7909 ## ## Coefficients:## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 67.94507 0.35476 191.53 < 2e-16 ***## year1950 0.22772 0.01038 21.94 8.67e-10 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1## ## Residual standard error: 0.6206 on 10 degrees of freedom## Multiple R-squared: 0.9796, Adjusted R-squared: 0.9776 ## F-statistic: 481.3 on 1 and 10 DF, p-value: 8.667e-10
This summary output is useful enough if you just want to read it. However, converting it to a data frame that contains all the same information, so that you can combine it with other models or do further analysis, is not trivial.
tidy
augment
glance
tidy
: constructs a data frame that summarizes the model's statistical findingslibrary("broom")oz_coef <- tidy(oz_lm)oz_coef
## term estimate std.error statistic p.value## 1 (Intercept) 67.9450653 0.35475797 191.5251 3.700841e-19## 2 year1950 0.2277238 0.01037958 21.9396 8.667222e-10
class(oz_coef)
## [1] "data.frame"
class(oz_lm) # oz_lm - the messy output given by `lm`
## [1] "lm"
glance
: one-row summary of the modeloz_fit <- glance(oz_lm)oz_fit
## r.squared adj.r.squared sigma statistic p.value df logLik## 1 0.9796477 0.9776125 0.6206086 481.3459 8.667222e-10 2 -10.20868## AIC BIC deviance df.residual## 1 26.41735 27.87207 3.85155 10
augment
: add columns to the original data that was modeledoz_diag <- augment(oz_lm)head(oz_diag)
## lifeExp year1950 .fitted .se.fit .resid .hat .sigma## 1 69.12 2 68.40051 0.3370035 0.7194872 0.29487179 0.5885398## 2 70.33 7 69.53913 0.2943424 0.7908683 0.22494172 0.5816212## 3 70.93 12 70.67775 0.2551280 0.2522494 0.16899767 0.6476436## 4 71.10 17 71.81637 0.2212012 -0.7163695 0.12703963 0.6021888## 5 71.93 22 72.95499 0.1953366 -1.0249883 0.09906760 0.5462421## 6 73.49 27 74.09361 0.1810238 -0.6036072 0.08508159 0.6194376## .cooksd .std.resid## 1 0.39854524 1.3806107## 2 0.30404936 1.4475021## 3 0.02021487 0.4458731## 4 0.11106027 -1.2354410## 5 0.16646369 -1.7400232## 6 0.04807443 -1.0168232
gapminder
by country and continentby_country <- gapminder2 %>%select(country, year1950, lifeExp, continent) %>% group_by(country, continent)head(by_country)
## # A tibble: 6 x 4## # Groups: country, continent [1]## country year1950 lifeExp continent## <fctr> <dbl> <dbl> <fctr>## 1 Afghanistan 2 28.801 Asia## 2 Afghanistan 7 30.332 Asia## 3 Afghanistan 12 31.997 Asia## 4 Afghanistan 17 34.020 Asia## 5 Afghanistan 22 36.088 Asia## 6 Afghanistan 27 38.438 Asia
by_country <- by_country %>% nest() # nesting year 1950 and lifeExp into a listhead(by_country)
## # A tibble: 6 x 3## country continent data## <fctr> <fctr> <list>## 1 Afghanistan Asia <tibble [12 x 2]>## 2 Albania Europe <tibble [12 x 2]>## 3 Algeria Africa <tibble [12 x 2]>## 4 Angola Africa <tibble [12 x 2]>## 5 Argentina Americas <tibble [12 x 2]>## 6 Australia Oceania <tibble [12 x 2]>
by_country$data[[6]]
## # A tibble: 12 x 2## year1950 lifeExp## <dbl> <dbl>## 1 2 69.120## 2 7 70.330## 3 12 70.930## 4 17 71.100## 5 22 71.930## 6 27 73.490## 7 32 74.740## 8 37 76.320## 9 42 77.560## 10 47 78.830## 11 52 80.370## 12 57 81.235
by_country <- by_country %>%mutate(model = purrr::map(data, ~ lm(lifeExp ~ year1950,data = .)))
head(by_country)
## # A tibble: 6 x 4## country continent data model## <fctr> <fctr> <list> <list>## 1 Afghanistan Asia <tibble [12 x 2]> <S3: lm>## 2 Albania Europe <tibble [12 x 2]> <S3: lm>## 3 Algeria Africa <tibble [12 x 2]> <S3: lm>## 4 Angola Africa <tibble [12 x 2]> <S3: lm>## 5 Argentina Americas <tibble [12 x 2]> <S3: lm>## 6 Australia Oceania <tibble [12 x 2]> <S3: lm>
by_country$model[[6]]
## ## Call:## lm(formula = lifeExp ~ year1950, data = .)## ## Coefficients:## (Intercept) year1950 ## 67.9451 0.2277
country_coefs <- by_country %>%unnest(model %>% purrr::map(broom::tidy))
head(country_coefs)
## # A tibble: 6 x 7## country continent term estimate std.error statistic## <fctr> <fctr> <chr> <dbl> <dbl> <dbl>## 1 Afghanistan Asia (Intercept) 29.3566375 0.69898128 41.99918## 2 Afghanistan Asia year1950 0.2753287 0.02045093 13.46289## 3 Albania Europe (Intercept) 58.5597618 1.13357581 51.65933## 4 Albania Europe year1950 0.3346832 0.03316639 10.09104## 5 Algeria Africa (Intercept) 42.2364149 0.75626904 55.84840## 6 Algeria Africa year1950 0.5692797 0.02212707 25.72775## # ... with 1 more variables: p.value <dbl>
country_fit <- by_country %>%unnest(model %>%purrr::map(broom::glance))
head(country_fit)
## # A tibble: 6 x 15## country continent data model r.squared adj.r.squared## <fctr> <fctr> <list> <list> <dbl> <dbl>## 1 Afghanistan Asia <tibble [12 x 2]> <S3: lm> 0.9477123 0.9424835## 2 Albania Europe <tibble [12 x 2]> <S3: lm> 0.9105778 0.9016355## 3 Algeria Africa <tibble [12 x 2]> <S3: lm> 0.9851172 0.9836289## 4 Angola Africa <tibble [12 x 2]> <S3: lm> 0.8878146 0.8765961## 5 Argentina Americas <tibble [12 x 2]> <S3: lm> 0.9955681 0.9951249## 6 Australia Oceania <tibble [12 x 2]> <S3: lm> 0.9796477 0.9776125## # ... with 9 more variables: sigma <dbl>, statistic <dbl>, p.value <dbl>,## # df <int>, logLik <dbl>, AIC <dbl>, BIC <dbl>, deviance <dbl>,## # df.residual <int>
library(knitr)country_coefs <- country_coefs %>%select(country, continent, term, estimate) %>%spread(term, estimate)
head(country_coefs) %>% kable()
library(plotly)p1 <- ggplot(gapminder, aes(x=lifeExp, y=gdpPercap, colour=continent, label=country)) + geom_point()ggplotly(p1)
Most of the materials I've used here are based on
'R for Data Science' by Garrett Grolemund and Hadley Wickham
To learn more about purrr::map 'Read here'
Read more about broom package ' here'
gapminder
library(tidyverse)library(gapminder)glimpse(gapminder)
## Observations: 1,704## Variables: 6## $ country <fctr> Afghanistan, Afghanistan, Afghanistan, Afghanistan,...## $ continent <fctr> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asi...## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992...## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.8...## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 1488...## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 78...
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |