Hands-On Exercise 1: Grammar of Graphics with ggplot2

Author

Zhang Xuerong

# Load required packages
# Install pacman only when it is missing, so re-running the document does not
# re-download the package on every run (or fail when offline).
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman", repos = "https://cloud.r-project.org/")
}

The downloaded binary packages are in
    /var/folders/fw/r5v73g5d55xfr_hq4n78p6rr0000gn/T//RtmpDBACwz/downloaded_packages
pacman::p_load(tidyverse)

# Import the exam scores.
# readr is attached explicitly so it is clear where read_csv() comes from.
# install.packages("readr")  # run only if not installed
library(readr)

exam_data <- read_csv(file = "hands-on_exercise1/data/Exam_data.csv")
Rows: 322 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ID, CLASS, GENDER, RACE
dbl (3): ENGLISH, MATHS, SCIENCE

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Base R Graphics vs. ggplot2

#install.packages("ggplot2")  # run only if not installed
library(ggplot2)

# ggplot2 histogram of Maths scores: ten bins, with a bin boundary at 100.
ggplot(exam_data, aes(MATHS)) +
  geom_histogram(
    bins = 10,
    boundary = 100,
    fill = "grey",
    colour = "black"
  ) +
  labs(title = "Distribution of Maths scores")

# Base R graphics histogram of the same MATHS column, for comparison with the
# ggplot2 version. hist() chooses its own breaks and builds the default title
# and x-axis label from the expression passed to it.
hist(exam_data$MATHS)

Geometric Objects

Bar Chart

# Bar chart: geom_bar() counts the rows in each RACE category.
ggplot(exam_data, aes(RACE)) +
  geom_bar()

Dot Plot

# Dot plot of Maths scores. The y-axis title and breaks are switched off
# because the y scale of a dot plot is not meaningful.
ggplot(exam_data, aes(MATHS)) +
  geom_dotplot(dotsize = 0.5, binwidth = 2.5) +
  scale_y_continuous(name = NULL, breaks = NULL)

Histogram

# Histogram with the default binning; ggplot2 falls back to 30 bins and
# prints a message suggesting an explicit `binwidth`.
ggplot(exam_data, aes(MATHS)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with an explicit bin count, a fill colour and bar outlines.
ggplot(exam_data, aes(MATHS)) +
  geom_histogram(
    bins = 20,
    colour = "black",
    fill = "light blue"
  )

Histogram by Gender

# Histogram of Maths scores with the bar fill mapped to GENDER.
ggplot(exam_data, aes(MATHS, fill = GENDER)) +
  geom_histogram(colour = "grey30", bins = 20)

Density Plot

# Kernel density estimate of Maths scores for all students.
ggplot(exam_data, aes(MATHS)) +
  geom_density()

# One density curve per gender, distinguished by line colour.
ggplot(exam_data, aes(MATHS, colour = GENDER)) +
  geom_density()

Boxplot

# Boxplot of Maths scores for each gender.
ggplot(exam_data, aes(GENDER, MATHS)) +
  geom_boxplot()

# Notched variant of the same boxplot.
ggplot(exam_data, aes(GENDER, MATHS)) +
  geom_boxplot(notch = TRUE)

Violin Plot

# Violin plot of Maths scores for each gender.
ggplot(exam_data, aes(GENDER, MATHS)) +
  geom_violin()

Scatterplot

# Scatterplot of English scores against Maths scores.
ggplot(exam_data, aes(MATHS, ENGLISH)) +
  geom_point()

Boxplot + Points

# Boxplot with the individual observations overlaid as small jittered points.
ggplot(exam_data, aes(GENDER, MATHS)) +
  geom_boxplot() +
  geom_point(size = 0.5, position = "jitter")

Boxplot with Mean (stat_summary)

# Boxplot with each group's mean added as a red point via stat_summary().
ggplot(exam_data, aes(GENDER, MATHS)) +
  geom_boxplot() +
  stat_summary(
    geom = "point",
    fun = "mean",
    size = 4,
    colour = "red"
  )

Boxplot with Mean (geom_point)

# Same mean marker, this time drawn by geom_point() using the "summary" stat.
ggplot(exam_data, aes(GENDER, MATHS)) +
  geom_boxplot() +
  geom_point(
    stat = "summary",
    fun = "mean",
    size = 4,
    colour = "red"
  )

Scatterplot with Smooth Line

# Scatterplot with the default smoother (loess for this data).
# `linewidth` sets the line thickness; using `size` for lines was deprecated
# in ggplot2 3.4.0 and triggers a warning.
ggplot(data = exam_data, aes(x = MATHS, y = ENGLISH)) +
  geom_point() +
  geom_smooth(linewidth = 0.5)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Scatterplot with a linear-model fit instead of the default smoother.
ggplot(exam_data, aes(MATHS, ENGLISH)) +
  geom_point() +
  geom_smooth(linewidth = 0.5, method = "lm")
`geom_smooth()` using formula = 'y ~ x'

Facets

facet_wrap()

# One histogram panel per CLASS, wrapped into a grid of panels.
ggplot(exam_data, aes(MATHS)) +
  geom_histogram(bins = 20) +
  facet_wrap(vars(CLASS))

facet_grid()

# One histogram panel per CLASS, laid out as columns of a single row.
ggplot(exam_data, aes(MATHS)) +
  geom_histogram(bins = 20) +
  facet_grid(. ~ CLASS)

Coordinate Systems

Horizontal Bar Chart

# Bar chart of RACE counts, flipped so the bars run horizontally.
ggplot(exam_data, aes(RACE)) +
  geom_bar() +
  coord_flip()

Fix Axis Limits

# Scatterplot with a linear fit, with both axes fixed to the 0-100 range.
# coord_cartesian() only changes the visible window; no data are dropped.
# `linewidth` is used for the line thickness because `size` for lines was
# deprecated in ggplot2 3.4.0.
ggplot(data = exam_data, aes(x = MATHS, y = ENGLISH)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 0.5) +
  coord_cartesian(xlim = c(0, 100), ylim = c(0, 100))
`geom_smooth()` using formula = 'y ~ x'

Themes

theme_gray (default)

# Horizontal bar chart drawn with theme_gray().
ggplot(exam_data, aes(RACE)) +
  geom_bar() +
  coord_flip() +
  theme_gray()

theme_classic

# Horizontal bar chart drawn with theme_classic().
ggplot(exam_data, aes(RACE)) +
  geom_bar() +
  coord_flip() +
  theme_classic()

theme_minimal

# Horizontal bar chart drawn with theme_minimal().
ggplot(exam_data, aes(RACE)) +
  geom_bar() +
  coord_flip() +
  theme_minimal()