I am trying to visualize continuous data points across two Time points (pre and post intervention) with three parallel Conditions (CET, RES, END), and Response to intervention (High or Low, i.e., CET_Hi, CET_Lo, etc)
I'd like to create a bar graph showing the mean output for each Condition on the X-axis, with separate bars for Time (Pre and Post). Then, I'd like to overlay the individual Subject data points at Pre and Post, with lines connecting each Subject's data points, and have the Responses grouped by color.
I have successfully created the bar graph using ggplot2 with the geom_bar function. I have also got geom_point to overlay the individual points by condition, but can't get the position aligned with the Time.
# Bars: summary values per Condition, dodged side by side by Time.
ggplot(
  Leg_Press_Summary,
  aes(x = Condition, y = Leg_Press, fill = as.factor(Time))
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_brewer(
    palette = "Blues",
    name = "Time",
    labels = c("Pre", "Post")
  ) +
  # Individual observations from Phys_Data, coloured by Response.
  # No position adjustment is given for this layer.
  geom_point(
    data = Phys_Data,
    aes(x = Condition, y = Leg_Press, colour = Response, fill = as.factor(Time))
  ) +
  # One line per Subject joining that subject's observations.
  geom_line(
    data = Phys_Data,
    aes(x = Condition, y = Leg_Press, group = Subject)
  ) +
  labs(title = "Leg Press", x = "Condition", y = "Leg Press (kg)")
I expected the geom_points to be positioned according to Time; however, the points just stack in a vertical line between the Pre and Post bars for each condition.
My result:

Figure I'm trying to recreate:

How can I fix this?
Data set included below, which I forgot to include in original post.
LegPress
# A tibble: 36 x 5
Subject  Time Condition Response Leg_Press
6     1 CET       CET_Hi        212.
6     2 CET       CET_Hi        300 
9     1 CET       CET_Lo        350 
9     2 CET       CET_Lo        370 
14     1 CET       CET_Hi        330 
14     2 CET       CET_Hi        450 
26     1 CET       CET_Hi        180 
26     2 CET       CET_Hi        250 
28     1 CET       CET_Lo        230 
28     2 CET       CET_Lo        275 
29     1 CET       CET_Lo        330 
29     2 CET       CET_Lo        325 
2     1 RES       RES_Hi        142.
2     2 RES       RES_Hi        225 
16     1 RES       RES_Lo        280 
16     2 RES       RES_Lo        320 
19     1 RES       RES_Hi        205 
19     2 RES       RES_Hi        295 
27     1 RES       RES_Hi        175 
27     2 RES       RES_Hi        260 
31     1 RES       RES_Lo        340 
31     2 RES       RES_Lo        370 
32     1 RES       RES_Lo        310 
32     2 RES       RES_Lo        370 
8     1 END       END_Lo        205 
8     2 END       END_Lo        250 
13     1 END       END_Hi        310 
13     2 END       END_Hi        320 
20     1 END       END_Hi        200 
20     2 END       END_Hi        185 
24     1 END       END_Lo        260 
24     2 END       END_Lo        270 
25     1 END       END_Hi        210 
25     2 END       END_Hi        235 
30     1 END       END_Lo        250 
30     2 END       END_Lo        245 
I think this is a case where you want to use faceting:
library(tidyverse)  
# Example data: 18 subjects, each measured at Time 1 (pre) and Time 2 (post),
# so every per-subject value is repeated twice in consecutive rows.
Phys_Data <- data.frame(
  Subject = rep(
    c(6, 9, 14, 26, 28, 29, 2, 16, 19, 27, 31, 32, 8, 13, 20, 24, 25, 30),
    each = 2
  ),
  Time = rep(c(1, 2), times = 18),
  # 6 subjects x 2 time points = 12 rows per condition.
  Condition = rep(c("CET", "RES", "END"), each = 12),
  Response = rep(
    c(
      "CET_Hi", "CET_Lo", "CET_Hi", "CET_Hi", "CET_Lo", "CET_Lo",
      "RES_Hi", "RES_Lo", "RES_Hi", "RES_Hi", "RES_Lo", "RES_Lo",
      "END_Lo", "END_Hi", "END_Hi", "END_Lo", "END_Hi", "END_Lo"
    ),
    each = 2
  ),
  # Pre/post pairs in subject order, one condition per row of numbers.
  Leg_Press = c(
    212, 300, 350, 370, 330, 450, 180, 250, 230, 275, 330, 325,
    142, 225, 280, 320, 205, 295, 175, 260, 340, 370, 310, 370,
    205, 250, 310, 320, 200, 185, 260, 270, 210, 235, 250, 245
  ),
  stringsAsFactors = FALSE
)
# One facet per Condition with Time on the x-axis inside each facet, so the
# bars, points and per-subject lines all use the same x positions.
Phys_Data %>%
  mutate(
    Time = as.factor(Time),
    # Keep only the part after the underscore ("CET_Hi" -> "Hi").
    Response = str_split_fixed(Response, "_", 2)[, 2]
  ) %>%
  ggplot(mapping = aes(x = Time, y = Leg_Press, fill = Time)) +
  facet_wrap(~Condition, strip.position = "bottom") +
  # Bars show the mean Leg_Press per Time x Condition, derived from the
  # plot data when the layer is built.
  geom_col(
    data = function(d) {
      d %>%
        group_by(Time, Condition) %>%
        summarize(Leg_Press = mean(Leg_Press)) %>%
        ungroup()
    }
  ) +
  scale_fill_brewer(
    palette = "Blues",
    name = "Time",
    labels = c("Pre", "Post")
  ) +
  geom_point(mapping = aes(color = Response)) +
  # One line per Subject joining the pre and post observations.
  geom_line(mapping = aes(color = Response, group = Subject)) +
  labs(title = "Leg Press", x = "Condition", y = "Leg Press (kg)") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

Created on 2019-09-04 by the reprex package (v0.3.0)
Loading packages:
library(dplyr); library(tidyr); library(ggplot2)
Setting up example data based loosely on your graphics:
# Fixed seed so the simulated values are reproducible.
set.seed(4)
# NOTE(review): the rnorm() draws below total 720 values, while Time and
# Condition are each 120 long, so data.frame() recycles the labels 6 times
# and df ends up with 720 rows. The two 320-sized draws may have been meant
# as 20 -- confirm before changing, since later outputs depend on this data.
df <- data.frame(
  Time = rep(rep(c("pre", "post"), each = 20), times = 3),
  Condition = rep(c("CET", "END", "RES"), each = 40),
  Leg_Press = c(
    rnorm(20, mean = 275, sd = 20),
    rnorm(20, mean = 325, sd = 20),
    rnorm(20, mean = 245, sd = 20),
    rnorm(320, mean = 251, sd = 20),
    rnorm(20, mean = 247, sd = 10),
    rnorm(320, mean = 305, sd = 10)
  )
)
Generate a summary table of mean, min and max value for each condition and time period:
# Summary table: mean, max and min of Leg_Press for every Time x Condition
# combination. summarise() peels off only the last grouping variable, so
# the result is still grouped by Time.
dat <- df %>%
  group_by(Time, Condition) %>%
  summarise(
    mean = mean(Leg_Press),
    max = max(Leg_Press),
    min = min(Leg_Press)
  )
# Put "pre" before "post". The argument is spelled out as `levels` rather
# than relying on partial matching of `level`.
dat$Time <- factor(dat$Time, levels = c("pre", "post"))
# # A tibble: 6 x 5
# # Groups:   Time [2]
#   Time  Condition  mean   max   min
#   <fct> <fct>     <dbl> <dbl> <dbl>
# 1 post  CET        283.  373.  209.
# 2 post  END        277.  329.  200.
# 3 post  RES        278.  328.  215.
# 4 pre   CET        273.  326.  191.
# 5 pre   END        276.  323.  197.
# 6 pre   RES        276.  329.  204.
Simple bar-plot for Leg Press by Condition, separated into pre and post time periods:
# Mean Leg_Press per Condition, with pre and post bars side by side.
ggplot(data = dat, mapping = aes(x = Condition, y = mean, fill = Time)) +
  geom_col(position = "dodge")

Calculate the new x-value for each point for maximum and minimum value:
# Numeric x position for each point: the Condition's integer code, shifted
# 0.25 to the left for "pre" and 0.25 to the right otherwise.
dat <- mutate(
  dat,
  new.x = as.numeric(as.factor(Condition)) +
    ifelse(Time == "pre", -0.25, 0.25)
)
# Dodged mean bars, with each group's max and min drawn as points at the
# manually shifted x positions.
ggplot(data = dat) +
  geom_col(
    mapping = aes(x = Condition, y = mean, fill = Time),
    position = "dodge"
  ) +
  geom_point(mapping = aes(x = new.x, y = max)) +
  geom_point(mapping = aes(x = new.x, y = min))

To draw the line for each group, you need a data-frame for each set of maximum and minimum values.
# One row per Condition with the endpoints of the segment joining the pre
# and post maxima: (x1, y1) is the pre point, (x2, y2) the post point.
max.frame <- dat %>%
  group_by(Condition) %>%
  # spread() consumes the Time column, so keep a copy in t2.
  mutate(t2 = Time) %>%
  # Move the max values into separate `pre` and `post` columns; each row
  # has a value in only one of them, hence na.rm below.
  spread(Time, max) %>%
  summarise(
    x1 = min(new.x),
    x2 = max(new.x),
    y1 = mean(pre, na.rm = TRUE),
    y2 = mean(post, na.rm = TRUE)
  )
# # A tibble: 3 x 5
#   Condition    x1    x2    y1    y2
#   <fct>     <dbl> <dbl> <dbl> <dbl>
# 1 CET        0.75  1.25  326.  373.
# 2 END        1.75  2.25  323.  329.
# 3 RES        2.75  3.25  329.  328.
# One row per Condition with the endpoints of the segment joining the pre
# and post minima: (x1, y1) is the pre point, (x2, y2) the post point.
min.frame <- dat %>%
  group_by(Condition) %>%
  # spread() consumes the Time column, so keep a copy in t2.
  mutate(t2 = Time) %>%
  # Move the min values into separate `pre` and `post` columns; each row
  # has a value in only one of them, hence na.rm below.
  spread(Time, min) %>%
  summarise(
    x1 = min(new.x),
    x2 = max(new.x),
    y1 = mean(pre, na.rm = TRUE),
    y2 = mean(post, na.rm = TRUE)
  )
# # A tibble: 3 x 5
#   Condition    x1    x2    y1    y2
#   <fct>     <dbl> <dbl> <dbl> <dbl>
# 1 CET        0.75  1.25  191.  209.
# 2 END        1.75  2.25  197.  200.
# 3 RES        2.75  3.25  204.  215.
Plot based on the three frames:
# Final figure: dodged mean bars, then the pre-to-post segments for the
# maxima and minima, then the max and min points drawn on top.
ggplot() +
  geom_col(
    data = dat,
    mapping = aes(x = Condition, y = mean, fill = Time),
    position = "dodge"
  ) +
  geom_segment(
    data = max.frame,
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2)
  ) +
  geom_segment(
    data = min.frame,
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2)
  ) +
  geom_point(data = dat, mapping = aes(x = new.x, y = max)) +
  geom_point(data = dat, mapping = aes(x = new.x, y = min))

If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With