library("stringr")
library("plyr")
library("dplyr")
library("ggplot2")
library("tidyr")

# Set the base directory of the replication archive.
# All input (.rds) and output (.pdf) paths below are built relative to `loc`.
## Option A (absolute path): replace 'YOURPATH' with the directory where
## 'ReplicationFiles.zip' was unpacked, uncomment the next line, and comment
## out Option B. (Previously this assignment was active but was silently
## overwritten by the assignment below, so editing it had no effect.)
# loc <- "YOURPATH/ReplicationFiles/R_Coverage_Replication/"
## Option B (default, relative path): assumes that R is run with the
## directory in which this script is located as the working directory.
loc <- "../"

## ----No Cross level interactions-------------------
# Load the aggregated simulation results for the conditions without a
# cross-level interaction. `NoCLI` is reused by the figures that follow.
NoCLI <- readRDS(str_c(loc, "RSimulations/NoCrossLevelAggregate_GO.rds"))

## Figure 2
# Keep the design variables plus the bias column of the random-intercept
# variance (biasVCcons) and its min95*/max95* companions, and give the last
# three columns generic names for plotting.
simsag <- NoCLI[, c("icc", "cli", "clusts", "estimator", "link",
                    "biasVCcons", "min95biasVCcons", "max95biasVCcons")]
colnames(simsag)[6:8] <- c("depvar", "min95", "max95")

# Shorten the estimator names and turn the ICC codes into facet labels.
# The ICC replacements are applied in this order on purpose: the first one
# turns the column into character, the later comparisons still match.
simsag$estimator <- replace(simsag$estimator,
                            simsag$estimator == "ML (lme4::glmer)", "ML")
simsag$estimator <- replace(simsag$estimator,
                            simsag$estimator == "REML/EQL (hglm::hglm2)", "REML")
simsag$icc <- replace(simsag$icc, simsag$icc == 5, "ICC = 0.05")
simsag$icc <- replace(simsag$icc, simsag$icc == 10, "ICC = 0.1")
simsag$icc <- replace(simsag$icc, simsag$icc == 15, "ICC = 0.15")

# Conditions with compositional differences ("Linear05") are left out here;
# all three ICC levels are shown, one facet row each.
plotdata <- filter(simsag, link != "Linear05")

plot <- ggplot(
  data = plotdata,
  mapping = aes(
    x = clusts, y = depvar, ymin = min95, ymax = max95,
    shape = estimator, colour = estimator, linetype = estimator
  )
) +
  # Reference line at zero
  geom_hline(yintercept = 0, linetype = "solid", size = 0.4, colour = "#000000") +
  geom_line() +
  geom_pointrange(linetype = "solid") +
  scale_linetype_manual(values = c("dashed", "solid"), labels = c("ML", "REML")) +
  scale_shape_manual(values = c(15, 19), labels = c("ML", "REML")) +
  scale_colour_manual(values = c("#3f3f3f", "#000000"), labels = c("ML", "REML")) +
  facet_grid(icc ~ link) +
  scale_x_continuous(
    name = "Number of upper-level units (e.g., countries)",
    breaks = c(5, 10, 15, 20, 25, 30)
  ) +
  scale_y_continuous(
    name = "Bias",
    limits = c(-45, 7),
    breaks = c(5, 0, -5, -10, -20, -30, -40)
  ) +
  guides(
    shape = guide_legend(title = "", ncol = 2),
    linetype = guide_legend(title = ""),
    colour = guide_legend(title = "", ncol = 2)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_line(colour = "white"),
    legend.justification = c(-6.3, 0.1),
    legend.box = "vertical",
    legend.background = element_rect(fill = NA, colour = NA),
    legend.key = element_rect(fill = NA, colour = NA),
    strip.background = element_rect(fill = "#f1f1f1", colour = NA),
    legend.position = c(.2, .05)
  )

# Write the figure to disk
pdf(
  file = str_c(loc, "Figures/Figure2_WithAdditionalICCvalues.pdf"),
  width = 8.3, height = 9.6, family = "Helvetica"
)
print(plot)
dev.off()

# Figure 2: Only results for ICC of 10 %
# Uses `simsag` as prepared for the all-ICC version of this figure, where the
# icc column already holds the facet labels (hence the string comparison).
plotdata <- filter(simsag, link != "Linear05", icc == "ICC = 0.1")

plot <- ggplot(
  data = plotdata,
  mapping = aes(
    x = clusts, y = depvar, ymin = min95, ymax = max95,
    shape = estimator, colour = estimator, linetype = estimator
  )
) +
  # Reference line at zero
  geom_hline(yintercept = 0, linetype = "solid", size = 0.4, colour = "#000000") +
  geom_line() +
  geom_pointrange(linetype = "solid") +
  scale_linetype_manual(values = c("dashed", "solid"), labels = c("ML", "REML")) +
  scale_shape_manual(values = c(15, 19), labels = c("ML", "REML")) +
  scale_colour_manual(values = c("#3f3f3f", "#000000"), labels = c("ML", "REML")) +
  # Single row of panels: one column per link
  facet_grid(. ~ link) +
  scale_x_continuous(
    name = "Number of upper-level units (e.g., countries)",
    breaks = c(5, 10, 15, 20, 25, 30)
  ) +
  scale_y_continuous(
    name = "Bias",
    limits = c(-45, 7),
    breaks = c(5, 0, -5, -10, -20, -30, -40)
  ) +
  guides(
    shape = guide_legend(title = "", ncol = 2),
    linetype = guide_legend(title = ""),
    colour = guide_legend(title = "", ncol = 2)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_line(colour = "white"),
    legend.justification = c(-6.3, 0.1),
    legend.box = "vertical",
    legend.background = element_rect(fill = NA, colour = NA),
    legend.key = element_rect(fill = NA, colour = NA),
    strip.background = element_rect(fill = "#f1f1f1", colour = NA),
    legend.position = c(.2, .05)
  )

# Write the figure to disk
pdf(
  file = str_c(loc, "Figures/Figure2.pdf"),
  width = 8.3, height = 5, family = "Helvetica"
)
print(plot)
dev.off()

## Figure 3
# Build the long-format plotting data by stacking one subset per reference
# distribution (subsetting + rbind).

# 1. Normal based (column cov95norm.z)
simsag.cov95norm <- NoCLI[, c("icc", "cli", "clusts", "estimator", "link",
                              "cov95norm.z")]
colnames(simsag.cov95norm)[6] <- "depvar"
simsag.cov95norm$quantity <- "Normal"

# 2. m-l-1 based (column cov95MminL.z)
simsag.ml1 <- NoCLI[, c("icc", "cli", "clusts", "estimator", "link",
                        "cov95MminL.z")]
colnames(simsag.ml1)[6] <- "depvar"
simsag.ml1$quantity <- "Student's t (m-l-1 rule)"

# Stack both subsets
simsag <- as.data.frame(rbind(simsag.cov95norm, simsag.ml1))

# Shorten the estimator names for the plot
simsag$estimator <- replace(simsag$estimator,
                            simsag$estimator == "ML (lme4::glmer)", "ML")
simsag$estimator <- replace(simsag$estimator,
                            simsag$estimator == "REML/EQL (hglm::hglm2)", "REML")

# One legend entry per estimator/distribution combination. The default
# (alphabetical) level order is reversed so that it lines up with the
# positional legend labels used in the scales below.
simsag$quantity <- paste0(simsag$estimator, simsag$quantity)
simsag$quantity <- factor(
  simsag$quantity,
  levels = levels(as.factor(simsag$quantity))[4:1]
)

# Only ICC 0.1 and no compositional differences
plotdata <- filter(simsag, icc == 10, link != "Linear05")

plot <- ggplot(
  data = plotdata,
  mapping = aes(
    x = clusts, y = depvar,
    shape = quantity, colour = quantity, linetype = quantity
  )
) +
  # Zero line plus a symmetric dashed band at
  # +/- 100 * qnorm(.975) * sqrt(.95 * (1 - .95) / 5000)
  # (presumably the Monte Carlo error for 5000 replications -- confirm)
  geom_hline(yintercept = 0, linetype = "solid", size = 0.4, colour = "#000000") +
  geom_hline(
    yintercept = 100 * qnorm(.975) * sqrt((0.95 * (1 - 0.95)) / 5000),
    linetype = "dashed", size = 0.4, colour = "#000000"
  ) +
  geom_hline(
    yintercept = 100 * -qnorm(.975) * sqrt((0.95 * (1 - 0.95)) / 5000),
    linetype = "dashed", size = 0.4, colour = "#000000"
  ) +
  geom_line() +
  geom_point(size = 2.5) +
  scale_shape_manual(
    values = c(19, 1, 15, 0),
    labels = c("REML: Student's t (m-l-1 rule)", "REML: Normal",
               "ML: Student's t (m-l-1 rule)", "ML: Normal")
  ) +
  scale_linetype_manual(
    values = c("solid", "dashed", "solid", "dashed"),
    labels = c("REML: Student's t (m-l-1 rule)", "REML: Normal",
               "ML: Student's t (m-l-1 rule)", "ML: Normal")
  ) +
  scale_colour_manual(
    values = c("#000000", "#3f3f3f", "#000000", "#3f3f3f"),
    labels = c("REML: Student's t (m-l-1 rule)", "REML: Normal",
               "ML: Student's t (m-l-1 rule)", "ML: Normal")
  ) +
  facet_grid(. ~ link) +
  scale_x_continuous(
    name = "Number of upper-level units (e.g., countries)",
    breaks = c(5, 10, 15, 20, 25, 30)
  ) +
  scale_y_continuous(
    name = "Actual minus nominal coverage rate (%)",
    breaks = c(-15, -10, -5, -1, 0, 1, 5, 10, 15),
    limits = c(-18, 5)
  ) +
  guides(
    shape = guide_legend(title = "", ncol = 1),
    linetype = guide_legend(title = ""),
    colour = guide_legend(title = "", ncol = 2)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_line(colour = "white"),
    legend.justification = c(-1.95, 0),
    legend.box = "vertical",
    legend.background = element_rect(fill = NA, colour = NA),
    legend.key = element_rect(fill = NA, colour = NA),
    strip.background = element_rect(fill = "#f1f1f1", colour = NA),
    legend.position = c(.1, 0)
  )

# Write the figure to disk
pdf(
  file = str_c(loc, "Figures/Figure3.pdf"),
  width = 8.3, height = 5, family = "Helvetica"
)
print(plot)
dev.off()

## Figure 4
# Compares the coverage columns for the m-l-1 rule (cov95MminL.z) and for
# Satterthwaite (cov95sat.z), restricted to REML, the linear model without
# compositional differences, and ICC code 10.
# Reshape to long for plotting (through subsetting and rbind)

# m-l-1
simsag.ml1 <- NoCLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95MminL.z")]
colnames(simsag.ml1) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.ml1$quantity <- "Student's t (G-k)"

# Satterthwaite
simsag.sat <- NoCLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95sat.z")]
colnames(simsag.sat) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.sat$quantity <- "cov95sat.z"

# Combine both
simsag <- rbind(simsag.ml1, simsag.sat)

# Recode the estimator names BEFORE filtering on them. The other figures in
# this script recode the long labels "ML (lme4::glmer)" and
# "REML/EQL (hglm::hglm2)" found in the data; comparing the unrecoded column
# with "ML" would not drop any row.
simsag$estimator[simsag$estimator == "ML (lme4::glmer)"]       <- "ML"
simsag$estimator[simsag$estimator == "REML/EQL (hglm::hglm2)"] <- "REML"

# Focus on REML and on the linear case
simsag <- simsag[simsag$estimator != "ML" & simsag$link != "Probit", ]

# Fix the level order explicitly (m-l-1 first, Satterthwaite second) so that
# it matches the positional legend labels below. Relying on the default
# alphabetical level order is locale dependent for these two strings.
simsag$quantity <- factor(
  simsag$quantity,
  levels = c("Student's t (G-k)", "cov95sat.z")
)

# Add minor deviation around x for the plot (shifts the Satterthwaite points
# slightly to the left so the two series do not overlap)
simsag$clusts[simsag$quantity == "cov95sat.z"] <-
  simsag$clusts[simsag$quantity == "cov95sat.z"] - 0.7

# Focus on icc 0.1 and linear models
plotdata <- simsag %>% filter(icc == 10 & link != "Probit")

# Descriptive label for the compositional-differences condition.
# NOTE: `compo` is not used by the plot below (it facets on `link`); the
# levels are now taken from the recoded values, so the column is no longer
# all NA.
plotdata$compo <- plotdata$link
plotdata$compo[plotdata$compo == "Linear"]   <- "Lower-level covariate with no compositional differences"
plotdata$compo[plotdata$compo == "Linear05"] <- "Lower-level covariate with 50% compositional differences"
plotdata$compo <- factor(
  plotdata$compo,
  levels = c("Lower-level covariate with 50% compositional differences",
             "Lower-level covariate with no compositional differences")
)
plotdata <- plotdata %>% filter(link == "Linear")

plot <- ggplot(
  data = plotdata,
  mapping = aes(
    x = clusts, y = depvar,
    shape = quantity, colour = quantity, linetype = quantity
  )
) +
  # Zero line plus a symmetric dashed band at
  # +/- 100 * qnorm(.975) * sqrt(.95 * (1 - .95) / 5000)
  # (presumably the Monte Carlo error for 5000 replications -- confirm)
  geom_hline(yintercept = 0, linetype = "solid", size = 0.4, colour = "#000000") +
  geom_hline(
    yintercept = 100 * qnorm(.975) * sqrt((0.95 * (1 - 0.95)) / 5000),
    linetype = "dashed", size = 0.4, colour = "#000000"
  ) +
  geom_hline(
    yintercept = 100 * -qnorm(.975) * sqrt((0.95 * (1 - 0.95)) / 5000),
    linetype = "dashed", size = 0.4, colour = "#000000"
  ) +
  geom_line() +
  geom_point(size = 2.5) +
  # `labels` is spelled out instead of relying on partial matching of `label`
  scale_shape_manual(
    values = c(16, 1),
    labels = c("m-l-1 rule", "Satterthwaite")
  ) +
  scale_linetype_manual(
    values = c("solid", "solid"),
    labels = c("m-l-1 rule", "Satterthwaite")
  ) +
  scale_colour_manual(
    values = c("#000000", "#696969"),
    labels = c("m-l-1 rule", "Satterthwaite")
  ) +
  scale_x_continuous(
    name = "Number of upper-level units (e.g., countries)",
    breaks = c(5, 10, 15, 20, 25, 30)
  ) +
  scale_y_continuous(
    name = "Actual minus nominal coverage rate (%)",
    breaks = c(0.75, 0.5, 0.25, 0, -0.25, -0.5, -0.75),
    limits = c(-0.75, 0.75)
  ) +
  facet_grid(. ~ link) +
  guides(
    shape = guide_legend(title = "", ncol = 1),
    linetype = guide_legend(title = ""),
    colour = guide_legend(title = "", ncol = 2)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_line(colour = "white"),
    legend.justification = c(-1.9, 0.15),
    legend.box = "vertical",
    legend.background = element_rect(fill = NA, colour = NA),
    legend.key = element_rect(fill = NA, colour = NA),
    strip.background = element_rect(fill = "#f1f1f1", colour = NA),
    legend.position = c(0.02, .025)
  )

# Write the figure to disk
pdf(file = str_c(loc, "Figures/Figure4.pdf"), width = 5, height = 5, family = "Helvetica")
print(plot)
dev.off()

## ---- Cross level interaction results for online appendix-------------------
# Load the aggregated simulation results for the conditions with a
# cross-level interaction. `CLI` is reused by the figures that follow.
CLI <- readRDS(str_c(loc, "RSimulations/CrossLevelAggregate_GO.rds"))

## Figure B2
# Long format by hand: one subset per variance component, then stacked.
# 1. Random intercept (biasVCcons and its min95*/max95* companions)
simsag.cons <- CLI[, c("icc", "cli", "clusts", "estimator", "link",
                       "biasVCcons", "min95biasVCcons", "max95biasVCcons")]
colnames(simsag.cons)[6:8] <- c("depvar", "min95", "max95")
simsag.cons$VC <- "Intercept"
# 2. Random slope (biasVCx and its min95*/max95* companions)
simsag.x <- CLI[, c("icc", "cli", "clusts", "estimator", "link",
                    "biasVCx", "min95biasVCx", "max95biasVCx")]
colnames(simsag.x)[6:8] <- c("depvar", "min95", "max95")
simsag.x$VC <- "Slope of x"
# Stack both subsets
simsag <- rbind(simsag.cons, simsag.x)

# Shorten the estimator names for the plot
simsag$estimator <- replace(simsag$estimator,
                            simsag$estimator == "ML (lme4::glmer)", "ML")
simsag$estimator <- replace(simsag$estimator,
                            simsag$estimator == "REML/EQL (hglm::hglm2)", "REML")

# Facet labels for the ICC go into a separate column (`izz`), so the numeric
# `icc` codes stay untouched. Rows with any other code keep NA.
simsag$izz[simsag$icc == 5]  <- "ICC = 0.05"
simsag$izz[simsag$icc == 10] <- "ICC = 0.1"
simsag$izz[simsag$icc == 15] <- "ICC = 0.15"

# No compositional differences
plotdata <- filter(simsag, link != "Linear05")

plot <- ggplot(
  data = plotdata,
  mapping = aes(
    x = clusts, y = depvar, ymin = min95, ymax = max95,
    shape = estimator, colour = estimator, linetype = estimator
  )
) +
  # Reference line at zero
  geom_hline(yintercept = 0, linetype = "solid", size = 0.4, colour = "#000000") +
  geom_line() +
  geom_pointrange(linetype = "solid") +
  scale_linetype_manual(values = c("dashed", "solid"), labels = c("ML", "REML")) +
  scale_shape_manual(values = c(15, 19), labels = c("ML", "REML")) +
  scale_colour_manual(values = c("#3f3f3f", "#000000"), labels = c("ML", "REML")) +
  # Rows: ICC label crossed with variance component; columns: link
  facet_grid(izz + VC ~ link) +
  scale_x_continuous(
    name = "Number of upper-level units (e.g., countries)",
    breaks = c(5, 10, 15, 20, 25, 30)
  ) +
  scale_y_continuous(
    name = "Bias",
    limits = c(-45, 8),
    breaks = c(5, 0, -5, -10, -20, -30, -40)
  ) +
  guides(
    shape = guide_legend(title = "", ncol = 2),
    linetype = guide_legend(title = ""),
    colour = guide_legend(title = "", ncol = 2)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_line(colour = "white"),
    legend.justification = c(-6.1, 0.1),
    legend.box = "vertical",
    legend.background = element_rect(fill = NA, colour = NA),
    legend.key = element_rect(fill = NA, colour = NA),
    strip.background = element_rect(fill = "#f1f1f1", colour = NA),
    legend.position = c(0.125, .01)
  )

# Write the figure to disk
pdf(
  file = str_c(loc, "Figures/FigureB2.pdf"),
  width = 8.3, height = 10.6, family = "Helvetica"
)
print(plot)
dev.off()

## Figure B3
# Coverage columns for the normal-based and the m-l-1 based variants, for the
# three coefficients (x, w, and their cross-level interaction), by estimator.
# Reshape to long by hand
# 1. Normal based w effect (here still called z based on earlier convention of the authors)
simsag.cov95norm.z           <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95norm.z")]
colnames(simsag.cov95norm.z) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.cov95norm.z$coef      <- "Beta z"
simsag.cov95norm.z$quantity  <- "Normal"
# 2. m-l-1 based w effect
simsag.ml1.z           <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95MminL.z")]
colnames(simsag.ml1.z) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.ml1.z$coef      <- "Beta z"
simsag.ml1.z$quantity  <- "Student t"

# 3. Normal based x effect
simsag.cov95norm.x           <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95norm.x")]
colnames(simsag.cov95norm.x) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.cov95norm.x$coef      <- "Beta x"
simsag.cov95norm.x$quantity  <- "Normal"
# 4. m-l-1 based x effect
simsag.ml1.x           <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95MminL.x")]
colnames(simsag.ml1.x) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.ml1.x$coef      <- "Beta x"
simsag.ml1.x$quantity  <- "Student t"

# 5. Normal based x*w (cross-level interaction) effect
simsag.cov95norm.x.z           <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95norm.x.z")]
colnames(simsag.cov95norm.x.z) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.cov95norm.x.z$coef      <- "Beta x.z"
simsag.cov95norm.x.z$quantity  <- "Normal"
# 6. m-l-1 based x*w (cross-level interaction) effect
simsag.ml1.x.z           <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95MminL.x.z")]
colnames(simsag.ml1.x.z) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.ml1.x.z$coef      <- "Beta x.z"
simsag.ml1.x.z$quantity  <- "Student t"

# Combine all six
simsag <- rbind(simsag.cov95norm.z, simsag.cov95norm.x, simsag.cov95norm.x.z,
                simsag.ml1.z, simsag.ml1.x, simsag.ml1.x.z)

# Some re-namings
simsag$estimator[simsag$estimator == "ML (lme4::glmer)"]       <- "ML"
simsag$estimator[simsag$estimator == "REML/EQL (hglm::hglm2)"] <- "REML"

# Facet labels are plotmath expressions (parsed by label_parsed below).
# Levels and labels are paired explicitly instead of relying on the default
# alphabetical level order.
simsag$coef <- factor(
  simsag$coef,
  levels = c("Beta x", "Beta x.z", "Beta z"),
  labels = c("beta[x]", "beta[x*w]", "beta[w]")
)

# One legend entry per estimator/distribution combination, ordered to match
# the positional legend labels used in the scales below.
simsag$quantity <- paste0(simsag$estimator, simsag$quantity)
simsag$quantity <- factor(
  simsag$quantity,
  levels = c("REMLStudent t", "REMLNormal", "MLStudent t", "MLNormal")
)

# Focus on icc = 0.1 and no compositional differences
plotdata <- simsag %>% filter(icc == 10, link != "Linear05")

plot <- ggplot(
  data = plotdata,
  mapping = aes(
    x = clusts, y = depvar,
    shape = quantity, colour = quantity, linetype = quantity
  )
) +
  # Zero line plus a symmetric dashed band at
  # +/- 100 * qnorm(.975) * sqrt(.95 * (1 - .95) / 5000)
  # (presumably the Monte Carlo error for 5000 replications -- confirm)
  geom_hline(yintercept = 0, linetype = "solid", size = 0.4, colour = "#000000") +
  geom_hline(
    yintercept = 100 * qnorm(.975) * sqrt((0.95 * (1 - 0.95)) / 5000),
    linetype = "dashed", size = 0.4, colour = "#000000"
  ) +
  geom_hline(
    yintercept = 100 * -qnorm(.975) * sqrt((0.95 * (1 - 0.95)) / 5000),
    linetype = "dashed", size = 0.4, colour = "#000000"
  ) +
  geom_line() +
  geom_point(size = 2.5) +
  # `labels` is spelled out instead of relying on partial matching of `label`
  scale_shape_manual(
    values = c(19, 1, 15, 0),
    labels = c("REML: Student's t (m-l-1 rule)", "REML: Normal",
               "ML: Student's t (m-l-1 rule)", "ML: Normal")
  ) +
  scale_linetype_manual(
    values = c("solid", "dashed", "solid", "dashed"),
    labels = c("REML: Student's t (m-l-1 rule)", "REML: Normal",
               "ML: Student's t (m-l-1 rule)", "ML: Normal")
  ) +
  scale_colour_manual(
    values = c("#000000", "#3f3f3f", "#000000", "#3f3f3f"),
    labels = c("REML: Student's t (m-l-1 rule)", "REML: Normal",
               "ML: Student's t (m-l-1 rule)", "ML: Normal")
  ) +
  facet_grid(coef ~ link, labeller = label_parsed) +
  scale_x_continuous(
    name = "Number of upper-level units (e.g., countries)",
    breaks = c(5, 10, 15, 20, 25, 30)
  ) +
  # The plotted columns are coverage columns (cov95*), as in Figure 3, so the
  # axis carries the same title as there rather than "Bias".
  scale_y_continuous(
    name = "Actual minus nominal coverage rate (%)",
    breaks = c(-15, -10, -5, 0, 5, 10, 15)
  ) +
  guides(
    shape = guide_legend(title = "", ncol = 1),
    linetype = guide_legend(title = ""),
    colour = guide_legend(title = "", ncol = 2)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_line(colour = "white"),
    legend.justification = c(-1.83, 0),
    legend.box = "vertical",
    legend.background = element_rect(fill = NA, colour = NA),
    legend.key = element_rect(fill = NA, colour = NA),
    strip.background = element_rect(fill = "#f1f1f1", colour = NA),
    legend.position = c(.15, 0)
  )

# Write the figure to disk
pdf(file = str_c(loc, "Figures/FigureB3.pdf"), width = 8.3, height = 9.6, family = "Helvetica")
print(plot)
dev.off()

## Figure B4
# Coverage columns for the m-l-1 rule and for Satterthwaite, for the three
# coefficients (x, w, and their cross-level interaction); REML and linear
# models only, with and without compositional differences.
# Reshape to long by hand
# 1. m-l-1 based w effect (here still called z based on earlier convention of the authors)
simsag.ml1.z             <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95MminL.z")]
colnames(simsag.ml1.z)   <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.ml1.z$quantity    <- "m-l-1 rule"
simsag.ml1.z$coef        <- "Beta z"
# 2. Satterthwaite based w effect
simsag.sat.z               <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95sat.z")]
colnames(simsag.sat.z)     <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.sat.z$quantity      <- "Satterthwaite"
simsag.sat.z$coef          <- "Beta z"

# 3. m-l-1 based x effect
simsag.ml1.x             <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95MminL.x")]
colnames(simsag.ml1.x)   <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.ml1.x$quantity    <- "m-l-1 rule"
simsag.ml1.x$coef        <- "Beta x"
# 4. Satterthwaite based x effect
simsag.sat.x               <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95sat.x")]
colnames(simsag.sat.x)     <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.sat.x$quantity      <- "Satterthwaite"
simsag.sat.x$coef          <- "Beta x"

# 5. m-l-1 based x*w (cross-level interaction) effect
simsag.ml1.x.z           <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95MminL.x.z")]
colnames(simsag.ml1.x.z) <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.ml1.x.z$quantity  <- "m-l-1 rule"
simsag.ml1.x.z$coef      <- "Beta x:z"
# 6. Satterthwaite based x*w (cross-level interaction) effect
simsag.sat.x.z             <- CLI[, c("icc", "cli", "clusts", "estimator", "link", "cov95sat.x.z")]
colnames(simsag.sat.x.z)   <- c("icc", "cli", "clusts", "estimator", "link", "depvar")
simsag.sat.x.z$quantity    <- "Satterthwaite"
simsag.sat.x.z$coef        <- "Beta x:z"

# Combine all six
simsag <- rbind(simsag.ml1.z, simsag.ml1.x, simsag.ml1.x.z,
                simsag.sat.z, simsag.sat.x, simsag.sat.x.z)

# Some re-namings
simsag$estimator[simsag$estimator == "ML (lme4::glmer)"]       <- "ML"
simsag$estimator[simsag$estimator == "REML/EQL (hglm::hglm2)"] <- "REML"

# Fix the level order explicitly so that it matches the positional legend
# labels c("m-l-1 rule", "Satterthwaite") used in the scales below.
# The previous index-based reordering (levels(...)[c(2, 1, 3)]) depended on
# the locale's sort order of these two strings and could swap the legend
# entries; it also indexed a third level that does not exist.
simsag$quantity <- factor(
  simsag$quantity,
  levels = c("m-l-1 rule", "Satterthwaite")
)
# Facet labels are plotmath expressions (parsed by label_parsed below);
# levels and labels are paired explicitly.
simsag$coef <- factor(
  simsag$coef,
  levels = c("Beta x", "Beta x:z", "Beta z"),
  labels = c("beta[x]", "beta[x*w]", "beta[w]")
)

# Focus on icc = 0.1 and on linear models
plotdata <- simsag %>% filter(icc == 10, link != "Probit", estimator != "ML")

# Some final re-namings: plotmath strings for the column facet labels, with
# the panel without compositional differences placed first.
plotdata$link[plotdata$link == "Linear"]   <- "Lower-level~covariate~with~no~compositional~differences"
plotdata$link[plotdata$link == "Linear05"] <- "Lower-level~covariate~with~50*'%'~compositional~differences"
plotdata$link <- factor(
  plotdata$link,
  levels = c("Lower-level~covariate~with~no~compositional~differences",
             "Lower-level~covariate~with~50*'%'~compositional~differences")
)

plot <- ggplot(
  data = plotdata,
  mapping = aes(
    x = clusts, y = depvar,
    shape = quantity, colour = quantity, linetype = quantity
  )
) +
  # Zero line plus a symmetric dashed band at
  # +/- 100 * qnorm(.975) * sqrt(.95 * (1 - .95) / 5000)
  # (presumably the Monte Carlo error for 5000 replications -- confirm)
  geom_hline(yintercept = 0, linetype = "solid", size = 0.4, colour = "#000000") +
  geom_hline(
    yintercept = 100 * qnorm(.975) * sqrt((0.95 * (1 - 0.95)) / 5000),
    linetype = "dashed", size = 0.4, colour = "#000000"
  ) +
  geom_hline(
    yintercept = 100 * -qnorm(.975) * sqrt((0.95 * (1 - 0.95)) / 5000),
    linetype = "dashed", size = 0.4, colour = "#000000"
  ) +
  geom_line() +
  geom_point(size = 2.5) +
  # `labels` is spelled out instead of relying on partial matching of `label`
  scale_shape_manual(
    values = c(16, 1),
    labels = c("m-l-1 rule", "Satterthwaite")
  ) +
  scale_linetype_manual(
    values = c("solid", "solid"),
    labels = c("m-l-1 rule", "Satterthwaite")
  ) +
  scale_colour_manual(
    values = c("#000000", "#696969"),
    labels = c("m-l-1 rule", "Satterthwaite")
  ) +
  facet_grid(coef ~ link, labeller = label_parsed) +
  scale_x_continuous(
    name = "Number of upper-level units (e.g., countries)",
    breaks = c(5, 10, 15, 20, 25, 30)
  ) +
  # The plotted columns are coverage columns (cov95*), as in Figure 4, so the
  # axis carries the same title as there rather than "Bias".
  scale_y_continuous(
    name = "Actual minus nominal coverage rate (%)",
    breaks = c(1, 0.75, 0.5, 0.25, 0, -0.25, -0.5, -0.75, -1),
    limits = c(-1, 1)
  ) +
  guides(
    shape = guide_legend(title = "", ncol = 1),
    linetype = guide_legend(title = ""),
    colour = guide_legend(title = "", ncol = 2)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_line(colour = "white"),
    legend.justification = c(-4.1, 8.65),
    legend.box = "vertical",
    legend.background = element_rect(fill = NA, colour = NA),
    legend.key = element_rect(fill = NA, colour = NA),
    strip.background = element_rect(fill = "#f1f1f1", colour = NA),
    legend.position = c(.15, .84)
  )

# Write the figure to disk
pdf(file = str_c(loc, "Figures/FigureB4.pdf"), width = 8.5, height = 9.6, family = "Helvetica")
print(plot)
dev.off()
