scope.qmd

---
title: "Scoping Review Report"
date: "`r Sys.Date()`"
format: html
---


```{r include = FALSE}
# Global chunk defaults: hide the source code of every chunk in the rendered
# report and render figures at 600 dpi.
knitr::opts_chunk$set(
  dpi = 600,
  echo = FALSE
)
```


```{r setup, include = FALSE, echo = FALSE}
# Packages ----
# Install pacman only when it is missing. requireNamespace() checks for the
# package without attaching it; the call below is namespaced (pacman::).
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(
  tidyverse,
  janitor,
  inspectdf,
  here,
  readxl,
  flextable,
  ggplot2,
  dplyr,
  tidyr,
  colorspace
)

# Helper ----
# Return the levels of factor `x` selected by the position vector `idx`.
# The orderings used below are hard-coded positions into the alphabetical
# level order, so warn when `idx` is not a permutation of the level
# positions: values whose level is left out become NA in the re-ordered
# factor without any other notice.
levels_by_position <- function(x, idx) {
  lv <- levels(x)
  if (!setequal(idx, seq_along(lv))) {
    warning(
      "Level positions do not match the ", length(lv), " level(s) found: ",
      paste(lv, collapse = ", "),
      call. = FALSE
    )
  }
  lv[idx]
}

# Load data ----
fulldata <- read_excel(here("fulldata.xlsx"))

# Data cleaning ----
# Remove all line breaks and convert every value to lower case.
# gsub() and tolower() return character vectors, so every column that exists
# at this point is character from here on.
fulldata[] <- lapply(
  fulldata,
  function(x) tolower(gsub("\n|\r", "", x))
)

# Extract the four-digit year given in parentheses in author_year.
# This is done after the blanket cleaning above so that `year` stays numeric
# instead of being coerced back to character.
fulldata$year <- as.numeric(
  sub(".*\\((\\d{4})\\).*", "\\1", fulldata$author_year)
)

# Direct and proxy outcomes.
# `direct` is character after cleaning, so compare against "1" explicitly
# rather than relying on implicit number-to-string coercion.
fulldata <- fulldata %>%
  mutate(direct = ifelse(direct == "1", "Direct outcomes", "Proxy outcomes"))

# Cleaning values
fulldata$study_design_short <- gsub(
  "post_only", "post only",
  fulldata$study_design_short
)
# Restore the capital "I" that the lower-casing step removed.
fulldata$outcomes_specific <- gsub(
  "type-i error reduction", "type-I error reduction",
  fulldata$outcomes_specific
)

# Re-ordering ----
# Each source column is converted to a factor (alphabetical levels) and a
# `*_reordered` copy is created whose levels follow the display order used in
# the plots.

# Re-ordering the study design
fulldata$study_design_short <- factor(fulldata$study_design_short)
new_order_studydesign <- levels_by_position(
  fulldata$study_design_short,
  c(1, 5, 3, 4, 2)
)
fulldata$studydesign_reordered <- factor(
  fulldata$study_design_short,
  levels = new_order_studydesign
)

# Re-ordering the levels of 'outcomes_class'
fulldata$outcomes_class <- factor(fulldata$outcomes_class)
new_order_outcomes_class <- levels_by_position(
  fulldata$outcomes_class,
  c(1, 7, 6, 4, 5, 2, 8, 3)
)
fulldata$outcomes_class_reordered <- factor(
  fulldata$outcomes_class,
  levels = new_order_outcomes_class
)

# Re-ordering the levels of 'outcomes_specific'
fulldata$outcomes_specific <- factor(fulldata$outcomes_specific)
new_order_outcomes_specific <- levels_by_position(
  fulldata$outcomes_specific,
  c(7, 15, 14, 16, 17, 11, 1, 6, 13, 9, 4, 8, 5, 3, 2, 12, 10)
)
fulldata$outcomes_specific_reordered <- factor(
  fulldata$outcomes_specific,
  levels = new_order_outcomes_specific
)

# Re-ordering the levels of 'intervention_class'
fulldata$intervention_class <- factor(fulldata$intervention_class)
new_order_intervention_class <- levels_by_position(
  fulldata$intervention_class,
  c(4, 5, 1, 3, 2, 6, 7)
)
fulldata$intervention_class_reordered <- factor(
  fulldata$intervention_class,
  levels = new_order_intervention_class
)

# Re-ordering the levels of 'intervention_specific'
fulldata$intervention_specific <- factor(fulldata$intervention_specific)
new_order_intervention_specific <- levels_by_position(
  fulldata$intervention_specific,
  c(5, 9, 11, 10, 13, 12, 8, 3, 1, 14, 4, 6, 15, 2, 7)
)
fulldata$intervention_specific_reordered <- factor(
  fulldata$intervention_specific,
  levels = new_order_intervention_specific
)

```


# Graph 1: Evidence Gap Map 1

## Class of interventions vs sub-class of outcomes

```{r egm1, fig.height=6, fig.width=8}
# Tile 1: number of rows (studies) per intervention class x specific outcome.
# `.groups = "drop"` returns an ungrouped result and silences the
# "summarise() has grouped output by ..." message, which would otherwise be
# printed into the rendered report. Combinations that do not occur in the
# data are added with a count of 0 so every cell of the grid gets a tile.
tile_1 <- fulldata %>%
  group_by(intervention_class_reordered, outcomes_specific_reordered) %>%
  summarise(count_author_year_tile = n(), .groups = "drop") %>%
  complete(
    intervention_class_reordered, outcomes_specific_reordered,
    fill = list(count_author_year_tile = 0)
  )

# Bubble 1: number of rows (studies) per intervention class x specific
# outcome x study design; rows without a study design are dropped.
bubble_1 <- fulldata %>%
  group_by(
    intervention_class_reordered,
    outcomes_specific_reordered,
    studydesign_reordered
  ) %>%
  summarise(count_author_year_bubble = n(), .groups = "drop") %>%
  filter(!is.na(studydesign_reordered))

# Evidence Gap Map 1 (egm1): tiles shaded by the total count, overlaid with
# one bubble per study design, sized by that design's count.
ggplot(
  tile_1,
  aes(x = outcomes_specific_reordered, y = intervention_class_reordered)
) +
  geom_tile(aes(fill = count_author_year_tile), color = "grey") +
  geom_point(
    data = bubble_1,
    aes(size = count_author_year_bubble, color = studydesign_reordered),
    alpha = 0.6,
    # Fixed seed keeps the jittered bubble positions identical across renders.
    position = position_jitterdodge(
      jitter.width = 0.2,
      dodge.width = 0.5,
      seed = 6554
    )
  ) +
  scale_y_discrete(limits = rev) +
  scale_size_continuous(
    range = c(3, 10),
    breaks = c(1, 3, 5, 10, 20),
    labels = function(x) round(x, 0)
  ) +
  scale_colour_discrete_qualitative() +
  scale_fill_gradient(low = "white", high = "steelblue") +
  # The size legend title is set in guides() below.
  labs(
    x = "Sub-class of outcomes",
    y = "Class of interventions",
    fill = "Total studies",
    color = "Study design"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    legend.position = "bottom",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  guides(
    size = guide_legend(
      nrow = 2,
      title = "Number of studies\nper study design"
    ),
    colour = guide_legend(nrow = 3, override.aes = list(size = 5))
  )

```
Caption: Notes on study design:

- between = comparative (between-subject comparison)
- within = comparative (within-subject comparison/repeated measures design)
- post only = posttest design (only a post measurement after the implementation of an intervention, and the intervention is explicitly mentioned)
- other = other designs

# Graph 2: Evidence Gap Map 2

## Class of interventions vs class of outcomes

```{r egm2, fig.height=7, fig.width=12}
# Tile layer data: rows (studies) per intervention class x outcome class x
# direct/proxy flag. Combinations that do not occur in the data are added
# with a count of 0.
tile_2 <- fulldata %>%
  count(
    intervention_class_reordered, outcomes_class_reordered, direct,
    name = "total_author_year_count"
  ) %>%
  complete(
    intervention_class_reordered, outcomes_class_reordered, direct,
    fill = list(total_author_year_count = 0)
  )

# Bubble layer data: the same counts, additionally split by study design.
df_count2 <- fulldata %>%
  count(
    intervention_class_reordered, outcomes_class_reordered,
    studydesign_reordered, direct,
    name = "author_year_count"
  )

# Keep only combinations with at least one study.
bubble_2 <- filter(df_count2, author_year_count > 0)

# Fixed seed keeps the jittered bubble positions identical across renders.
bubble_position_2 <- position_jitterdodge(
  jitter.width = 0.2,
  dodge.width = 0.5,
  seed = 6554
)

# Evidence Gap Map 2 (EGM 2): one panel per value of `direct`, tiles shaded
# by the total count, bubbles coloured by study design and sized by count.
ggplot(
  data = tile_2,
  mapping = aes(x = outcomes_class_reordered, y = intervention_class_reordered)
) +
  geom_tile(
    mapping = aes(fill = total_author_year_count),
    colour = "grey",
    alpha = 0.6
  ) +
  geom_point(
    data = bubble_2,
    mapping = aes(size = author_year_count, colour = studydesign_reordered),
    position = bubble_position_2
  ) +
  facet_wrap(~direct, scales = "free_x") +
  scale_y_discrete(limits = rev) +
  scale_size_continuous(
    range = c(3, 10),
    breaks = c(1, 3, 5, 10, 20),
    labels = function(brk) round(brk, digits = 0)
  ) +
  scale_colour_discrete_qualitative() +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(
    x = "Class of outcomes",
    y = "Class of interventions",
    fill = "Total studies",
    size = "Number of studies per study design",
    colour = "Study design"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    strip.text = element_text(size = 12),
    legend.position = "bottom",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  # Legend order: study design, then bubble size, then the tile colour bar.
  guides(
    colour = guide_legend(order = 1, nrow = 3),
    size = guide_legend(
      order = 2,
      nrow = 2,
      title = "Number of studies\nper study design"
    ),
    fill = guide_colorbar(order = 3)
  )

```
Caption: The left panel shows direct reproducibility outcomes, while the right panel shows proxy outcomes. Study designs: between = comparative (between-subject comparison); within = comparative (within-subject comparison/repeated measures design); post only = post-intervention (only a post measurement after the implementation of an intervention, and the intervention is explicitly mentioned); other = other designs.

# Graph 3: Author stated effect

## Sub-class of interventions vs sub-class of outcomes

```{r egm3, fig.height=6, fig.width=8}
# Tile 3: number of rows (studies) per specific intervention x specific
# outcome. `.groups = "drop"` returns an ungrouped result and silences the
# "summarise() has grouped output by ..." message, which would otherwise be
# printed into the rendered report. Combinations that do not occur in the
# data are added with a count of 0.
tile_3 <- fulldata %>%
  group_by(intervention_specific_reordered, outcomes_specific_reordered) %>%
  summarise(count_author_year_tile = n(), .groups = "drop") %>%
  complete(
    intervention_specific_reordered, outcomes_specific_reordered,
    fill = list(count_author_year_tile = 0)
  )

# Bubble 3: number of rows (studies) per specific intervention x specific
# outcome x author stated effect; rows without a stated effect are dropped.
bubble_3 <- fulldata %>%
  group_by(
    intervention_specific_reordered,
    outcomes_specific_reordered,
    author_stated_effect
  ) %>%
  summarise(count_author_year_bubble = n(), .groups = "drop") %>%
  filter(!is.na(author_stated_effect))

## Evidence graph based on author stated effect: tiles shaded by the total
## count, overlaid with one bubble per stated effect, sized by its count.
ggplot(
  tile_3,
  aes(x = outcomes_specific_reordered, y = intervention_specific_reordered)
) +
  geom_tile(aes(fill = count_author_year_tile), color = "grey") +
  geom_point(
    data = bubble_3,
    aes(size = count_author_year_bubble, color = author_stated_effect),
    alpha = 0.6,
    # Fixed seed keeps the jittered bubble positions identical across renders.
    position = position_jitterdodge(
      jitter.width = 0.2,
      dodge.width = 0.6,
      seed = 6554
    )
  ) +
  scale_y_discrete(limits = rev) +
  scale_size_continuous(
    range = c(3, 10),
    breaks = c(1, 3, 5, 10, 20),
    labels = function(x) round(x, 0)
  ) +
  # A single colour scale: a second colour scale added to the same plot would
  # replace the first and print a "Scale for colour is already present"
  # message into the report.
  scale_color_manual(
    values = c(
      "generally positive" = "darkgreen",
      "generally negative" = "firebrick",
      "null/neutral" = "dimgrey"
    ),
    breaks = c("generally positive", "generally negative", "null/neutral")
  ) +
  scale_fill_gradient(low = "white", high = "lightskyblue") +
  # The size legend title is set in guides() below.
  labs(
    x = "Sub-class of outcomes",
    y = "Sub-class of interventions",
    fill = "Total studies",
    color = "Author stated effect"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    legend.position = "bottom",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  # Legend order: stated effect, then bubble size, then the tile colour bar.
  guides(
    color = guide_legend(order = 1, nrow = 3),
    size = guide_legend(
      order = 2,
      nrow = 2,
      title = "Number of studies\nper author stated effect"
    ),
    fill = guide_colorbar(order = 3)
  )

```