-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from ntluong95/causal-chain-refactor
Causal chain refactor
- Loading branch information
Showing
18 changed files
with
6,194 additions
and
235 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
RETICULATE_PYTHON=.venv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,4 +17,4 @@ notebooks/.env | |
/_build/ | ||
CRE_workshop_materials.zip | ||
/index_files/ | ||
.venv | ||
.venv.Renviron |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
3.12.4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
pacman::p_load( | ||
rio, | ||
here, | ||
igraph, | ||
tidygraph, | ||
ggraph, | ||
janitor, | ||
bslib, | ||
shiny, | ||
tidyverse, | ||
viridis | ||
) | ||
|
||
result_df <- import(here("notebooks", "cosine_test_31 Oct.csv")) | ||
|
||
ui <- page_sidebar( | ||
title = "Network Analysis Visualization", | ||
sidebar = sidebar( | ||
selectInput("centrality_method", "Centrality Measure:", | ||
choices = c("Degree" = "degree", | ||
"Betweenness" = "betweenness", | ||
"Closeness" = "closeness"), | ||
selected = "betweenness"), | ||
selectInput("community_method", "Community Detection:", | ||
choices = c("Cosine Similarity" = "cosine", | ||
"Infomap" = "infomap", | ||
"Label Propagation" = "label_prop", | ||
"Leading Eigenvector" = "leading_eigen"), | ||
selected = "infomap"), | ||
selectInput("layout_type", "Graph Layout:", | ||
choices = c("Fruchterman-Reingold" = "fr", | ||
"Kamada-Kawai" = "kk", | ||
"Stress" = "stress", | ||
"Circle" = "circle", | ||
"Grid" = "grid")), | ||
numericInput("small_cluster_threshold", | ||
"Small Cluster Threshold:", | ||
value = 5, | ||
min = 1), | ||
hr(), | ||
helpText("Select different methods to analyze network centrality and community structure") | ||
), | ||
|
||
card( | ||
card_header("Network Visualization"), | ||
plotOutput("network_plot", height = "800px") | ||
) | ||
) | ||
|
||
server <- function(input, output, session) { | ||
|
||
get_cluster_colors <- function(n) { | ||
viridis(n, option = "D") | ||
} | ||
|
||
network_data <- reactive({ | ||
# First create nodes dataframe with cluster information | ||
nodes_df <- result_df %>% | ||
select(Cause_category, Cause_cluster) %>% | ||
rename(name = Cause_category, cluster = Cause_cluster) %>% | ||
bind_rows( | ||
result_df %>% | ||
select(Effect_category, Effect_cluster) %>% | ||
rename(name = Effect_category, cluster = Effect_cluster) | ||
) %>% | ||
# Keep original name and cluster before transformation | ||
mutate( | ||
original_name = name, | ||
original_cluster = cluster, | ||
name = case_when( | ||
str_detect(name, "transmission|spread") ~ "disease transmission", | ||
TRUE ~ name | ||
) | ||
) %>% | ||
# Group by the new name and keep all unique clusters | ||
group_by(name) %>% | ||
mutate( | ||
all_clusters = paste(sort(unique(original_cluster)), collapse = "_") | ||
) %>% | ||
ungroup() %>% | ||
# Keep one row per unique name-cluster combination | ||
distinct(name, all_clusters, .keep_all = TRUE) %>% | ||
filter(!name %in% c("No content", "No context")) | ||
|
||
# Create edges dataframe | ||
edges_df <- result_df %>% | ||
select(Cause_category, Effect_category) %>% | ||
filter(!Cause_category %in% c("No content", "No context")) %>% | ||
mutate(across(c(Cause_category, Effect_category), | ||
~case_when( | ||
str_detect(., "transmission|spread") ~ "disease transmission", | ||
TRUE ~ . | ||
))) | ||
|
||
# Create graph with node attributes | ||
network_recode1 <- tbl_graph(nodes = nodes_df, | ||
edges = edges_df, | ||
directed = TRUE) | ||
|
||
# Calculate centrality and community | ||
network_recode1 <- network_recode1 %>% | ||
mutate( | ||
centrality = case_when( | ||
input$centrality_method == "degree" ~ centrality_degree(), | ||
input$centrality_method == "betweenness" ~ centrality_betweenness(), | ||
input$centrality_method == "closeness" ~ centrality_closeness() | ||
), | ||
community = case_when( | ||
input$community_method == "cosine" ~ as.factor(all_clusters), | ||
input$community_method == "infomap" ~ as.factor(group_infomap()), | ||
input$community_method == "label_prop" ~ as.factor(group_label_prop()), | ||
input$community_method == "leading_eigen" ~ as.factor(group_leading_eigen()) | ||
) | ||
) | ||
|
||
# Only apply small cluster threshold for non-cosine methods | ||
if(input$community_method != "cosine") { | ||
network_recode1 <- network_recode1 %>% | ||
mutate( | ||
community = if_else( | ||
community %in% names(which(table(community) <= input$small_cluster_threshold)), | ||
"Unclassified", | ||
"Cluster" | ||
), | ||
community = as.factor(community) | ||
) | ||
} | ||
|
||
network_recode1 %>% | ||
activate(edges) %>% | ||
mutate(edge_type = if_else(.N()$centrality[from] > 3, | ||
"link to endpoint", | ||
"link between drivers")) | ||
}) | ||
|
||
cluster_colors <- reactive({ | ||
g <- network_data() | ||
|
||
if(input$community_method == "cosine") { | ||
unique_communities <- sort(unique(as.character(V(g)$community))) | ||
n_communities <- length(unique_communities) | ||
colors <- get_cluster_colors(n_communities) | ||
names(colors) <- unique_communities | ||
colors | ||
} else { | ||
c("Unclassified" = "#1b9e77", "Cluster" = "#7570b3") | ||
} | ||
}) | ||
|
||
output$network_plot <- renderPlot({ | ||
g <- network_data() | ||
|
||
centrality_scaled <- scales::rescale(V(g)$centrality, to = c(0.3, 1)) | ||
|
||
ggraph(g, layout = input$layout_type) + | ||
geom_edge_link(aes(color = edge_type), | ||
arrow = arrow(length = unit(4, 'mm'), | ||
type = "closed"), | ||
end_cap = circle(2, 'mm'), | ||
alpha = 0.7, | ||
edge_width = 1) + | ||
geom_node_point(aes(size = centrality, | ||
color = community), | ||
show.legend = TRUE) + | ||
geom_node_text(aes(label = name, | ||
size = centrality, | ||
alpha = centrality), | ||
repel = TRUE) + | ||
scale_color_manual(values = cluster_colors(), | ||
name = "Cluster") + | ||
scale_edge_color_manual(values = c("link between drivers" = "gray", | ||
"link to endpoint" = "#d95f02")) + | ||
scale_size(range = c(3, 10)) + | ||
scale_alpha(range = c(0.3, 1)) + | ||
theme_graph() + | ||
labs(title = paste("Network Analysis using", | ||
tools::toTitleCase(input$centrality_method), | ||
"centrality and", | ||
tools::toTitleCase(input$community_method), | ||
"community detection")) + | ||
guides(alpha = "none") | ||
}) | ||
} | ||
|
||
shinyApp(ui, server) |
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
1. {Persona and task description} | ||
You are an epidemiologist tasked with identifying sentences or phrases from outbreak reports that describe the drivers or contributors to the emergence or transmission of emerging pests and pathogens | ||
|
||
2. {Domain localization, terms’ definition} | ||
|
||
Drivers: underlying socio-economic, environmental, or ecological forces that create conditions favourable for how a disease emerges, spreads or sustains transmission in human, animals or plants. | ||
Pressure: human anthropogenic activities that are mainly responsible for the chances of spillover events and the transmission of pests and pathogens | ||
State: the current circulation of pests and pathogens, represented as either new case detected, an endemic, an epidemic or a pandemic | ||
Impacts: the effects caused by pests and pathogens on individual, community's socio-economic, and political | ||
Responses: the actions and interventions taken by governments to manage the occurrence of drivers and pressures, and to control the spread of pests and pathogens and to mitigate the impacts | ||
|
||
|
||
Causality definition: In the reports, causality can take two forms. The first form is "Intra-sentence causality", the “cause” and the “effect” lie in a single sentence while in "Inter-sentence causality", the “cause” and the “effect” lie in different sentences. | ||
|
||
3. {Few-shot examples describing how drivers can be reported}: Denoting: (C): Cause, (E): Effect | ||
|
||
|
||
- Single cause, single effect (Type 1) | ||
|
||
Example 1: (C1) High population density and mobility in urban areas (C1) have facilitated (E1) the rapid spread of the virus (E1)". | ||
|
||
Example 2: There is (C1) no vaccine for Influenza A(H1N1)v infection currently licensed for use in humans (C1). Seasonal influenza vaccines against human influenza viruses are generally not expected to protect people from (E1) infection with influenza viruses (E1) that normally circulate in pigs, but they can reduce severity. | ||
|
||
|
||
- Single cause, multiple effects (Type 2) | ||
|
||
Example 3: Several countries including Cameroon, Ethiopia, Haiti, Lebanon, Nigeria (north-east of the country), Pakistan, Somalia, Syria and the Democratic Republic of Congo (eastern part of the country) are in the midst of complex (C1) humanitarian crises (C1) with (E1) fragile health systems (E1), (E1) inadequate access to clean water and sanitation (E1) and have (E1) insufficient capacity to respond to the outbreaks (E1) | ||
|
||
- Multiple causes, single effect (Type 3) | ||
Example 4: Moreover, (C1) a low index of suspicion (C1), (C1) socio-cultural norms (C1), (C1) community resistance (C1), (C1) limited community knowledge regarding anthrax transmission (C1), (C1) high levels of poverty (C1) and (C1) food insecurity (C1), (C1) a shortage of available vaccines and laboratory reagents (C1), (C1) inadequate carcass disposal (C1) and (C1) decontamination practices (C1) significantly contribute to hampering (E1) the containment of the anthrax outbreak (E1). | ||
|
||
Example 5: | ||
The (E1) risk at the national level (E1) is assessed as ‘High’ due to the following: | ||
+ In other parts of Timor-Leste (C1) health workers have limited knowledge dog bite and scratch case management (C1) including PEP and RIG administration | ||
+ (C2) Insufficient stock of human rabies vaccines (C2) in the government health facilities. | ||
|
||
- Multiple causes, multiple effects (Type 4) - Chain of causalities | ||
The text may describe a chain of causality, where one effect becomes then the cause of another effect. To describe the chain, you should number the causes and effects. For example, cause 1 (C1) -> effect 1 (E1), but since effect 1 is also cause of effect 2, you should do cause 1 (C1) -> effect 1 (E1, C2) -> effect 2 (E2). | ||
|
||
Example 6: (E2) The risk of insufficient control capacities (E2) is considered high in Zambia due to (C1) concurrent public health emergencies in the country (cholera, measles, COVID-19) (C1) that limit the country’s human and (E1, C2) financial capacities to respond to the current anthrax outbreak adequately (E1, C2). | ||
|
||
Example 7: (C1) Surveillance systems specifically targeting endemic transmission of chikungunya or Zika are weak or non-existent (C1) -> (E1, C2) Misdiagnosis between diseases & Skewed surveillance (E1, C2) -> (E2, C3) Misinform policy decisions (E2, C3) -> (E3)reduced accuracy on the estimation of the true burden of each diseases (E3), poor risk assessments (E3), and non optimal clinical management and resource allocation (E3). | ||
|
||
Example 8: (C1) Changes in the predominant circulating serotype (C1) -> (E1, C2) increase the population risk of subsequent exposure to a heterologous DENV serotype (E1, C2), -> (E2) which increases the risk of higher rates of severe dengue and deaths (E2). | ||
|
||
4. {Negative cases} | ||
Some sentences contain causal relationships, but the effect may not be related to the disease transmission or emergence. Avoid classifying those causal relationships. | ||
|
||
Example 1 (no causality): Because these viruses continue to be detected in swine populations worldwide, further human cases following direct or indirect contact with infected swine can be expected. | ||
|
||
Example 2 (no relevant causality): There is some (E1) pressure on the healthcare capacity (E1) due to the (C1) very high number of admissions for dengue (C1); (C1) high vector density (C1); and an (C1) anticipated prolonged monsoon (C1). | ||
|
||
Example 3 (no relevant causality): (C1) MVD is a highly virulent disease (C1) that can cause (E1) haemorrhagic fever (E1) and is clinically similar to Ebola virus disease. | ||
|
||
5. {Mechanism of causality} | ||
When the text describes/list possible mechanisms behind the cause of transmission or emergence, tag them with (M). A mechanism of causality describes the specific interactions between the pathogen, host, and environment that causes the transmission / emergence. They often describe interactions at the physiological level. | ||
|
||
Example 1: The global outbreak 2022 — 2024 has shown that (C1) sexual contact (C1) enables faster and more efficient (E1) spread of the virus (E1) from one person to another due to (M1) direct contact of mucous membranes between people (M1), (M1) contact with multiple partners (M1), (M1) a possibly shorter incubation period on average (M1), and (M1) a longer infectious period for immunocompromised individuals (M1). | ||
|
||
6. {Sign of causality} | ||
|
||
For each cause-effect relationship, indicate whether each cause (C) is positive (C+) or negative (C-) and each effect (E) is positive (E+) or negative (E-). | ||
Use the list of positive and negative sign words provided to help determine the sign of each cause and effect. Be mindful of sentences with negations (e.g., “does not improve”), which reverses polarity. | ||
Positive sign words: increase, facilitate, support, improve, expand, promote, enable, enhance, accelerate, advance, grow, boost, strengthen, benefit, contribute, progress, initiate, develop, elevate, stimulate, alleviate, optimize, revitalize. | ||
Negative sign words: limit, decrease, reduce, hamper, hinder, restrict, suppress, impair, inhibit, undermine, challenge, disrupt, lack, insufficient, incomplete, challenge, deficit, obstacle, barrier, diminish, shortage, scarcity, obstruct, worsen, decline. | ||
|
||
Example 1: “(C1-) a lack of timely access to diagnostics in many areas (C1-), (C1-) incomplete epidemiological investigations (C1-), (C1-) challenges in contact tracing and extensive but inconclusive animal investigations (C1-) continue to hamper rapid response (E1-)” | ||
|
||
Example 2: Moreover, (C1-) a low index of suspicion (C1-), (C1) socio-cultural norms (C1), (C1) community resistance (C1), (C1-) limited community knowledge regarding anthrax transmission (C1-), (C1+) high levels of poverty (C1+) and (C1) food insecurity (C1), (C1-) a shortage of available vaccines and laboratory reagents (C1-), (C1-) inadequate carcass disposal (C1-) and (C1) decontamination practices (C1) significantly contribute to hampering (E1-) the containment of the anthrax outbreak (E1-). | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
Oops, something went wrong.