## Attach packages
knitr::opts_chunk$set(echo=TRUE)
library(knitr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(epiR)
## Loading required package: survival
## Package epiR 2.0.84 is loaded
## Type help(epi.about) for summary information
## Type browseVignettes(package = 'epiR') to learn how to use epiR for applied epidemiological analyses
##
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(UpSetR)
# Import and arrange SAC data
# Import data
SAC <- read.csv("schisto_data.csv")
# Change sex to a factor variable
SAC$Sex <- as.factor(SAC$Sex)
# Code binary variables as yes/no categorical variables
SAC$Schistosomiasis <- ifelse(SAC$Schistosomiasis == 1, "Yes", "No")
SAC$Schistosomiasis <- factor(SAC$Schistosomiasis, levels = c("Yes", "No"))
SAC$IS <- ifelse(SAC$IS == 1, "Yes", "No")
SAC$IS <- factor(SAC$IS, levels = c("Yes", "No"))
SAC$IS_Sh <- ifelse(SAC$IS_Sh == 1, "Yes", "No")
SAC$IS_Sh <- factor(SAC$IS_Sh, levels = c("Yes", "No"))
SAC$IS_Sman <- ifelse(SAC$IS_Sman == 1, "Yes", "No")
SAC$IS_Sman <- factor(SAC$IS_Sman, levels = c("Yes", "No"))
SAC$IS_Smat <- ifelse(SAC$IS_Smat == 1, "Yes", "No")
SAC$IS_Smat <- factor(SAC$IS_Smat, levels = c("Yes", "No"))
SAC$UGS <- ifelse(SAC$UGS == 1, "Yes", "No")
SAC$UGS <- factor(SAC$UGS, levels = c("Yes", "No"))
SAC$UGS_Sh <- ifelse(SAC$UGS_Sh == 1, "Yes", "No")
SAC$UGS_Sh <- factor(SAC$UGS_Sh, levels = c("Yes", "No"))
SAC$UGS_Sman <- ifelse(SAC$UGS_Sman == 1, "Yes", "No")
SAC$UGS_Sman <- factor(SAC$UGS_Sman, levels = c("Yes", "No"))
SAC$UGS_Smat <- ifelse(SAC$UGS_Smat == 1, "Yes", "No")
SAC$UGS_Smat <- factor(SAC$UGS_Smat, levels = c("Yes", "No"))
# Code CCA categories
SAC$CCA <- ifelse(SAC$CCA == 0, "Negative",
ifelse(SAC$CCA == 1, "Trace", "Positive"))
SAC$CCA_trace_positive <- ifelse(SAC$CCA == "Negative", "Negative", "Positive")
SAC$CCA_trace_positive <- factor(SAC$CCA_trace_positive, levels = c("Positive", "Negative"))
SAC$CCA_trace_negative <- ifelse(SAC$CCA == "Positive", "Positive", "Negative")
SAC$CCA_trace_negative <- factor(SAC$CCA_trace_negative, levels = c("Positive", "Negative"))
# Code urogenital infection intensity, grouping negative and 1 - 9 eggs in the same cateory (< 10)
SAC$UGS_egg_count_category[SAC$UGS_egg_count_category %in% c(0, 1)] <- 0
SAC$UGS_egg_count_category <- factor(SAC$UGS_egg_count_category,
levels = c(0, 2, 3),
labels = c("< 10", "10 – 49", "≥ 50")
)
# Generate variables for infection site (none, urogenital, intestinal, mixed)
SAC$mixed_infection <- ifelse(SAC$UGS == "Yes" & SAC$IS == "No", "UGS_only",
ifelse(SAC$UGS == "No" & SAC$IS == "Yes", "IS_only",
ifelse(SAC$UGS == "No" & SAC$IS == "No", "no_infection",
ifelse(SAC$UGS == "Yes" & SAC$IS == "Yes", "mixed", NA))))
SAC$mixed_infection <- as.factor(SAC$mixed_infection)
# Generate variables to indicate species infection regardless of infection site
SAC$Sh <- ifelse(
rowSums(SAC[, c("IS_Sh", "UGS_Sh")] == "Yes", na.rm = TRUE) > 0,
"Yes",
"No"
)
SAC$Sh <- factor(SAC$Sh, levels = c("Yes", "No"))
SAC$Sman <- ifelse(
rowSums(SAC[, c("IS_Sman", "UGS_Sman")] == "Yes", na.rm = TRUE) > 0,
"Yes",
"No"
)
SAC$Sman <- factor(SAC$Sman, levels = c("Yes", "No"))
SAC$Smat <- ifelse(
rowSums(SAC[, c("IS_Smat", "UGS_Smat")] == "Yes", na.rm = TRUE) > 0,
"Yes",
"No"
)
SAC$Smat <- factor(SAC$Smat, levels = c("Yes", "No"))
# Generate variable to indicate whether a species was identified in intestinal infection
SAC$IS_species_ID <- ifelse(
rowSums(SAC[, c("IS_Sh", "IS_Sman", "IS_Smat")] == "Yes", na.rm = TRUE) > 0,
"Yes",
"No"
)
SAC$IS_species_ID <- factor(SAC$IS_species_ID, levels = c("Yes", "No"))
Summary data
Demographics
# Print summary statistics for age
summary(SAC$Age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.00 8.00 10.00 9.98 12.00 15.00
# Print summary statistics for sex
summary(SAC$Sex)
## female male
## 123 124
Prevalence
# Generate summary statistics for Schistosomiasis
# Define the variables of interest
vars <- c("Schistosomiasis", "Sh", "Sman", "Smat", "IS", "IS_Sh", "IS_Sman",
"IS_Smat", "UGS", "UGS_Sh", "UGS_Sman", "UGS_Smat")
# Initialize the summary dataframe
prevalence_summary <- data.frame(
Variable = vars,
Positive_Count = NA,
Total_N = NA,
Prevalence_Percent = NA,
CI_Lower = NA,
CI_Upper = NA
)
# Loop through variables and calculate values
for (i in seq_along(vars)) {
var <- vars[i]
x <- SAC[[var]]
pos <- sum(x %in% c("Yes", "Positive"), na.rm = TRUE)
total <- sum(!is.na(x))
test <- prop.test(pos, total)
prevalence_summary$Positive_Count[i] <- pos
prevalence_summary$Total_N[i] <- total
prevalence_summary$Prevalence_Percent[i] <- round(100 * pos / total, 1)
prevalence_summary$CI_Lower[i] <- round(100 * test$conf.int[1], 1)
prevalence_summary$CI_Upper[i] <- round(100 * test$conf.int[2], 1)
}
# Print the table
print(prevalence_summary)
## Variable Positive_Count Total_N Prevalence_Percent CI_Lower CI_Upper
## 1 Schistosomiasis 154 247 62.3 56.0 68.4
## 2 Sh 106 247 42.9 36.7 49.4
## 3 Sman 34 247 13.8 9.8 18.8
## 4 Smat 30 247 12.1 8.5 17.0
## 5 IS 125 247 50.6 44.2 57.0
## 6 IS_Sh 46 247 18.6 14.1 24.2
## 7 IS_Sman 23 247 9.3 6.1 13.8
## 8 IS_Smat 18 247 7.3 4.5 11.5
## 9 UGS 86 247 34.8 29.0 41.2
## 10 UGS_Sh 86 247 34.8 29.0 41.2
## 11 UGS_Sman 15 247 6.1 3.6 10.0
## 12 UGS_Smat 17 247 6.9 4.2 11.0
Plot co-infection
# Make subset of data for an UpSet plot
SAC_upset <- SAC[, c("Sh", "Sman", "Smat")]
# Convert Yes/No to binary 1/0
SAC_upset$Sh <- ifelse(SAC_upset$Sh == "Yes", 1, 0)
SAC_upset$Sman <- ifelse(SAC_upset$Sman == "Yes", 1, 0)
SAC_upset$Smat <- ifelse(SAC_upset$Smat == "Yes", 1, 0)
# Convert colnames for plotting
colnames(SAC_upset) <- c("S. haematobium", "S. mansoni", "S. mattheei")
# Open a JPEG device
jpeg("coinfection_plot.jpg", width = 6000, height = 3000, res = 800) # Adjust size/resolution as needed
# Create the plot
upset_plot <- upset(SAC_upset,
sets = c("S. haematobium", "S. mansoni", "S. mattheei"),
order.by = "freq",
keep.order = TRUE,
main.bar.color = "#1B9E77",
sets.bar.color = "#C51B7D",
sets.x.label = "Individuals per species",
mainbar.y.label = "Individuals per combination")
# Close the device
dev.off()
## png
## 2
# Print the plot
print(upset_plot)

Infection location
# Print infection location(s)
summary(SAC$mixed_infection)
## IS_only mixed no_infection UGS_only
## 68 57 93 29
POC-CCA prevalence
# Print CCA prevalence if trace = positive
summary(SAC$CCA_trace_positive)
## Positive Negative
## 37 210
# Print CCA prevalence if trace = negative
summary(SAC$CCA_trace_negative)
## Positive Negative
## 17 230
Analysis of intestinal schistosomiasis diagnostics
Investigating infection intensity (Cq/Ct value) and species-level
diagnosis
# Extract intestinal schistosomiasis cases
SAC_IS <- subset(SAC, IS == "Yes")
# Show the number of cases in which species could be identified
summary(SAC_IS$IS_species_ID)
## Yes No
## 66 59
# Plot Cq value vs species-specific detection
Ct_vs_species_plot <- ggplot(SAC_IS, aes(x = IS_species_ID, y = Faecal_schis_gen_Cq, fill = IS_species_ID)) +
geom_boxplot() +
labs(
x = "Species-specific mtDNA detection",
y = "Genus-specific ITS2 real-time PCR Ct value",
) +
theme_minimal() +
scale_fill_manual(values = c("No" = "lightblue", "Yes" = "lightgreen"))
# Save the plot
ggsave("Ct_vs_species_plot.jpg", plot = Ct_vs_species_plot, width = 6, height = 4)
# Print the plot
print(Ct_vs_species_plot)

# Perform a Wilcoxon Rank-Sum Test (non-parametric) to compare Ct values between the two groups
wilcox.test(Faecal_schis_gen_Cq ~ IS_species_ID, data = SAC_IS)
##
## Wilcoxon rank sum test with continuity correction
##
## data: Faecal_schis_gen_Cq by IS_species_ID
## W = 367, p-value = 5.657e-15
## alternative hypothesis: true location shift is not equal to 0
# Plot percentage of cases with species-level ID for each species
# Reshape data into long format
SAC_long <- SAC_IS %>%
select(IS_Sh, IS_Sman, IS_Smat) %>%
pivot_longer(cols = everything(), names_to = "Species", values_to = "Response")
# Calculate percentage 'Yes' for each species
percent_yes <- SAC_long %>%
group_by(Species) %>%
summarise(Percent_Yes = mean(Response == "Yes") * 100)
# Rename species for display
percent_yes$Species <- factor(percent_yes$Species,
levels = c("IS_Sh", "IS_Sman", "IS_Smat"),
labels = c("italic('S. haematobium')",
"italic('S. mansoni')",
"italic('S. mattheei')"))
# Custom colors
custom_colors <- c("lightblue", "lightgreen", "plum")
# Plot percentage of cases in which a species was identified
IS_species_plot <- ggplot(percent_yes, aes(x = Species, y = Percent_Yes, fill = Species)) +
geom_col(width = 0.6, color = "black") +
labs(
x = NULL,
y = "% of intestinal schistosomiasis cases"
) +
scale_fill_manual(values = custom_colors) +
scale_y_continuous(limits = c(0, 100), expand = expansion(mult = c(0, 0.05))) +
theme_minimal(base_size = 13) +
theme(legend.position = "none") +
scale_x_discrete(labels = function(x) parse(text = x))
# Save the plot
ggsave("IS_species_plot.jpg", plot = IS_species_plot, width = 5, height = 4)
# Print the plot
print(IS_species_plot)

Sensitivity and specificity of CCA
# Create 2x2 matrix for CCA (trace positive) vs intestinal schistosomiasis
tab_IS_CCA_trace_positive <- table(SAC$CCA_trace_positive, SAC$IS)
# Run diagnostic test evaluation
epi.tests(tab_IS_CCA_trace_positive)
## Outcome + Outcome - Total
## Test + 25 12 37
## Test - 100 110 210
## Total 125 122 247
##
## Point estimates and 95% CIs:
## --------------------------------------------------------------
## Apparent prevalence * 0.15 (0.11, 0.20)
## True prevalence * 0.51 (0.44, 0.57)
## Sensitivity * 0.20 (0.13, 0.28)
## Specificity * 0.90 (0.83, 0.95)
## Positive predictive value * 0.68 (0.50, 0.82)
## Negative predictive value * 0.52 (0.45, 0.59)
## Positive likelihood ratio 2.03 (1.07, 3.86)
## Negative likelihood ratio 0.89 (0.80, 0.99)
## False T+ proportion for true D- * 0.10 (0.05, 0.17)
## False T- proportion for true D+ * 0.80 (0.72, 0.87)
## False T+ proportion for T+ * 0.32 (0.18, 0.50)
## False T- proportion for T- * 0.48 (0.41, 0.55)
## Correctly classified proportion * 0.55 (0.48, 0.61)
## --------------------------------------------------------------
## * Exact CIs
# Create 2x2 matrix for CCA (trace negative) vs intestinal schistosomiasis
tab_IS_CCA_trace_negative <- table(SAC$CCA_trace_negative, SAC$IS)
# Run diagnostic test evaluation
epi.tests(tab_IS_CCA_trace_negative)
## Outcome + Outcome - Total
## Test + 11 6 17
## Test - 114 116 230
## Total 125 122 247
##
## Point estimates and 95% CIs:
## --------------------------------------------------------------
## Apparent prevalence * 0.07 (0.04, 0.11)
## True prevalence * 0.51 (0.44, 0.57)
## Sensitivity * 0.09 (0.04, 0.15)
## Specificity * 0.95 (0.90, 0.98)
## Positive predictive value * 0.65 (0.38, 0.86)
## Negative predictive value * 0.50 (0.44, 0.57)
## Positive likelihood ratio 1.79 (0.68, 4.69)
## Negative likelihood ratio 0.96 (0.90, 1.03)
## False T+ proportion for true D- * 0.05 (0.02, 0.10)
## False T- proportion for true D+ * 0.91 (0.85, 0.96)
## False T+ proportion for T+ * 0.35 (0.14, 0.62)
## False T- proportion for T- * 0.50 (0.43, 0.56)
## Correctly classified proportion * 0.51 (0.45, 0.58)
## --------------------------------------------------------------
## * Exact CIs
# Create 2x2 matrix for CCA (trace positive) vs S. mansoni intestinal infection
tab_IS_Sman_CCA_trace_positive <- table(SAC$CCA_trace_positive, SAC$IS_Sman)
# Run diagnostic test evaluation
epi.tests(tab_IS_Sman_CCA_trace_positive)
## Outcome + Outcome - Total
## Test + 7 30 37
## Test - 16 194 210
## Total 23 224 247
##
## Point estimates and 95% CIs:
## --------------------------------------------------------------
## Apparent prevalence * 0.15 (0.11, 0.20)
## True prevalence * 0.09 (0.06, 0.14)
## Sensitivity * 0.30 (0.13, 0.53)
## Specificity * 0.87 (0.81, 0.91)
## Positive predictive value * 0.19 (0.08, 0.35)
## Negative predictive value * 0.92 (0.88, 0.96)
## Positive likelihood ratio 2.27 (1.13, 4.58)
## Negative likelihood ratio 0.80 (0.61, 1.06)
## False T+ proportion for true D- * 0.13 (0.09, 0.19)
## False T- proportion for true D+ * 0.70 (0.47, 0.87)
## False T+ proportion for T+ * 0.81 (0.65, 0.92)
## False T- proportion for T- * 0.08 (0.04, 0.12)
## Correctly classified proportion * 0.81 (0.76, 0.86)
## --------------------------------------------------------------
## * Exact CIs
# Create 2x2 matrix for CCA (trace negative) vs intestinal schistosomiasis
tab_IS_Sman_CCA_trace_negative <- table(SAC$CCA_trace_negative, SAC$IS_Sman)
# Run diagnostic test evaluation
epi.tests(tab_IS_Sman_CCA_trace_negative)
## Outcome + Outcome - Total
## Test + 5 12 17
## Test - 18 212 230
## Total 23 224 247
##
## Point estimates and 95% CIs:
## --------------------------------------------------------------
## Apparent prevalence * 0.07 (0.04, 0.11)
## True prevalence * 0.09 (0.06, 0.14)
## Sensitivity * 0.22 (0.07, 0.44)
## Specificity * 0.95 (0.91, 0.97)
## Positive predictive value * 0.29 (0.10, 0.56)
## Negative predictive value * 0.92 (0.88, 0.95)
## Positive likelihood ratio 4.06 (1.57, 10.50)
## Negative likelihood ratio 0.83 (0.67, 1.03)
## False T+ proportion for true D- * 0.05 (0.03, 0.09)
## False T- proportion for true D+ * 0.78 (0.56, 0.93)
## False T+ proportion for T+ * 0.71 (0.44, 0.90)
## False T- proportion for T- * 0.08 (0.05, 0.12)
## Correctly classified proportion * 0.88 (0.83, 0.92)
## --------------------------------------------------------------
## * Exact CIs
POC-CCA results in different groups
# S. mansoni (intestinal schistosomiasis with no atypical species) only
Sman_IS_only <- SAC %>%
filter(
UGS == "No",
IS == "Yes",
Sh == "No",
Sman == "Yes",
Smat == "No"
)
Sman_IS_only$CCA <- as.factor(Sman_IS_only$CCA)
summary(Sman_IS_only$CCA)
## Negative Positive Trace
## 4 1 1
# S. haematobium only
Sh_UGS_only <- SAC %>%
filter(
UGS == "Yes",
IS == "No",
Sh == "Yes",
Sman == "No",
Smat == "No"
)
Sh_UGS_only$CCA <- as.factor(Sh_UGS_only$CCA)
summary(Sh_UGS_only$CCA)
## Negative Positive
## 22 3
# S. haematobium x S. mattheei, no S. mansoni
Sh_Smat_only <- SAC %>%
filter(
Sh == "Yes",
Sman == "No",
Smat == "Yes"
)
Sh_Smat_only$CCA <- as.factor(Sh_Smat_only$CCA)
summary(Sh_Smat_only$CCA)
## Negative Positive
## 10 3
S. mattheei and S. haematobium dynamics
# S. mattheei in urine and stool
table("UGS_Smat" = SAC$UGS_Smat, "IS_Smat" = SAC$IS_Smat)
## IS_Smat
## UGS_Smat Yes No
## Yes 5 12
## No 13 217
# Plot S. mattheei in urine and stool by S. haematobium infection status
Smat_urine_stool_by_Sh <- ggplot(SAC, aes(x = IS_Smat, y = UGS_Smat, color = Sh, shape = Sh)) +
geom_jitter(position = position_jitter(seed = 1)) +
labs(
x = expression(italic("S. mattheei") ~ "intestinal infection"),
y = expression(italic("S. mattheei") ~ "urogenital infection"),
color = expression(italic("S. haematobium") ~ "infection"),
shape = expression(italic("S. haematobium") ~ "infection"),
) +
scale_color_manual(values = c("Yes" = "brown1", "No" = "#0072B2")) +
theme_minimal()
# Save the plot
ggsave("Smat_urine_stool_by_Sh.jpg", plot = Smat_urine_stool_by_Sh, width = 6, height = 5.5)
# Print the plot
print(Smat_urine_stool_by_Sh)

# S. haematobium in urine and stool
table("UGS_Sh" = SAC$UGS_Sh, "IS_Sh" = SAC$IS_Sh)
## IS_Sh
## UGS_Sh Yes No
## Yes 26 60
## No 20 141
# Plot S. haematobium in urine and stool by S. mattheei infection status
Sh_urine_stool_by_Smat <- ggplot(SAC, aes(x = IS_Sh, y = UGS_Sh, color = Smat, shape = Smat)) +
geom_jitter(position = position_jitter(seed = 1)) +
labs(
x = expression(italic("S. haematobium") ~ "intestinal infection"),
y = expression(italic("S. haematobium") ~ "urogenital infection"),
color = expression(italic("S. mattheei") ~ "infection"),
shape = expression(italic("S. mattheei") ~ "infection"),
) +
scale_color_manual(values = c("Yes" = "#C51B7D", "No" = "#1B9E77")) +
theme_minimal()
# Save the plot
ggsave("Sh_urine_stool_by_Smat.jpg", plot = Sh_urine_stool_by_Smat, width = 6, height = 5.5)
# Print the plot
print(Sh_urine_stool_by_Smat)

Urogential schistosomiasis infection intensity among patients with
confirmed S. haematobium infection
# Print breakdown of infection intensity categories
table(SAC_Sh$UGS_egg_count_category)
##
## < 10 10 – 49 ≥ 50
## 61 26 19
Follow-up case-studies in Patient X and Patient Y
# Import miracidia data
miracidia <- read.csv("schisto_miracidia.csv")
# Divide into baseline and follow-up
miracidia_baseline <- miracidia[miracidia$Visit == "Baseline", ]
miracidia_fu <- miracidia[miracidia$Visit == "Follow-up", ]
# Extract individual participant data
X <- miracidia[miracidia$Participant == "Patient X", ]
Y <- miracidia[miracidia$Participant == "Patient Y", ]
Species abundance plots
# Create species relative abundance plots for Patient X
# Handle the data
species_abundance_X <- X %>%
group_by(Species, Sample, Visit) %>%
summarise(Abundance = n(), .groups = 'drop') %>%
group_by(Sample, Visit) %>%
mutate(Relative_Abundance = Abundance / sum(Abundance))
# Generate the plot
abundance_plot_X <- ggplot(data = species_abundance_X, aes(x = Sample, y = Relative_Abundance, fill = Species)) +
geom_bar(stat = "identity", position = "stack") +
labs(x = "Sample Type", y = "Relative Abundance") +
facet_wrap(~ Visit) +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4", "#FB9A99")) +
theme(legend.text = element_text(face = "italic"))
# Save the plot
ggsave("abundance_plot_X.png", abundance_plot_X, width = 4.5, height = 4)
# Print the plot
print(abundance_plot_X)

# Create species relative abundance plots for Patient Y
# Handle the data
species_abundance_Y <- Y %>%
group_by(Species, Sample, Visit) %>%
summarise(Abundance = n(), .groups = 'drop') %>%
group_by(Sample, Visit) %>%
mutate(Relative_Abundance = Abundance / sum(Abundance))
# Generate the plot
abundance_plot_Y <- ggplot(data = species_abundance_Y, aes(x = Sample, y = Relative_Abundance, fill = Species)) +
geom_bar(stat = "identity", position = "stack") +
labs(x = "Sample Type", y = "Relative Abundance") +
facet_wrap(~ Visit) +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99")) +
theme(legend.text = element_text(face = "italic"))
# Save the plot
ggsave("abundance_plot_Y.png", abundance_plot_Y, width = 4.5, height = 4)
# Print the plot
print(abundance_plot_Y)
