## Attach packages
knitr::opts_chunk$set(echo=TRUE)
library(knitr)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(epiR)
## Loading required package: survival
## Package epiR 2.0.84 is loaded
## Type help(epi.about) for summary information
## Type browseVignettes(package = 'epiR') to learn how to use epiR for applied epidemiological analyses
## 
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(UpSetR)
# Import and arrange SAC data

# Import data
SAC <- read.csv("schisto_data.csv")

# Change sex to a factor variable
SAC$Sex <- as.factor(SAC$Sex)

# Code binary variables as yes/no categorical variables
SAC$Schistosomiasis <- ifelse(SAC$Schistosomiasis == 1, "Yes", "No")
SAC$Schistosomiasis <- factor(SAC$Schistosomiasis, levels = c("Yes", "No"))
SAC$IS <- ifelse(SAC$IS == 1, "Yes", "No")
SAC$IS <- factor(SAC$IS, levels = c("Yes", "No"))
SAC$IS_Sh <- ifelse(SAC$IS_Sh == 1, "Yes", "No")
SAC$IS_Sh <- factor(SAC$IS_Sh, levels = c("Yes", "No"))
SAC$IS_Sman <- ifelse(SAC$IS_Sman == 1, "Yes", "No")
SAC$IS_Sman <- factor(SAC$IS_Sman, levels = c("Yes", "No"))
SAC$IS_Smat <- ifelse(SAC$IS_Smat == 1, "Yes", "No")
SAC$IS_Smat <- factor(SAC$IS_Smat, levels = c("Yes", "No"))
SAC$UGS <- ifelse(SAC$UGS == 1, "Yes", "No")
SAC$UGS <- factor(SAC$UGS, levels = c("Yes", "No"))
SAC$UGS_Sh <- ifelse(SAC$UGS_Sh == 1, "Yes", "No")
SAC$UGS_Sh <- factor(SAC$UGS_Sh, levels = c("Yes", "No"))
SAC$UGS_Sman <- ifelse(SAC$UGS_Sman == 1, "Yes", "No")
SAC$UGS_Sman <- factor(SAC$UGS_Sman, levels = c("Yes", "No"))
SAC$UGS_Smat <- ifelse(SAC$UGS_Smat == 1, "Yes", "No")
SAC$UGS_Smat <- factor(SAC$UGS_Smat, levels = c("Yes", "No"))

# Code CCA categories
SAC$CCA <- ifelse(SAC$CCA == 0, "Negative",
                   ifelse(SAC$CCA == 1, "Trace", "Positive"))
SAC$CCA_trace_positive <- ifelse(SAC$CCA == "Negative", "Negative", "Positive")
SAC$CCA_trace_positive <- factor(SAC$CCA_trace_positive, levels = c("Positive", "Negative"))
SAC$CCA_trace_negative <- ifelse(SAC$CCA == "Positive", "Positive", "Negative")
SAC$CCA_trace_negative <- factor(SAC$CCA_trace_negative, levels = c("Positive", "Negative"))

# Code urogenital infection intensity, grouping negative and 1 - 9 eggs in the same cateory (< 10)
SAC$UGS_egg_count_category[SAC$UGS_egg_count_category %in% c(0, 1)] <- 0
SAC$UGS_egg_count_category <- factor(SAC$UGS_egg_count_category,
  levels = c(0, 2, 3),
  labels = c("< 10", "10 – 49", "≥ 50")
)

# Generate variables for infection site (none, urogenital, intestinal, mixed)
SAC$mixed_infection <- ifelse(SAC$UGS == "Yes" & SAC$IS == "No",  "UGS_only",
                        ifelse(SAC$UGS == "No"  & SAC$IS == "Yes", "IS_only",
                        ifelse(SAC$UGS == "No"  & SAC$IS == "No",  "no_infection",
                        ifelse(SAC$UGS == "Yes" & SAC$IS == "Yes", "mixed", NA))))
SAC$mixed_infection <- as.factor(SAC$mixed_infection)

# Generate variables to indicate species infection regardless of infection site
SAC$Sh <- ifelse(
  rowSums(SAC[, c("IS_Sh", "UGS_Sh")] == "Yes", na.rm = TRUE) > 0,
  "Yes",
  "No"
)
SAC$Sh <- factor(SAC$Sh, levels = c("Yes", "No"))
SAC$Sman <- ifelse(
  rowSums(SAC[, c("IS_Sman", "UGS_Sman")] == "Yes", na.rm = TRUE) > 0,
  "Yes",
  "No"
)
SAC$Sman <- factor(SAC$Sman, levels = c("Yes", "No"))
SAC$Smat <- ifelse(
  rowSums(SAC[, c("IS_Smat", "UGS_Smat")] == "Yes", na.rm = TRUE) > 0,
  "Yes",
  "No"
)
SAC$Smat <- factor(SAC$Smat, levels = c("Yes", "No"))

# Generate variable to indicate whether a species was identified in intestinal infection
SAC$IS_species_ID <- ifelse(
  rowSums(SAC[, c("IS_Sh", "IS_Sman", "IS_Smat")] == "Yes", na.rm = TRUE) > 0,
  "Yes",
  "No"
)
SAC$IS_species_ID <- factor(SAC$IS_species_ID, levels = c("Yes", "No"))

Summary data

Demographics

# Print summary statistics for age
summary(SAC$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00    8.00   10.00    9.98   12.00   15.00
# Print summary statistics for sex
summary(SAC$Sex)
## female   male 
##    123    124

Prevalence

# Generate summary statistics for Schistosomiasis

# Define the variables of interest
vars <- c("Schistosomiasis", "Sh", "Sman", "Smat", "IS", "IS_Sh", "IS_Sman",
          "IS_Smat", "UGS", "UGS_Sh", "UGS_Sman", "UGS_Smat")

# Initialize the summary dataframe
prevalence_summary <- data.frame(
  Variable = vars,
  Positive_Count = NA,
  Total_N = NA,
  Prevalence_Percent = NA,
  CI_Lower = NA,
  CI_Upper = NA
)

# Loop through variables and calculate values
for (i in seq_along(vars)) {
  var <- vars[i]
  x <- SAC[[var]]
  pos <- sum(x %in% c("Yes", "Positive"), na.rm = TRUE)
  total <- sum(!is.na(x))
  test <- prop.test(pos, total)

  prevalence_summary$Positive_Count[i] <- pos
  prevalence_summary$Total_N[i] <- total
  prevalence_summary$Prevalence_Percent[i] <- round(100 * pos / total, 1)
  prevalence_summary$CI_Lower[i] <- round(100 * test$conf.int[1], 1)
  prevalence_summary$CI_Upper[i] <- round(100 * test$conf.int[2], 1)
}

# Print the table
print(prevalence_summary)
##           Variable Positive_Count Total_N Prevalence_Percent CI_Lower CI_Upper
## 1  Schistosomiasis            154     247               62.3     56.0     68.4
## 2               Sh            106     247               42.9     36.7     49.4
## 3             Sman             34     247               13.8      9.8     18.8
## 4             Smat             30     247               12.1      8.5     17.0
## 5               IS            125     247               50.6     44.2     57.0
## 6            IS_Sh             46     247               18.6     14.1     24.2
## 7          IS_Sman             23     247                9.3      6.1     13.8
## 8          IS_Smat             18     247                7.3      4.5     11.5
## 9              UGS             86     247               34.8     29.0     41.2
## 10          UGS_Sh             86     247               34.8     29.0     41.2
## 11        UGS_Sman             15     247                6.1      3.6     10.0
## 12        UGS_Smat             17     247                6.9      4.2     11.0

Plot co-infection

# Make subset of data for an UpSet plot
SAC_upset <- SAC[, c("Sh", "Sman", "Smat")]

# Convert Yes/No to binary 1/0
SAC_upset$Sh    <- ifelse(SAC_upset$Sh == "Yes", 1, 0)
SAC_upset$Sman  <- ifelse(SAC_upset$Sman == "Yes", 1, 0)
SAC_upset$Smat  <- ifelse(SAC_upset$Smat == "Yes", 1, 0)

# Convert colnames for plotting
colnames(SAC_upset) <- c("S. haematobium", "S. mansoni", "S. mattheei")

# Open a JPEG device
jpeg("coinfection_plot.jpg", width = 6000, height = 3000, res = 800)  # Adjust size/resolution as needed

# Create the plot
upset_plot <- upset(SAC_upset,
      sets = c("S. haematobium", "S. mansoni", "S. mattheei"),
      order.by = "freq",
      keep.order = TRUE,
      main.bar.color = "#1B9E77",
      sets.bar.color = "#C51B7D",
      sets.x.label = "Individuals per species",
      mainbar.y.label = "Individuals per combination")

# Close the device
dev.off()
## png 
##   2
# Print the plot
print(upset_plot)

Infection location

# Print infection location(s)
summary(SAC$mixed_infection)
##      IS_only        mixed no_infection     UGS_only 
##           68           57           93           29

POC-CCA prevalence

# Print CCA prevalence if trace = positive
summary(SAC$CCA_trace_positive)
## Positive Negative 
##       37      210
# Print CCA prevalence if trace = negative
summary(SAC$CCA_trace_negative)
## Positive Negative 
##       17      230

Analysis of intestinal schistosomiasis diagnostics

Investigating infection intensity (Cq/Ct value) and species-level diagnosis

# Extract intestinal schistosomiasis cases
SAC_IS <- subset(SAC, IS == "Yes")

# Show the number of cases in which species could be identified
summary(SAC_IS$IS_species_ID)
## Yes  No 
##  66  59
# Plot Cq value vs species-specific detection
Ct_vs_species_plot <- ggplot(SAC_IS, aes(x = IS_species_ID, y = Faecal_schis_gen_Cq, fill = IS_species_ID)) +
  geom_boxplot() +
  labs(
    x = "Species-specific mtDNA detection",
    y = "Genus-specific ITS2 real-time PCR Ct value",
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("No" = "lightblue", "Yes" = "lightgreen"))

# Save the plot
ggsave("Ct_vs_species_plot.jpg", plot = Ct_vs_species_plot, width = 6, height = 4)

# Print the plot
print(Ct_vs_species_plot)

# Perform a Wilcoxon Rank-Sum Test (non-parametric) to compare Ct values between the two groups
wilcox.test(Faecal_schis_gen_Cq ~ IS_species_ID, data = SAC_IS)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Faecal_schis_gen_Cq by IS_species_ID
## W = 367, p-value = 5.657e-15
## alternative hypothesis: true location shift is not equal to 0
# Plot percentage of cases with species-level ID for each species

# Reshape data into long format
SAC_long <- SAC_IS %>%
  select(IS_Sh, IS_Sman, IS_Smat) %>%
  pivot_longer(cols = everything(), names_to = "Species", values_to = "Response")

# Calculate percentage 'Yes' for each species
percent_yes <- SAC_long %>%
  group_by(Species) %>%
  summarise(Percent_Yes = mean(Response == "Yes") * 100)

# Rename species for display
percent_yes$Species <- factor(percent_yes$Species,
                              levels = c("IS_Sh", "IS_Sman", "IS_Smat"),
                              labels = c("italic('S. haematobium')", 
                                         "italic('S. mansoni')", 
                                         "italic('S. mattheei')"))

# Custom colors
custom_colors <- c("lightblue", "lightgreen", "plum")

# Plot percentage of cases in which a species was identified
IS_species_plot <- ggplot(percent_yes, aes(x = Species, y = Percent_Yes, fill = Species)) +
  geom_col(width = 0.6, color = "black") +
  labs(
    x = NULL,
    y = "% of intestinal schistosomiasis cases"
  ) +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(limits = c(0, 100), expand = expansion(mult = c(0, 0.05))) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none") +
  scale_x_discrete(labels = function(x) parse(text = x))

# Save the plot
ggsave("IS_species_plot.jpg", plot = IS_species_plot, width = 5, height = 4)

# Print the plot
print(IS_species_plot)

Sensitivity and specificity of CCA

# Create 2x2 matrix for CCA (trace positive) vs intestinal schistosomiasis
tab_IS_CCA_trace_positive <- table(SAC$CCA_trace_positive, SAC$IS)

# Run diagnostic test evaluation
epi.tests(tab_IS_CCA_trace_positive)
##           Outcome +    Outcome -      Total
## Test +           25           12         37
## Test -          100          110        210
## Total           125          122        247
## 
## Point estimates and 95% CIs:
## --------------------------------------------------------------
## Apparent prevalence *                  0.15 (0.11, 0.20)
## True prevalence *                      0.51 (0.44, 0.57)
## Sensitivity *                          0.20 (0.13, 0.28)
## Specificity *                          0.90 (0.83, 0.95)
## Positive predictive value *            0.68 (0.50, 0.82)
## Negative predictive value *            0.52 (0.45, 0.59)
## Positive likelihood ratio              2.03 (1.07, 3.86)
## Negative likelihood ratio              0.89 (0.80, 0.99)
## False T+ proportion for true D- *      0.10 (0.05, 0.17)
## False T- proportion for true D+ *      0.80 (0.72, 0.87)
## False T+ proportion for T+ *           0.32 (0.18, 0.50)
## False T- proportion for T- *           0.48 (0.41, 0.55)
## Correctly classified proportion *      0.55 (0.48, 0.61)
## --------------------------------------------------------------
## * Exact CIs
# Create 2x2 matrix for CCA (trace negative) vs intestinal schistosomiasis
tab_IS_CCA_trace_negative <- table(SAC$CCA_trace_negative, SAC$IS)

# Run diagnostic test evaluation
epi.tests(tab_IS_CCA_trace_negative)
##           Outcome +    Outcome -      Total
## Test +           11            6         17
## Test -          114          116        230
## Total           125          122        247
## 
## Point estimates and 95% CIs:
## --------------------------------------------------------------
## Apparent prevalence *                  0.07 (0.04, 0.11)
## True prevalence *                      0.51 (0.44, 0.57)
## Sensitivity *                          0.09 (0.04, 0.15)
## Specificity *                          0.95 (0.90, 0.98)
## Positive predictive value *            0.65 (0.38, 0.86)
## Negative predictive value *            0.50 (0.44, 0.57)
## Positive likelihood ratio              1.79 (0.68, 4.69)
## Negative likelihood ratio              0.96 (0.90, 1.03)
## False T+ proportion for true D- *      0.05 (0.02, 0.10)
## False T- proportion for true D+ *      0.91 (0.85, 0.96)
## False T+ proportion for T+ *           0.35 (0.14, 0.62)
## False T- proportion for T- *           0.50 (0.43, 0.56)
## Correctly classified proportion *      0.51 (0.45, 0.58)
## --------------------------------------------------------------
## * Exact CIs
# Create 2x2 matrix for CCA (trace positive) vs S. mansoni intestinal infection
tab_IS_Sman_CCA_trace_positive <- table(SAC$CCA_trace_positive, SAC$IS_Sman)

# Run diagnostic test evaluation
epi.tests(tab_IS_Sman_CCA_trace_positive)
##           Outcome +    Outcome -      Total
## Test +            7           30         37
## Test -           16          194        210
## Total            23          224        247
## 
## Point estimates and 95% CIs:
## --------------------------------------------------------------
## Apparent prevalence *                  0.15 (0.11, 0.20)
## True prevalence *                      0.09 (0.06, 0.14)
## Sensitivity *                          0.30 (0.13, 0.53)
## Specificity *                          0.87 (0.81, 0.91)
## Positive predictive value *            0.19 (0.08, 0.35)
## Negative predictive value *            0.92 (0.88, 0.96)
## Positive likelihood ratio              2.27 (1.13, 4.58)
## Negative likelihood ratio              0.80 (0.61, 1.06)
## False T+ proportion for true D- *      0.13 (0.09, 0.19)
## False T- proportion for true D+ *      0.70 (0.47, 0.87)
## False T+ proportion for T+ *           0.81 (0.65, 0.92)
## False T- proportion for T- *           0.08 (0.04, 0.12)
## Correctly classified proportion *      0.81 (0.76, 0.86)
## --------------------------------------------------------------
## * Exact CIs
# Create 2x2 matrix for CCA (trace negative) vs intestinal schistosomiasis
tab_IS_Sman_CCA_trace_negative <- table(SAC$CCA_trace_negative, SAC$IS_Sman)

# Run diagnostic test evaluation
epi.tests(tab_IS_Sman_CCA_trace_negative)
##           Outcome +    Outcome -      Total
## Test +            5           12         17
## Test -           18          212        230
## Total            23          224        247
## 
## Point estimates and 95% CIs:
## --------------------------------------------------------------
## Apparent prevalence *                  0.07 (0.04, 0.11)
## True prevalence *                      0.09 (0.06, 0.14)
## Sensitivity *                          0.22 (0.07, 0.44)
## Specificity *                          0.95 (0.91, 0.97)
## Positive predictive value *            0.29 (0.10, 0.56)
## Negative predictive value *            0.92 (0.88, 0.95)
## Positive likelihood ratio              4.06 (1.57, 10.50)
## Negative likelihood ratio              0.83 (0.67, 1.03)
## False T+ proportion for true D- *      0.05 (0.03, 0.09)
## False T- proportion for true D+ *      0.78 (0.56, 0.93)
## False T+ proportion for T+ *           0.71 (0.44, 0.90)
## False T- proportion for T- *           0.08 (0.05, 0.12)
## Correctly classified proportion *      0.88 (0.83, 0.92)
## --------------------------------------------------------------
## * Exact CIs

POC-CCA results in different groups

# S. mansoni (intestinal schistosomiasis with no atypical species) only
Sman_IS_only <- SAC %>%
  filter(
    UGS == "No",
    IS == "Yes",
    Sh == "No",
    Sman == "Yes",
    Smat == "No"
  )
Sman_IS_only$CCA <- as.factor(Sman_IS_only$CCA)
summary(Sman_IS_only$CCA)
## Negative Positive    Trace 
##        4        1        1
# S. haematobium only
Sh_UGS_only <- SAC %>%
  filter(
    UGS == "Yes",
    IS == "No",
    Sh == "Yes",
    Sman == "No",
    Smat == "No"
  )
Sh_UGS_only$CCA <- as.factor(Sh_UGS_only$CCA)
summary(Sh_UGS_only$CCA)
## Negative Positive 
##       22        3
# S. haematobium x S. mattheei, no S. mansoni
Sh_Smat_only <- SAC %>%
  filter(
    Sh == "Yes",
    Sman == "No",
    Smat == "Yes"
  )
Sh_Smat_only$CCA <- as.factor(Sh_Smat_only$CCA)
summary(Sh_Smat_only$CCA)
## Negative Positive 
##       10        3

S. mattheei and S. haematobium dynamics

# S. mattheei in urine and stool
table("UGS_Smat" = SAC$UGS_Smat, "IS_Smat" = SAC$IS_Smat)
##         IS_Smat
## UGS_Smat Yes  No
##      Yes   5  12
##      No   13 217
# Plot S. mattheei in urine and stool by S. haematobium infection status
Smat_urine_stool_by_Sh <- ggplot(SAC, aes(x = IS_Smat, y = UGS_Smat, color = Sh, shape = Sh)) +
  geom_jitter(position = position_jitter(seed = 1)) +
  labs(
    x = expression(italic("S. mattheei") ~ "intestinal infection"),
    y = expression(italic("S. mattheei") ~ "urogenital infection"),
    color = expression(italic("S. haematobium") ~ "infection"),
    shape = expression(italic("S. haematobium") ~ "infection"),
  ) +
  scale_color_manual(values = c("Yes" = "brown1", "No" = "#0072B2")) +
  theme_minimal()

# Save the plot
ggsave("Smat_urine_stool_by_Sh.jpg", plot = Smat_urine_stool_by_Sh, width = 6, height = 5.5)

# Print the plot
print(Smat_urine_stool_by_Sh)

# S. haematobium in urine and stool
table("UGS_Sh" = SAC$UGS_Sh, "IS_Sh" = SAC$IS_Sh)
##       IS_Sh
## UGS_Sh Yes  No
##    Yes  26  60
##    No   20 141
# Plot S. haematobium in urine and stool by S. mattheei infection status
Sh_urine_stool_by_Smat <- ggplot(SAC, aes(x = IS_Sh, y = UGS_Sh, color = Smat, shape = Smat)) +
  geom_jitter(position = position_jitter(seed = 1)) +
  labs(
    x = expression(italic("S. haematobium") ~ "intestinal infection"),
    y = expression(italic("S. haematobium") ~ "urogenital infection"),
    color = expression(italic("S. mattheei") ~ "infection"),
    shape = expression(italic("S. mattheei") ~ "infection"),
  ) +
  scale_color_manual(values = c("Yes" = "#C51B7D", "No" = "#1B9E77")) +
  theme_minimal()

# Save the plot
ggsave("Sh_urine_stool_by_Smat.jpg", plot = Sh_urine_stool_by_Smat, width = 6, height = 5.5)

# Print the plot
print(Sh_urine_stool_by_Smat)

Investigating high rates of S. haematobium DNA in stool, and associations with S. mattheei infection

# Extract S. haematobium cases
SAC_Sh <- subset(SAC, Sh == "Yes")

# Set reference categories for regression analysis
SAC_Sh$IS_Sh <- relevel(SAC_Sh$IS_Sh, ref = "No")
SAC_Sh$Smat <- relevel(SAC_Sh$Smat, ref = "No")
SAC_Sh$Sman <- relevel(SAC_Sh$Sman, ref = "No")
SAC_Sh$UGS_egg_count_category <- relevel(SAC_Sh$UGS_egg_count_category, ref = "< 10")

# Run logistic regression
Sh_stool_model <- glm(IS_Sh ~ Smat + Sman + Age + Sex + UGS_egg_count_category, data = SAC_Sh, family = binomial)

# Summary of the model
summary(Sh_stool_model)
## 
## Call:
## glm(formula = IS_Sh ~ Smat + Sman + Age + Sex + UGS_egg_count_category, 
##     family = binomial, data = SAC_Sh)
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                    0.89365    0.82581   1.082  0.27919   
## SmatYes                        1.55214    0.56999   2.723  0.00647 **
## SmanYes                        0.58436    0.59085   0.989  0.32266   
## Age                           -0.19223    0.08477  -2.268  0.02334 * 
## Sexmale                        0.87254    0.43838   1.990  0.04655 * 
## UGS_egg_count_category10 – 49 -0.64173    0.56970  -1.126  0.25998   
## UGS_egg_count_category≥ 50    -0.13724    0.68142  -0.201  0.84039   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.09  on 105  degrees of freedom
## Residual deviance: 126.69  on  99  degrees of freedom
## AIC: 140.69
## 
## Number of Fisher Scoring iterations: 4
# Exponentiate to get odds ratio and confidence interval
exp(cbind(Odds_Ratio = coef(Sh_stool_model), confint(Sh_stool_model)))
## Waiting for profiling to be done...
##                               Odds_Ratio     2.5 %    97.5 %
## (Intercept)                    2.4440227 0.4868934 12.676936
## SmatYes                        4.7215484 1.6069277 15.370300
## SmanYes                        1.7938477 0.5616399  5.846801
## Age                            0.8251159 0.6941399  0.969985
## Sexmale                        2.3929897 1.0250365  5.770709
## UGS_egg_count_category10 – 49  0.5263787 0.1643077  1.565578
## UGS_egg_count_category≥ 50     0.8717619 0.2191079  3.282871

Model diagnostics

# Check for multicollinearity
car::vif(Sh_stool_model)
##                            GVIF Df GVIF^(1/(2*Df))
## Smat                   1.148935  1        1.071884
## Sman                   1.434776  1        1.197821
## Age                    1.202008  1        1.096361
## Sex                    1.040747  1        1.020170
## UGS_egg_count_category 1.463997  2        1.099981
# Check diagnostic plots
plot(Sh_stool_model)

Urogential schistosomiasis infection intensity among patients with confirmed S. haematobium infection

# Print breakdown of infection intensity categories
table(SAC_Sh$UGS_egg_count_category)
## 
##    < 10 10 – 49    ≥ 50 
##      61      26      19

Follow-up case-studies in Patient X and Patient Y

# Import miracidia data
miracidia <- read.csv("schisto_miracidia.csv")

# Divide into baseline and follow-up
miracidia_baseline <- miracidia[miracidia$Visit == "Baseline", ]
miracidia_fu <- miracidia[miracidia$Visit == "Follow-up", ]

# Extract individual participant data
X <- miracidia[miracidia$Participant == "Patient X", ]
Y <- miracidia[miracidia$Participant == "Patient Y", ]

Species abundance plots

# Create species relative abundance plots for Patient X

# Handle the data
species_abundance_X <- X %>%
  group_by(Species, Sample, Visit) %>%
  summarise(Abundance = n(), .groups = 'drop') %>%
  group_by(Sample, Visit) %>%
  mutate(Relative_Abundance = Abundance / sum(Abundance))

# Generate the plot
abundance_plot_X <- ggplot(data = species_abundance_X, aes(x = Sample, y = Relative_Abundance, fill = Species)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(x = "Sample Type", y = "Relative Abundance") +
  facet_wrap(~ Visit) +
  theme_bw() +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = c("#A6CEE3", "#1F78B4", "#FB9A99")) +
  theme(legend.text = element_text(face = "italic"))

# Save the plot
ggsave("abundance_plot_X.png", abundance_plot_X, width = 4.5, height = 4)

# Print the plot
print(abundance_plot_X)

# Create species relative abundance plots for Patient Y

# Handle the data
species_abundance_Y <- Y %>%
  group_by(Species, Sample, Visit) %>%
  summarise(Abundance = n(), .groups = 'drop') %>%
  group_by(Sample, Visit) %>%
  mutate(Relative_Abundance = Abundance / sum(Abundance))

# Generate the plot
abundance_plot_Y <- ggplot(data = species_abundance_Y, aes(x = Sample, y = Relative_Abundance, fill = Species)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(x = "Sample Type", y = "Relative Abundance") +
  facet_wrap(~ Visit) +
  theme_bw() +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99")) +
  theme(legend.text = element_text(face = "italic"))

# Save the plot
ggsave("abundance_plot_Y.png", abundance_plot_Y, width = 4.5, height = 4)

# Print the plot
print(abundance_plot_Y)

Mitochondrial DNA inheritance in S. haematobium x S. mattheei hybrids (stool and urine)

# Extract Sh x Smat miracidia
miracidia_ShxSmat <- miracidia[miracidia$Species == "S. haematobium x S. mattheei", ]

# Divide into baseline and follow-up
miracidia_ShxSmat_baseline <- miracidia_ShxSmat[miracidia_ShxSmat$Visit == "Baseline", ]
miracidia_ShxSmat_fu <- miracidia_ShxSmat[miracidia_ShxSmat$Visit == "Follow-up", ]
# Create mitochondrial DNA inheritance plots S. haematobium x S. mattheei miracidia

# Handle the data
ShxSmat_plot_data <- miracidia_ShxSmat %>%
  group_by(Sample, Mito_DNA, Visit) %>%
  summarise(Abundance = n(), .groups = 'drop') %>%
  group_by(Mito_DNA, Visit)

# Reduce length of species names for plotting
ShxSmat_plot_data <- ShxSmat_plot_data %>%
  mutate(Mito_DNA = ifelse(Mito_DNA == "S. haematobium", "S. haem.", Mito_DNA)) %>%
  mutate(Mito_DNA = ifelse(Mito_DNA == "S. mattheei", "S. mat.", Mito_DNA))

# Generate the plot
mtDNA_plot <- ggplot(data = ShxSmat_plot_data, aes(x = Mito_DNA, y = Abundance, fill = Sample)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(x = "Species (mtDNA)", y = "Count") +
  facet_wrap(~ Visit) +
  theme_bw() +
  scale_fill_manual(values = c("#1B9E77", "brown1")) +
  theme(axis.text.x = element_text(face = "italic"))

# Save the plot
ggsave("mtDNA_plot.png", mtDNA_plot, width = 4, height = 4)

# Print the plot
print(mtDNA_plot)