Urinalysis Publication Script

Author

Raphaella Jackson

Modified

December 10, 2024

Setup

Load Packages
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cowplot)

Attaching package: 'cowplot'

The following object is masked from 'package:lubridate':

    stamp
library(reshape2)

Attaching package: 'reshape2'

The following object is masked from 'package:tidyr':

    smiths
library(readxl)
library(ggsankey)
library(ggrepel)
library(geepack)
library(RColorBrewer)
library(kableExtra)

Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows
library(survival)
Load Data
#Participant Metadata
Participant_Metadata <- read.csv("../Data/3_Clean_Publication_Data/2024_11_20.ForPublication.ParticipantMeta.Anon.csv",
                       sep = ",")

#Urine Sample Metadata
Urinalysis <- read.csv("../Data/3_Clean_Publication_Data/2024_11_20.ForPublication.Urinalysis.Anon.csv",
                       sep = ",")
Urinalysis$Sample.date <- as.Date(Urinalysis$Sample.date, tryFormats = c("%d/%m/%Y"))

#Number Urine Samples in Order Collected
Urinalysis <- Urinalysis[order(Urinalysis$Sample.date),]
Urinalysis <- Urinalysis %>% group_by(Participant.ID) %>% mutate(Sample_Number = row_number()) %>% ungroup()

#Urine Sample Metadata (Culture Results)
Urinalysis_Culture <- read.csv("../Data/3_Clean_Publication_Data/2024_11_20.ForPublication.CultureResults.Anon.csv",
                       sep = ",")
Urinalysis_Culture$Sample.date <- as.Date(Urinalysis_Culture$Sample.date, tryFormats = c("%d/%m/%Y"))

#Log of Infection Events for Participants
Event_Log <- read.csv("../Data/3_Clean_Publication_Data/2024_11_20.ForPublication.EventLog.Anon.csv",
                       sep = ",")
Event_Log$Event.Date <- as.Date(Event_Log$Event.Date, tryFormats = c("%d/%m/%Y"))

Section 2.1: Overview of Cohort and UTI Frequency

Demographic and Study Statistics

Participants and Samples

#Number of Samples
Number_of_Samples <- nrow(Urinalysis)

#Number of Participants
Number_of_Participants <- length(unique(Urinalysis$Participant.ID))
Total Samples

637

Total Participants

91

Living Situation

#Living Situation
LivingSituation <- Participant_Metadata %>% count(Living.Situation)
colnames(LivingSituation) <- c("Living Situation","Number of Participants")
LivingSituation$`Percent of Total` <- round(LivingSituation$`Number of Participants`/sum(LivingSituation$`Number of Participants`),digits=2)*100
LivingSituation %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
Living Situation Number of Participants Percent of Total
Lives Alone 29 32
Lives with Carer 51 56
Lives with Family 7 8
Not Reported 4 4

Sex

Sex <- Participant_Metadata %>% count(Sex)
colnames(Sex) <- c("Sex","Number of Participants")
Sex$`Percent of Total` <- round(Sex$`Number of Participants`/sum(Sex$`Number of Participants`),digits=2)*100
Sex %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
Sex Number of Participants Percent of Total
Female 40 44
Male 51 56

Cognitive Diagnosis Groups

Diagnosis_Group <- Participant_Metadata %>% count(Diagnosis.Group)
colnames(Diagnosis_Group) <- c("Diagnosis Group","Number of Participants")
Diagnosis_Group$`Percent of Total` <- round(Diagnosis_Group$`Number of Participants`/sum(Diagnosis_Group$`Number of Participants`),digits=2)*100
Diagnosis_Group %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
Diagnosis Group Number of Participants Percent of Total
Alzheimer's Disease 59 65
Dementia in Parkinsons 6 7
Frontotemporal Dementia 7 8
Lewy Body Dementia 2 2
Multi-Type 14 15
Vascular Dementia 3 3

Age

Tmp <- summary(Participant_Metadata$age_at_enrollment)
Age_Mean <- round(as.numeric(Tmp["Mean"]),digits = 0)
Age_LQuant <- round(as.numeric(Tmp["1st Qu."]),digits = 0)
Age_UQuant <- round(as.numeric(Tmp["3rd Qu."]),digits = 0)

ggplot(Participant_Metadata,aes(x=age_at_enrollment))+
  geom_histogram(binwidth = 5)+
  theme_bw()+
  xlab("Age at Enrollment")+
  ylab("Number of Participants")

Age at Enrollment

Mean: 82
Lower Interquartile Bound: 77
Upper Interquartile Bound:88

Samples Per Participant

T1 <- Urinalysis %>% count(Participant.ID)
Tmp <- summary(T1$n)
Mean <- round(as.numeric(Tmp["Mean"]),digits = 0)
LQuant <- round(as.numeric(Tmp["1st Qu."]),digits = 0)
UQuant <- round(as.numeric(Tmp["3rd Qu."]),digits = 0)

ggplot(T1,aes(x=n))+
  geom_histogram(binwidth = 2)+
  theme_bw()+
  xlab("Total Samples Submitted")+
  ylab("Number of Participants")

Age at Enrollment

Mean: 7
Lower Interquartile Bound: 3
Upper Interquartile Bound:10

Sankey Plot Overview

#Group Participants into bins by total samples collected for visual
Participant_Metadata$Samples_Collected_Bin <- 
    Participant_Metadata %>% mutate(
                                    LS = case_when(
                                            Routine.Samples.Collected >= 12 ~ "12-24",
                                            Routine.Samples.Collected >= 6  ~ "6-11",
                                            TRUE                            ~ "<6")) %>%
                                    pull(LS)

#Pull Categories of Interest
Tmp <- Participant_Metadata %>% select(Living.Situation,Sex,Diagnosis.Group,
                                       Submitted.a.UTI.Sample, Samples_Collected_Bin)
#Rename for Visual
colnames(Tmp) <- c("Living Situation","Sex","Diagnosis","Submitted a UTI Sample","Samples Collected")
#Make Long
df <- Tmp %>%
  make_long(`Living Situation`,Sex,`Diagnosis`,`Samples Collected`,`Submitted a UTI Sample`)

#Set factor order for visual 
LS_order <- c("Lives with Carer","Lives Alone","Lives with Family","Not Reported")
G_order <- c("Male","Female")
DG_order <- c("Alzheimer's Disease","Multi-Type","Frontotemporal Dementia",
              "Dementia in Parkinsons","Lewy Body Dementia","Vascular Dementia")
S_order <- c("<6","6-11","12-24")
U_order <- c("No","Yes")
Levels_Order <- append(append(append(append(LS_order,G_order),DG_order),S_order),U_order)
df$node <- factor(df$node,levels = Levels_Order)
df$next_node <- factor(df$next_node,levels = Levels_Order)

Plot1 <- ggplot(df, aes(x = x, next_x = next_x, 
               node = node, next_node = next_node, 
               fill = factor(node), label = node)) +
  geom_sankey(flow.alpha = .6,
              node.color = "gray30") +
  geom_sankey_label(size = 5, color = "white", fill = "grey30")+
  scale_fill_viridis_d(option="mako") +
  theme_sankey(base_size = 20)+
  theme(legend.position = "none")+
  labs(x = NULL)

jpeg(filename = "../Figures/Raw_R_Figures/Participant_Sample_Overview.jpg",res =300,
     width = 14,height = 5, units = "in")
Plot1
dev.off()
quartz_off_screen 
                2 
Plot1

Number of UTI Samples

UTI_Samples <- Urinalysis %>% count(Research_Diagnosis)
colnames(UTI_Samples) <- c("UTI Status","Number of Samples")
UTI_Samples$`Percent of Total` <- round(UTI_Samples$`Number of Samples`/sum(UTI_Samples$`Number of Samples`),digits=2)*100
UTI_Samples %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
UTI Status Number of Samples Percent of Total
No_UTI 512 80
UTI 106 17
Uncertain 19 3

Submitted a UTI Sample

UTI_Sample_Submission <- Participant_Metadata %>% count(Submitted.a.UTI.Sample)
colnames(UTI_Sample_Submission) <- c("Submitted a UTI Sample (1+)","Number of Participants")
UTI_Sample_Submission$`Percent of Total` <- round(UTI_Sample_Submission$`Number of Participants`/sum(UTI_Sample_Submission$`Number of Participants`),digits=2)*100
UTI_Sample_Submission %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
Submitted a UTI Sample (1+) Number of Participants Percent of Total
No 58 64
Yes 33 36

UTI Frequency

#Classify events in the event log by six week periods starting from the first sample collection
T1 <- Event_Log %>% group_by(Participant.ID) %>% slice_min(Event.Date) %>% select(Participant.ID,Event.Date)
colnames(T1) <- c("Participant.ID","Start.Date")
Event_Log <- merge(Event_Log,T1,by="Participant.ID",all = TRUE)
Event_Log$Week6 <- floor(abs(as.numeric(as.Date(Event_Log$Start.Date)-as.Date(Event_Log$Event.Date)))/(7*6))

#Determine Classification by 6 Week Groupings
T1 <- Event_Log %>% count(Participant.ID,UTI,Week6)
T2 <- dcast(T1,Participant.ID+Week6~UTI,fill = 0,value.var = "n")
T2$TrueType <- T2 %>% mutate(LS = case_when(
                                             UTI_Urine_Sample>=1 ~ "UTI",
                                             Reported_UTI>=1     ~ "UTI",
                                                    TRUE         ~ "No UTI Reported")) %>%
                                    pull(LS)

Sample_Log <- T2 %>% select(Participant.ID,Week6,TrueType)
remove(T1,T2,Tmp)
Sample_Log <- Sample_Log %>% group_by(Participant.ID) %>% complete(Week6=0:max(Week6),fill =list(TrueType="No UTI Reported"))

#Determine Participant Type by Number of UTI Events 
T1 <- dcast(Sample_Log %>% count(Participant.ID,TrueType),Participant.ID~TrueType,fill=0,value.var = "n")
T2 <- as.data.frame(Sample_Log %>% group_by(Participant.ID) %>% slice_max(Week6) %>% select(Participant.ID,Week6) %>% ungroup())
Tmp <- merge(T1,T2,by="Participant.ID")
remove(T1,T2)

Tmp$Ratio <- (Tmp$UTI)/(Tmp$Week6+1)
Tmp$PType <- rep("No_UTI",nrow(Tmp))
Tmp[Tmp$UTI>0,"PType"] <- "Low_Frequency"
Tmp[Tmp$Ratio>0.20,"PType"] <- "High_Frequency"
Tmp[Tmp$Ratio>0.5,"PType"] <- "Chronic"
Tmp[Tmp$Week6<6,"PType"] <- "Short-Term"
Tmp$Week6 <- NULL
Sample_Log <- merge(Sample_Log,Tmp,by="Participant.ID")

#Retrieve Types and Add to Urinalysis
T1 <- Tmp[,c("Participant.ID","PType")]
Urinalysis <- merge(Urinalysis,T1,by="Participant.ID",all.x = TRUE)

#Set Order By Samples Collected
T1 <- Event_Log %>% group_by(Participant.ID) %>% slice_max(Week6) %>% select(Participant.ID,Week6)
T1 <- distinct(T1[order(T1$Week6),])
Sample_Log$Participant.ID <- factor(Sample_Log$Participant.ID,levels = T1$Participant.ID)
Sample_Log$PType <- str_replace(Sample_Log$PType,"_"," ")

Plot1 <- ggplot(Sample_Log,aes(x=Week6,y=Participant.ID,fill=TrueType))+
  geom_tile(color="black")+
  facet_wrap(~PType,scales = "free_y")+
  theme_bw()+ 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_fill_manual(values = c("#d6d6d6","red"),name="Window Type")+
  theme(text = element_text(size = 15))+
  theme(legend.position='bottom')+
  ylab("Participants")+
  xlab("Number of 6 Week Windows Since First Sample")+
  theme(axis.text.y=element_blank())+
  theme(strip.background = element_rect(fill="white"))

jpeg(filename = "../Figures/Raw_R_Figures/Participant_UTI_Frequency_Overview.jpg",res =300,
     width = 7,height = 5, units = "in")
Plot1
dev.off()
quartz_off_screen 
                2 
Plot1

Frequency_Types <- Urinalysis %>% count(Participant.ID, PType) %>% count(PType)
colnames(Frequency_Types) <- c("Participant Type","Number of Participants")
Frequency_Types %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
Participant Type Number of Participants
Chronic 8
High_Frequency 14
Low_Frequency 22
No_UTI 19
Short-Term 28

Section 2.2: Microbiological Characteristics of Collected Samples

Leucocytes and Nitrites

#Leucocytes and Nitrites
Leucocytes.df <- Urinalysis %>% count(LEU,NIT)
colnames(Leucocytes.df) <- c("Leucocyte Dipstick Result","Nitrites Dipstick Result","Number of Samples")
Leucocytes.df[is.na(Leucocytes.df)] <- "no result"
Leucocytes.df$`Percent of Total` <- round(Leucocytes.df$`Number of Samples`/sum(Leucocytes.df$`Number of Samples`),digits=2)*100
Leucocytes.df %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
Leucocyte Dipstick Result Nitrites Dipstick Result Number of Samples Percent of Total
1+ negative 48 8
1+ positive 17 3
2+ negative 14 2
2+ positive 9 1
3+ negative 12 2
3+ positive 12 2
negative negative 440 69
negative positive 7 1
trace negative 59 9
trace positive 14 2
trace no result 1 0
no result no result 4 1
Tmp <- melt(Urinalysis[,c("Participant.ID","Sample.date","LEU","NIT", "Research_Diagnosis")],measure.vars = c("LEU","NIT"))
Tmp$value <- factor(Tmp$value,levels = c("negative","trace","trace-lysed","trace-intact","positive","1+","2+","3+"))
Tmp <- Tmp[!is.na(Tmp$value),]
Tmp[Tmp$Research_Diagnosis%in%"Uncertain","Research_Diagnosis"] <- "Indeterminate"
Tmp[Tmp$Research_Diagnosis%in%"No_UTI","Research_Diagnosis"] <- "No-UTI"
Tmp$variable <- as.character(Tmp$variable)
Tmp[Tmp$variable%in%"LEU","variable"] <- "Leucocytes"
Tmp[Tmp$variable%in%"NIT","variable"] <- "Nitrites"

Plot1 <- ggplot(Tmp, aes(fill=Research_Diagnosis,y=value))+
  facet_wrap(~variable,scales = "free_y")+
  geom_bar(color="black")+
  scale_fill_manual(values = c("black","grey","red"),name="UTI Status")+
  theme_bw()+
  theme(text = element_text(size = 15))+
  theme(legend.position='bottom')+
  xlab("Number of Samples")+
  ylab("Dipstick Test Result")+
  theme(strip.background = element_rect(fill="white"))+
    theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))

jpeg(filename = "../Figures/Raw_R_Figures/Microbiology_Overview_Dip.jpg",res =300,
     width = 8.5,height = 3.5, units = "in")
Plot1
dev.off()
quartz_off_screen 
                2 
Plot1

#Leucocytes and Nitrites + UTI Status
LeuNITUTI.df <- Urinalysis %>% count(LEU,NIT,Research_Diagnosis)
colnames(LeuNITUTI.df) <- c("Leucocyte Dipstick Result","Nitrites Dipstick Result","UTI Status","Number of Samples")
LeuNITUTI.df[is.na(LeuNITUTI.df)] <- "no result"
LeuNITUTI.df$`Percent of Total` <- round(LeuNITUTI.df$`Number of Samples`/sum(LeuNITUTI.df$`Number of Samples`),digits=2)*100
LeuNITUTI.df %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
Leucocyte Dipstick Result Nitrites Dipstick Result UTI Status Number of Samples Percent of Total
1+ negative No_UTI 24 4
1+ negative UTI 17 3
1+ negative Uncertain 7 1
1+ positive No_UTI 2 0
1+ positive UTI 14 2
1+ positive Uncertain 1 0
2+ negative No_UTI 7 1
2+ negative UTI 6 1
2+ negative Uncertain 1 0
2+ positive UTI 9 1
3+ negative No_UTI 2 0
3+ negative UTI 9 1
3+ negative Uncertain 1 0
3+ positive UTI 12 2
negative negative No_UTI 425 67
negative negative UTI 11 2
negative negative Uncertain 4 1
negative positive No_UTI 2 0
negative positive UTI 5 1
trace negative No_UTI 46 7
trace negative UTI 9 1
trace negative Uncertain 4 1
trace positive No_UTI 1 0
trace positive UTI 13 2
trace no result No_UTI 1 0
no result no result No_UTI 2 0
no result no result UTI 1 0
no result no result Uncertain 1 0

Plate Growth Overview

#Bin Growth
Urinalysis$BinnedCount <- Urinalysis %>% mutate(
                            LS = case_when(
                                         All_Count == 0 ~ "No Growth",
                                         All_Count <= 5000 ~ "<=5000",
                                         All_Count <= 10000 ~ "<=10000",
                                         All_Count <= 25000 ~ "<=25000",
                                         All_Count <= 50000 ~ "<=50000",
                                         All_Count < 100000 ~ "<100000",
                                         All_Count >= 100000 ~ "100000+",
                                         TRUE              ~ "Unquantified")) %>%
                                    pull(LS)
Urinalysis$`Total Growth (CFU/mL)` <- factor(Urinalysis$BinnedCount,levels=c("No Growth","<=5000","<=10000","<=25000","<=50000","<100000","100000+","Unquantified"))

#Quantify Mixed Growth
Urinalysis$Mixed <- Urinalysis %>% mutate(
                            LS = case_when(
                                         Isolates == 0 ~ "Not Applicable",
                                         Isolates == 1 ~ "Pure Growth",
                                         Isolates > 1 & DominantIsolate == "YES" ~ "Mixed Growth with Dominant Isolate",
                                         Isolates > 1 & DominantIsolate == "NO"  ~ "Mixed Growth",
                                         TRUE              ~ "Unquantified")) %>%
                                    pull(LS)

#Remove Samples Where a Culture Was Not Preformed
Tmp <- Urinalysis[!Urinalysis$Mixed%in% c("Unquantified","Not Applicable") &
                  !Urinalysis$`Total Growth (CFU/mL)`%in% c("Unquantified","Not Applicable"),]

Tmp[Tmp$Research_Diagnosis%in%"Uncertain","Research_Diagnosis"] <- "Indeterminate"
Tmp$Research_Diagnosis <- factor(Tmp$Research_Diagnosis,levels = c("No_UTI","Indeterminate","UTI"))

#Statistics and Tables
Total_Plates_With_Growth <- nrow(Urinalysis_Culture %>% count(Participant.ID,Sample.date))
Growth_Table <- dcast(Tmp %>% count(`Total Growth (CFU/mL)`,Research_Diagnosis,Mixed),`Total Growth (CFU/mL)`+Mixed~Research_Diagnosis,
                      fill=0,value.var = "n")
colnames(Growth_Table) <- c("Total Growth (CFU/mL)","Mixed Growth","No UTI","Indeterminate","UTI")
Total Plates With Growth

392

Growth_Table %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = F)
Total Growth (CFU/mL) Mixed Growth No UTI Indeterminate UTI
<=5000 Mixed Growth 22 0 0
<=5000 Mixed Growth with Dominant Isolate 7 0 0
<=5000 Pure Growth 81 1 0
<=10000 Mixed Growth 11 1 0
<=10000 Mixed Growth with Dominant Isolate 12 0 2
<=10000 Pure Growth 14 0 0
<=25000 Mixed Growth 9 1 1
<=25000 Mixed Growth with Dominant Isolate 11 2 0
<=25000 Pure Growth 6 0 2
<=50000 Mixed Growth 10 0 0
<=50000 Mixed Growth with Dominant Isolate 11 2 1
<=50000 Pure Growth 6 0 1
<100000 Mixed Growth 10 0 0
<100000 Mixed Growth with Dominant Isolate 14 0 0
<100000 Pure Growth 4 0 0
100000+ Mixed Growth 16 1 31
100000+ Mixed Growth with Dominant Isolate 16 3 28
100000+ Pure Growth 11 5 38
Plot1 <- ggplot(Tmp,aes(y=`Total Growth (CFU/mL)`,fill=Research_Diagnosis))+
  facet_grid(~Mixed)+
  geom_bar(color="black")+
  scale_fill_manual(values = c("grey","black","red"),name="UTI Status")+
  theme_bw()+
  theme(text = element_text(size = 15))+
  theme(legend.position='bottom')+
  xlab("Number of Samples")+
  theme(strip.background = element_rect(fill="white"))+
    theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))

jpeg(filename = "../Figures/Raw_R_Figures/Microbiology_Overview_TotalGrowth.jpg",res =300,
     width = 10,height = 3.5, units = "in")
Plot1
dev.off()
quartz_off_screen 
                2 
Plot1

Section 2.2: Microbiological Characteristics of Collected Samples

#Bin Growth 
Urinalysis_Culture$BinnedCount <- Urinalysis_Culture %>% mutate(
                            LS = case_when(
                                         Count == 0      ~ "No Growth",
                                         Count <= 5000   ~ "<=5000",
                                         Count <= 10000  ~ "<=10000",
                                         Count <= 25000  ~ "<=25000",
                                         Count <= 50000  ~ "<=50000",
                                         Count < 100000  ~ "<100000",
                                         Count >= 100000 ~ "100000+",
                                         TRUE            ~ "Unquantified")) %>%
                                    pull(LS)
Urinalysis_Culture$`CFU/mL` <- factor(Urinalysis_Culture$BinnedCount,levels=c("No Growth","<=5000","<=10000","<=25000","<=50000","<100000","100000+","Unquantified"))

#Add Urinalysis Data to Isolate Data
Urinalysis_Culture$Sample.date <- as.Date(Urinalysis_Culture$Sample.date)
Urinalysis_Culture <- merge(Urinalysis_Culture,Urinalysis[,c("Participant.ID","Sample.date","Research_Diagnosis","PType")],
                        by=c("Participant.ID","Sample.date"))

#Pick out isolates with ID
ID_Urinalysis_Culture <- Urinalysis_Culture[!is.na(Urinalysis_Culture$Con_Gen),]

Isolate Identities

#Total Isolates
Total_Isolates <- nrow(Urinalysis_Culture)

#Isolate Growth
Isolates_Table <- Urinalysis_Culture %>% count(`CFU/mL`)

#Total Sanger ID'd
Isolates_Identified <- nrow(ID_Urinalysis_Culture)

#Isolate Growth ID'd
Isolates_Identified_Table <- ID_Urinalysis_Culture %>% count(`CFU/mL`,Con_Gen)
colnames(Isolates_Identified_Table) <- c("CFU/mL","Genus","Number of Isolates")
Isolates_Identified_Table <- dcast(Isolates_Identified_Table,Genus~`CFU/mL`,value.var = "Number of Isolates",fill=0)
Isolates_Identified_Table$`Total Count` <- rowSums(Isolates_Identified_Table[,-1])
Total Isolates

771

Total Isolates Identified

352

Fisher Test: Taxa Across Growth Classes

Only considers isolates observed greater then 10 times.

#Tax Representation across Growth Classes
Tmp <- dcast(Urinalysis_Culture %>% count(Con_Gen,`CFU/mL`),Con_Gen~`CFU/mL`,fill=0,value.var = "n")
Tmp$Total_Isolates <- rowSums(Tmp[,-1])
T1 <- Tmp[Tmp$Total_Isolates>10,]
T1 <- T1 %>% filter(!is.na(Con_Gen)) %>% select(-Unquantified,-Total_Isolates)
T1
               Con_Gen <=5000 <=10000 <=25000 <=50000 <100000 100000+
1         Enterococcus     40       3       7       9       6       9
2 Escherichia-Shigella     17       6      10       3       4      38
3           Klebsiella      2       0       1       1       0       9
4              Proteus      5       1       0       1       2       3
5          Pseudomonas      4       2       6       2       0       7
6       Staphylococcus     45      12      15       4       3       8
7        Streptococcus      6       2       7       6       6      10
fisher.test(T1[,-1],simulate.p.value = TRUE,B=10000)

    Fisher's Exact Test for Count Data with simulated p-value (based on
    10000 replicates)

data:  T1[, -1]
p-value = 9.999e-05
alternative hypothesis: two.sided

Fisher Test: Taxa Across UTI non-UTI

Only considers isolates observed greater then 10 times.

#Tax Representation across UTI non UTI
Tmp <- dcast(Urinalysis_Culture %>% count(Con_Gen,Research_Diagnosis),Con_Gen~Research_Diagnosis,fill=0,value.var = "n")
Tmp$Total_Isolates <- rowSums(Tmp[,-1])
T1 <- Tmp %>% filter(Total_Isolates>10,!is.na(Con_Gen)) %>% select(-Total_Isolates,-Uncertain)
T1
               Con_Gen No_UTI UTI
1         Enterococcus     58  13
2 Escherichia-Shigella     38  36
3           Klebsiella      3  10
4              Proteus      5   7
5          Pseudomonas     12   9
6       Staphylococcus     70  14
7        Streptococcus     18  14
fisher.test(T1[,-1],simulate.p.value = TRUE,B=10000)

    Fisher's Exact Test for Count Data with simulated p-value (based on
    10000 replicates)

data:  T1[, -1]
p-value = 9.999e-05
alternative hypothesis: two.sided

Taxonomy Visualization - All

#Total Plates with ID
ID_Urinalysis_Culture$BinnedCount <- factor(ID_Urinalysis_Culture$BinnedCount,levels=c("No Growth","<=5000","<=10000","<=25000","<=50000","<100000","100000+","Unquantified"))
ID_Urinalysis_Culture[ID_Urinalysis_Culture$Research_Diagnosis%in%"Uncertain","Research_Diagnosis"] <- "Indeterminate"
ID_Urinalysis_Culture$Research_Diagnosis <- factor(ID_Urinalysis_Culture$Research_Diagnosis,levels = c("No_UTI","Indeterminate","UTI"))

Plot2 <- ggplot(ID_Urinalysis_Culture,aes(y=Con_Gen,fill=BinnedCount))+
  facet_wrap(~Research_Diagnosis)+
  geom_bar(color="black")+
  scale_fill_manual(values=brewer.pal(6,"Reds"),name="Isolate Growth Level (CFU/mL)")+
  theme_bw()+
  theme(text = element_text(size = 15))+
  xlab("Number of Isolates")+
  ylab("Bacterial Genus")+
    theme(plot.margin = unit(c(0.4,1,1,1), "cm"))+ 
  theme(legend.position="none")+
  theme(strip.background = element_rect(fill="white"))+
  theme(axis.title.y=element_blank())+
  ggtitle("All Isolates")+
  theme(plot.title = element_text(hjust = 0.5))

Plot2

Taxonomy Visualization - UTI Most Abundant

Dominant <- ID_Urinalysis_Culture %>% group_by(Participant.ID,Sample.date) %>% slice_max(Count) %>% 
  select(Participant.ID,Sample.date,Con_Gen,PType,Research_Diagnosis,BinnedCount) %>% ungroup()
Dominant <- Dominant[Dominant$Research_Diagnosis%in%"UTI",]

Tmp <- Dominant %>% count(Con_Gen)

Plot3 <- ggplot(Dominant,aes(y=Con_Gen,fill=BinnedCount))+
  geom_bar(color="black")+
  scale_fill_manual(values=brewer.pal(6,"Reds"),name="Isolate Growth Level (CFU/mL)")+
  theme_bw()+
  theme(text = element_text(size = 15))+
  xlab("Number of Isolates")+
  ylab("Bacterial Genus")+
    theme(plot.margin = unit(c(0,1,0.5,1), "cm"))+ 
  theme(legend.position="right")+
  theme(strip.background = element_rect(fill="white"))+
  theme(axis.title.y=element_blank())+
  ggtitle("Most Abundant Isolates in UTI Samples")+
  theme(plot.title = element_text(hjust = 0.5))

Plot4 <- plot_grid(Plot2,Plot3,nrow = 2, rel_heights = c(1.2,1),labels = c("A","B"),label_size = 20)
jpeg(filename = "../Figures/Raw_R_Figures/Sanger_Isolate_Overview.jpg",res =300,
     width = 12,height = 8, units = "in")
Plot4
dev.off()
quartz_off_screen 
                2 
Plot3

Taxonomy Visualization - Participants

Tmp <- Dominant[!Dominant$PType%in%"Short-Term",]

Plot3 <- ggplot(Tmp,aes(y=Participant.ID,fill=Con_Gen))+
  geom_bar(color="black")+
  facet_wrap(~PType,scales = "free_y")+
  scale_fill_viridis_d(option="turbo",name="Genus")+
  theme_bw()+
  theme(text = element_text(size = 15))+
  xlab("Number of Isolates")+
  ylab("Bacterial Genus")+
    theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))+ 
  theme(legend.position="bottom")+
  theme(strip.background = element_rect(fill="white"))+
  theme(axis.title.y=element_blank())

jpeg(filename = "../Figures/Raw_R_Figures/Sanger_Isolate_UTI_PType.jpg",res =300,
     width = 11,height = 4, units = "in")
Plot3
dev.off()
quartz_off_screen 
                2 
Plot3

Section 2.4: Dipstick Superior to Symptoms as an Indicator of UTI

Model Dipstick and Symptoms Against UTI

#Select Data for Modeling relationship between UTI status symptoms and dipstick results
ForModel <- Urinalysis %>% select(Research_Diagnosis,NIT,LEU,Q1:Q7,Participant.ID)
ForModel <- ForModel[complete.cases(ForModel),]
ForModel <- ForModel[!ForModel$Research_Diagnosis%in%"Uncertain",]

ForModel[ForModel$LEU%in%c("3+","2+","1+"),"LEU"] <- "positive"

#Make UTI status a factor
ForModel$RD <- as.numeric(as.factor(ForModel$Research_Diagnosis))-1

#Build the model
gee_modle <- geeglm(formula = RD ~ NIT + LEU + Q1 + Q2 + Q3 + Q4 + Q5 + Q6 + Q7,
                    family = binomial,
                    data= ForModel,
                    id = as.factor(Participant.ID), 
                    corstr = "exchangeable")

#Store Coefficients with Significance
T1 <- data.frame(coef(summary(gee_modle)))
T1$P <- signif(T1$Pr...W..,digits=3)
T1$Pr...W.. <- NULL
T1 <- T1[order(T1$P),]

gee_modle

Call:
geeglm(formula = RD ~ NIT + LEU + Q1 + Q2 + Q3 + Q4 + Q5 + Q6 + 
    Q7, family = binomial, data = ForModel, id = as.factor(Participant.ID), 
    corstr = "exchangeable")

Coefficients:
(Intercept) NITpositive LEUpositive    LEUtrace     Q1OTHER       Q1YES 
-4.15173824  4.26406492  3.57172956  2.23863555  1.19587901  0.39002521 
    Q2OTHER       Q2YES     Q3OTHER       Q3YES     Q4OTHER       Q4YES 
-3.60263092  0.29579190 -0.16260605  1.33687143  1.07715935  0.55317912 
    Q5OTHER       Q5YES     Q6OTHER       Q6YES     Q7OTHER       Q7YES 
 0.12430575  1.19268098  1.53995986  0.09313728 -1.08169555  0.03782606 

Degrees of Freedom: 614 Total (i.e. Null);  596 Residual

Scale Link:                   identity
Estimated Scale Parameters:  [1] 1.020702

Correlation:  Structure = exchangeable    Link = identity 
Estimated Correlation Parameters:
      alpha 
-0.01783112 

Number of clusters:   91   Maximum cluster size: 22 
T1 %>%
  kbl(row.names = TRUE) %>%
  kable_paper("hover", full_width = F)
Estimate Std.err Wald P
(Intercept) -4.1517382 0.3905331 113.0170938 0.00e+00
NITpositive 4.2640649 0.5775700 54.5052623 0.00e+00
LEUpositive 3.5717296 0.4909780 52.9216162 0.00e+00
LEUtrace 2.2386356 0.4182889 28.6427049 1.00e-07
Q2OTHER -3.6026309 1.0304185 12.2239698 4.72e-04
Q6OTHER 1.5399599 0.8502702 3.2802347 7.01e-02
Q5YES 1.1926810 0.6730265 3.1403946 7.64e-02
Q3YES 1.3368714 0.8789362 2.3134740 1.28e-01
Q1OTHER 1.1958790 0.8041457 2.2115922 1.37e-01
Q1YES 0.3900252 0.3692806 1.1155071 2.91e-01
Q4OTHER 1.0771593 1.0300724 1.0935141 2.96e-01
Q4YES 0.5531791 0.5759854 0.9223774 3.37e-01
Q7OTHER -1.0816956 1.3132259 0.6784709 4.10e-01
Q2YES 0.2957919 0.4070937 0.5279392 4.67e-01
Q5OTHER 0.1243058 0.6078316 0.0418231 8.38e-01
Q6YES 0.0931373 0.4614069 0.0407454 8.40e-01
Q3OTHER -0.1626060 1.1739976 0.0191840 8.90e-01
Q7YES 0.0378261 0.3761718 0.0101114 9.20e-01

Plot Dipstick and Symptoms Against UTI

#Plot Symptom Reporting Against UTI Status
Tmp <- Urinalysis %>% select(Q1:Q7,Research_Diagnosis)
df <-  melt(Tmp,id.vars = "Research_Diagnosis")
df <- df[!df$Research_Diagnosis%in%"Uncertain",]

Plot1 <- ggplot(df,aes(y=Research_Diagnosis,fill=value))+
  facet_wrap(~variable)+
  geom_bar(position = "fill",color="black")+
  scale_fill_viridis_d(option="magma",name="Symptom Reported",begin = 0.2,end=0.95)+
  theme_bw()+
  theme(text = element_text(size = 15))+
  xlab("Proportion Responses")+
  ylab("Sample Type")+
    theme(plot.margin = unit(c(1,1,1,1), "cm"))+ 
  theme(legend.position="bottom")+
  geom_vline(xintercept = 0.50,linetype="dashed",color="white")+
  scale_x_continuous(labels=c("0","0.25","0.5","0.75","1"))

#Plot Dipstick Results Against UTI Status
Tmp <- Urinalysis %>% select(NIT,LEU,Research_Diagnosis)
df <-  melt(Tmp,id.vars = "Research_Diagnosis")
df <- df[!df$Research_Diagnosis%in%"Uncertain",]
df$value <- factor(df$value,levels = c("negative","trace","positive","1+","2+","3+"))

Plot2 <- ggplot(df,aes(y=Research_Diagnosis,fill=value))+
  facet_wrap(~variable)+
  geom_bar(position = "fill",color="black")+
  scale_fill_viridis_d(option="magma",name="Dipstick Outcome",begin = 0.2,end=0.95)+
  theme_bw()+
  theme(text = element_text(size = 15))+
  xlab("Proportion Samples")+
  ylab("Sample Type")+
    theme(plot.margin = unit(c(1,1,1,1), "cm"))+ 
  theme(legend.position="bottom")+
  geom_vline(xintercept = 0.50,linetype="dashed",color="white")+
  scale_x_continuous(labels=c("0","0.25","0.5","0.75","1"))


Plot4 <- plot_grid(Plot1,Plot2,rel_widths = c(1,1),
                   labels = c("A","B"),label_size = 20,ncol = 2)

jpeg(filename = "../Figures/Raw_R_Figures/Dipstick_Versus_Symptoms_UTI.jpg",res =300,
     width = 14,height = 5, units = "in")
Plot4
dev.off()
quartz_off_screen 
                2 
Plot4

Plot Predictive Value

Tmp <- Urinalysis %>% select(Symptomatic,LEU,NIT,Research_Diagnosis)
Tmp$Dipstick_Postive <- rep("No",nrow(Tmp))
Tmp[!Tmp$LEU%in%"negative"|!Tmp$NIT%in%"negative","Dipstick_Postive"] <- "Yes"

#Bin Growth 
Tmp$`Dipstick Prediction` <- Tmp %>% mutate(
                            LS = case_when(
                                         Dipstick_Postive == "Yes" & Research_Diagnosis == "UTI" ~ "True Positive",
                                         Dipstick_Postive == "Yes" & Research_Diagnosis == "No_UTI" ~ "False Positive",
                                         Dipstick_Postive == "No" & Research_Diagnosis == "UTI" ~ "False Negative",
                                         Dipstick_Postive == "No" & Research_Diagnosis == "No_UTI" ~ "True Negative",
                                         TRUE              ~ "Unquantified")) %>%
                                    pull(LS)

Tmp$`Symptom Prediction` <- Tmp %>% mutate(
                            LS = case_when(
                                         Symptomatic == "Symptomatic" & Research_Diagnosis == "UTI" ~ "True Positive",
                                         Symptomatic == "Symptomatic" & Research_Diagnosis == "No_UTI" ~ "False Positive",
                                         Symptomatic == "Asymptomatic" & Research_Diagnosis == "UTI" ~ "False Negative",
                                         Symptomatic == "Asymptomatic" & Research_Diagnosis == "No_UTI" ~ "True Negative",
                                         TRUE              ~ "Unquantified")) %>%
                                    pull(LS)

Tmp <- melt(Tmp,measure.vars = c("Symptom Prediction","Dipstick Prediction"))

rwb <-c('#9e0142','#d53e4f','#abdda4','#66c2a5','#fee08b')
Plot1 <- ggplot(Tmp,aes(y=variable,fill=value))+facet_wrap(~value,ncol=5)+
  geom_bar(color="black")+
  scale_fill_manual(values=rwb,name="")+
  theme_bw()+
  theme(text = element_text(size = 15))+
  xlab("Number of Samples")+
    theme(plot.margin = unit(c(1,1,1,1), "cm"))+ 
  theme(legend.position="none")+
  theme(axis.title.y=element_blank())


jpeg(filename = "../Figures/Raw_R_Figures/Dipstick_Versus_Symptoms_Prediction.jpg",res =300,
     width = 12,height = 3, units = "in")
Plot1
dev.off()
quartz_off_screen 
                2 
Plot1