# Cohort size ----
# Each row of Urinalysis is counted as one sample.
Number_of_Samples <- nrow(Urinalysis)

# Participants are the distinct IDs that occur in Urinalysis.
Number_of_Participants <- length(unique(Urinalysis[["Participant.ID"]]))
Total Samples
637
Total Participants
91
Living Situation
# Living situation ----
# Count participants per living situation and add each category's share of
# the cohort as a percentage.
LivingSituation <- Participant_Metadata %>%
  count(Living.Situation)
colnames(LivingSituation) <- c("Living Situation", "Number of Participants")

# The proportion is rounded to two decimals and then scaled to a percentage.
LivingSituation$`Percent of Total` <- round(
  LivingSituation$`Number of Participants` /
    sum(LivingSituation$`Number of Participants`),
  digits = 2
) * 100

# Render as a table. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
LivingSituation %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = FALSE)
Living Situation
Number of Participants
Percent of Total
Lives Alone
29
32
Lives with Carer
51
56
Lives with Family
7
8
Not Reported
4
4
Sex
# Sex ----
# Count participants per sex and add each category's share of the cohort as
# a percentage.
Sex <- Participant_Metadata %>%
  count(Sex)
colnames(Sex) <- c("Sex", "Number of Participants")

# The proportion is rounded to two decimals and then scaled to a percentage.
Sex$`Percent of Total` <- round(
  Sex$`Number of Participants` / sum(Sex$`Number of Participants`),
  digits = 2
) * 100

# Render as a table. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
Sex %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = FALSE)
Sex
Number of Participants
Percent of Total
Female
40
44
Male
51
56
Cognitive Diagnosis Groups
# Cognitive diagnosis groups ----
# Count participants per diagnosis group and add each group's share of the
# cohort as a percentage.
Diagnosis_Group <- Participant_Metadata %>%
  count(Diagnosis.Group)
colnames(Diagnosis_Group) <- c("Diagnosis Group", "Number of Participants")

# The proportion is rounded to two decimals and then scaled to a percentage.
Diagnosis_Group$`Percent of Total` <- round(
  Diagnosis_Group$`Number of Participants` /
    sum(Diagnosis_Group$`Number of Participants`),
  digits = 2
) * 100

# Render as a table. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
Diagnosis_Group %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = FALSE)
Diagnosis Group
Number of Participants
Percent of Total
Alzheimer's Disease
59
65
Dementia in Parkinsons
6
7
Frontotemporal Dementia
7
8
Lewy Body Dementia
2
2
Multi-Type
14
15
Vascular Dementia
3
3
Age
# Age at enrollment ----
# summary() supplies the mean and the quartiles of the enrollment ages.
Tmp <- summary(Participant_Metadata$age_at_enrollment)

# Mean, lower quartile and upper quartile, each rounded to a whole number.
Age_Mean <- round(as.numeric(Tmp["Mean"]))
Age_LQuant <- round(as.numeric(Tmp["1st Qu."]))
Age_UQuant <- round(as.numeric(Tmp["3rd Qu."]))

# Histogram of enrollment ages with bins of width 5.
ggplot(Participant_Metadata, aes(x = age_at_enrollment)) +
  geom_histogram(binwidth = 5) +
  theme_bw() +
  labs(x = "Age at Enrollment", y = "Number of Participants")
# Participant overview Sankey diagram ----
# Group participants into bins by total routine samples collected. Conditions
# are checked top-down, so anything that does not satisfy the first two
# (including a missing count) falls into "<6".
Participant_Metadata$Samples_Collected_Bin <- case_when(
  Participant_Metadata$Routine.Samples.Collected >= 12 ~ "12-24",
  Participant_Metadata$Routine.Samples.Collected >= 6 ~ "6-11",
  TRUE ~ "<6"
)

# Pull the categories of interest and rename them for display.
Tmp <- Participant_Metadata %>%
  select(
    Living.Situation, Sex, Diagnosis.Group,
    Submitted.a.UTI.Sample, Samples_Collected_Bin
  )
colnames(Tmp) <- c(
  "Living Situation", "Sex", "Diagnosis",
  "Submitted a UTI Sample", "Samples Collected"
)

# Reshape to the long x / next_x / node / next_node layout used by the plot;
# the argument order here is the left-to-right order of the stages.
df <- Tmp %>%
  make_long(
    `Living Situation`, Sex, `Diagnosis`,
    `Samples Collected`, `Submitted a UTI Sample`
  )

# Set the factor order of the nodes, one vector per stage.
LS_order <- c("Lives with Carer", "Lives Alone", "Lives with Family", "Not Reported")
G_order <- c("Male", "Female")
DG_order <- c(
  "Alzheimer's Disease", "Multi-Type", "Frontotemporal Dementia",
  "Dementia in Parkinsons", "Lewy Body Dementia", "Vascular Dementia"
)
S_order <- c("<6", "6-11", "12-24")
U_order <- c("No", "Yes")
Levels_Order <- c(LS_order, G_order, DG_order, S_order, U_order)
df$node <- factor(df$node, levels = Levels_Order)
df$next_node <- factor(df$next_node, levels = Levels_Order)

Plot1 <- ggplot(
  df,
  aes(
    x = x, next_x = next_x, node = node, next_node = next_node,
    fill = factor(node), label = node
  )
) +
  geom_sankey(flow.alpha = .6, node.color = "gray30") +
  geom_sankey_label(size = 5, color = "white", fill = "grey30") +
  scale_fill_viridis_d(option = "mako") +
  theme_sankey(base_size = 20) +
  theme(legend.position = "none") +
  labs(x = NULL)

# Write the figure to disk. print() is explicit so the plot is drawn into the
# jpeg device even where top-level auto-printing is not active (for example
# when the script is run through source()).
jpeg(
  filename = "../Figures/Raw_R_Figures/Participant_Sample_Overview.jpg",
  res = 300, width = 14, height = 5, units = "in"
)
print(Plot1)
dev.off()
quartz_off_screen
2
Plot1
Number of UTI Samples
# Number of UTI samples ----
# Count samples per research diagnosis and add each status's share of all
# samples as a percentage.
UTI_Samples <- Urinalysis %>%
  count(Research_Diagnosis)
colnames(UTI_Samples) <- c("UTI Status", "Number of Samples")

# The proportion is rounded to two decimals and then scaled to a percentage.
UTI_Samples$`Percent of Total` <- round(
  UTI_Samples$`Number of Samples` / sum(UTI_Samples$`Number of Samples`),
  digits = 2
) * 100

# Render as a table. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
UTI_Samples %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = FALSE)
UTI Status
Number of Samples
Percent of Total
No_UTI
512
80
UTI
106
17
Uncertain
19
3
Submitted a UTI Sample
# Participants who submitted a UTI sample ----
# Count participants by the Submitted.a.UTI.Sample flag and add each group's
# share of the cohort as a percentage.
UTI_Sample_Submission <- Participant_Metadata %>%
  count(Submitted.a.UTI.Sample)
colnames(UTI_Sample_Submission) <- c(
  "Submitted a UTI Sample (1+)", "Number of Participants"
)

# The proportion is rounded to two decimals and then scaled to a percentage.
UTI_Sample_Submission$`Percent of Total` <- round(
  UTI_Sample_Submission$`Number of Participants` /
    sum(UTI_Sample_Submission$`Number of Participants`),
  digits = 2
) * 100

# Render as a table. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
UTI_Sample_Submission %>%
  kbl(row.names = FALSE) %>%
  kable_paper("hover", full_width = FALSE)
Submitted a UTI Sample (1+)
Number of Participants
Percent of Total
No
58
64
Yes
33
36
UTI Frequency
# UTI frequency per participant ----
# Classify events in the event log by six week periods starting from the
# first sample collection.
# with_ties = FALSE keeps exactly one start date per participant. With the
# default (ties kept), a participant with several events on their earliest
# date would contribute several rows here, and the merge below would then
# duplicate every Event_Log row of that participant.
T1 <- Event_Log %>%
  group_by(Participant.ID) %>%
  slice_min(Event.Date, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(Participant.ID, Event.Date)
colnames(T1) <- c("Participant.ID", "Start.Date")
Event_Log <- merge(Event_Log, T1, by = "Participant.ID", all = TRUE)

# Zero-based index of the 42-day window each event falls in, measured from
# the participant's start date.
Event_Log$Week6 <- floor(
  abs(as.numeric(as.Date(Event_Log$Start.Date) - as.Date(Event_Log$Event.Date))) /
    (7 * 6)
)

# Determine classification by six week groupings: a window is "UTI" when it
# contains at least one UTI_Urine_Sample event or at least one Reported_UTI
# event, otherwise "No UTI Reported".
T1 <- Event_Log %>%
  count(Participant.ID, UTI, Week6)
T2 <- dcast(T1, Participant.ID + Week6 ~ UTI, fill = 0, value.var = "n")
T2$TrueType <- case_when(
  T2$UTI_Urine_Sample >= 1 ~ "UTI",
  T2$Reported_UTI >= 1 ~ "UTI",
  TRUE ~ "No UTI Reported"
)
Sample_Log <- T2 %>%
  select(Participant.ID, Week6, TrueType)
remove(T1, T2, Tmp)

# Add the windows that have no logged event (from 0 up to each participant's
# last window) as "No UTI Reported", then drop the grouping so it does not
# leak into later steps.
Sample_Log <- Sample_Log %>%
  group_by(Participant.ID) %>%
  complete(Week6 = 0:max(Week6), fill = list(TrueType = "No UTI Reported")) %>%
  ungroup()

# Determine participant type by number of UTI events.
# T1: windows per participant split by window type; T2: last window index.
T1 <- dcast(
  Sample_Log %>% count(Participant.ID, TrueType),
  Participant.ID ~ TrueType,
  fill = 0, value.var = "n"
)
T2 <- as.data.frame(
  Sample_Log %>%
    group_by(Participant.ID) %>%
    slice_max(Week6) %>%
    select(Participant.ID, Week6) %>%
    ungroup()
)
Tmp <- merge(T1, T2, by = "Participant.ID")
remove(T1, T2)

# Fraction of the participant's windows (0..Week6, hence the + 1) with a UTI.
Tmp$Ratio <- (Tmp$UTI) / (Tmp$Week6 + 1)

# First matching rule wins: fewer than 6 as last window index takes priority,
# then the ratio thresholds from highest to lowest, then any UTI at all.
Tmp$PType <- case_when(
  Tmp$Week6 < 6 ~ "Short-Term",
  Tmp$Ratio > 0.5 ~ "Chronic",
  Tmp$Ratio > 0.20 ~ "High_Frequency",
  Tmp$UTI > 0 ~ "Low_Frequency",
  TRUE ~ "No_UTI"
)
Tmp$Week6 <- NULL
Sample_Log <- merge(Sample_Log, Tmp, by = "Participant.ID")

# Retrieve types and add to Urinalysis.
T1 <- Tmp[, c("Participant.ID", "PType")]
Urinalysis <- merge(Urinalysis, T1, by = "Participant.ID", all.x = TRUE)

# Order participants on the y axis by the index of their last window;
# distinct() collapses ties returned by slice_max().
T1 <- Event_Log %>%
  group_by(Participant.ID) %>%
  slice_max(Week6) %>%
  select(Participant.ID, Week6)
T1 <- distinct(T1[order(T1$Week6), ])
Sample_Log$Participant.ID <- factor(
  Sample_Log$Participant.ID,
  levels = T1$Participant.ID
)
Sample_Log$PType <- str_replace(Sample_Log$PType, "_", " ")

# One tile per participant and window, one facet per participant type.
# Fill colours are named so they stay attached to the right window type
# regardless of level order.
Plot1 <- ggplot(Sample_Log, aes(x = Week6, y = Participant.ID, fill = TrueType)) +
  geom_tile(color = "black") +
  facet_wrap(~PType, scales = "free_y") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  scale_fill_manual(
    values = c("No UTI Reported" = "#d6d6d6", "UTI" = "red"),
    name = "Window Type"
  ) +
  theme(text = element_text(size = 15)) +
  theme(legend.position = "bottom") +
  ylab("Participants") +
  xlab("Number of 6 Week Windows Since First Sample") +
  theme(axis.text.y = element_blank()) +
  theme(strip.background = element_rect(fill = "white"))

# Write the figure to disk. print() is explicit so the plot is drawn into the
# jpeg device even where top-level auto-printing is not active.
jpeg(
  filename = "../Figures/Raw_R_Figures/Participant_UTI_Frequency_Overview.jpg",
  res = 300, width = 7, height = 5, units = "in"
)
print(Plot1)
dev.off()
Fisher's Exact Test for Count Data with simulated p-value (based on
10000 replicates)
data: T1[, -1]
p-value = 9.999e-05
alternative hypothesis: two.sided
Fisher Test: Taxa Across UTI non-UTI
Only considers taxa observed more than 10 times.
# Taxa representation across UTI and non-UTI samples ----
# Count isolates per Con_Gen and Research_Diagnosis, then spread the
# diagnoses into columns (zero where a combination never occurs).
Tmp <- Urinalysis_Culture %>%
  count(Con_Gen, Research_Diagnosis) %>%
  dcast(Con_Gen ~ Research_Diagnosis, fill = 0, value.var = "n")

# Row total over every diagnosis column (all columns except Con_Gen).
Tmp$Total_Isolates <- rowSums(Tmp[, -1])

# Keep named taxa with more than 10 isolates overall, then drop the helper
# total and the Uncertain column.
T1 <- Tmp %>%
  filter(!is.na(Con_Gen)) %>%
  filter(Total_Isolates > 10) %>%
  select(-c(Total_Isolates, Uncertain))
T1
Fisher's Exact Test for Count Data with simulated p-value (based on
10000 replicates)
data: T1[, -1]
p-value = 9.999e-05
alternative hypothesis: two.sided