| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205 |
- #' Add binomial confidence intervals to var, fraction estimates, evaluated over nVar participants
- #'
- #' @param df a data frame that contains var and nVar columns
- #' @param var variable to calculate CI for, should be a fraction 0..1
- #' @param nVar variable that contains number of participants
- #'
- #' @return data frame with added var_low and var_high columns containing low and high CI95 values
- #'
- #' @export
- add.conf.int<-function(df,var,nVar='total'){
- v<-df[,var]
- n<-df[,nVar]
- y1<-base::rep(0, base::length(v))
- y2<-base::rep(0, base::length(v))
- for (i in 1:base::length(v)){
- z<-stats::binom.test(base::as.integer(v[i]*n[i]),n[i],0.5,"two.sided",0.95)
- y1[i]<-z$conf.int[1]
- y2[i]<-z$conf.int[2]
- }
-
- df[,base::sprintf('%s_low',var)]=y1
- df[,base::sprintf('%s_high',var)]=y2
- df
- }
- #' Add values from reference (for statistical comparison)
- #'
- #' @param df data frame with a timeVar, target variable var and count variable nVar
- #' @param dfRef data frame of same shape as df, considered reference for experiment
- #' @param var a variable to set reference values for
- #' @param timeVar variable containg ids of time points, where data will be merged
- #' @param nVar variable containing counts
- #'
- #' @return data frame df with new coluns var_ref, var_nRef
- #'
- #' @export
- add.Ref<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
- pRef<-base::sprintf('%s_ref',var)
- df[,pRef]<-base::rep(0, base::nrow(df))
- df[,'nRef']<-base::rep(0, base::nrow(df))
- for (i in 1:base::nrow(df)){
- df[i,pRef]<-dfRef[dfRef[,timeVar]==df[i,timeVar],var]
- df[i,'nRef']<-dfRef[dfRef[,timeVar]==df[i,timeVar],nVar]
- }
- df
- }
- #' Add statistical significance to df/dfRef difference using binomial distribution
- #'
- #' @param df data frame with target variable var, timeVar and count variable nVar
- #' @param dfRef a reference data frame of the same shape as df
- #' @param var the target variable
- #' @param timeVar variable indicating comparable visits
- #' @param nVar count variable
- #'
- #' @return data frame with var_p column of calculated signficance of difference (binom.test) and boolean var_s column notifying statistical significance at 0.05 level
- #'
- #' @export
- add.p.values<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
- df<-add.Ref(df,dfRef,var,timeVar,nVar)
- pName<-base::sprintf('%s_ref',var)
- pRef<-df[,pName]
- v<-df[,var]
- n<-df[,nVar]
- p<-base::rep(0, base::nrow(df))
- for (i in 1:base::nrow(df)){
- z<-stats::binom.test(base::as.integer(v[i]*n[i]),n[i],pRef[i],"two.sided",0.95)
- p[i]<-z$p.value
- }
- df[,base::sprintf('%s_p',var)]<-p
- df[,base::sprintf('%s_s',var)]<-df[,base::sprintf('%s_p',var)]<0.05
- df
- }
- #' Add statistical significance to df/dfRef difference using Fisher exact method
- #'
- #' @param df data frame with target variable var, timeVar and count variable nVar
- #' @param dfRef a reference data frame of the same shape as df
- #' @param var the target variable
- #' @param timeVar variable indicating comparable visits
- #' @param nVar count variable
- #'
- #' @return data frame with var_pF column of calculated signficance of difference (fisher.test) and var_sF, coded (+<0.05,*<0.01,**<0.001) statistical significance
- #'
- #' @export
- add.p.values.fisher<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
-
- df<-add.Ref(df,dfRef,var,timeVar,nVar)
- pRef<-df[,base::sprintf('%s_ref',var)]
- nRef<-df[,'nRef']
- v<-df[,var]
- n<-df[,nVar]
- p<-base::rep(0, base::nrow(df))
- for (i in 1:base::nrow(df)){
- dat <- base::data.frame(tox = base::c(pRef[i]*nRef[i],v[i]*n[i]),
- noTox = base::c((1-pRef[i])*nRef[i], (1-v[i])*n[i]),
- row.names = base::c("ref", "hypoAfrica"),
- stringsAsFactors = FALSE)
- z<-stats::fisher.test(dat)
- p[i]<-z$p.value
- }
- df[,base::sprintf('%s_pF',var)]<-p
- df[,base::sprintf('%s_sF',var)]<-''
- df[df[,base::sprintf('%s_pF',var)]<0.05,base::sprintf('%s_sF',var)]<-'(+)'
- df[df[,base::sprintf('%s_pF',var)]<0.01,base::sprintf('%s_sF',var)]<-'(*)'
- df[df[,base::sprintf('%s_pF',var)]<0.001,base::sprintf('%s_sF',var)]<-'(**)'
- df
- }
- #' Plot comparison of measured and reference fractions
- #'
- #' @param df data frame of measured fractions
- #' @param dfRef data frame of reference fractions
- #' @param var target fraction variable
- #' @param timeVar variable measuring time
- #' @param doseVar variable for selecting datasets
- #' @param nVar variable where counts are stored
- #' @param doseBreaks values to break plots to
- #' @param doseLabels labels for legend for breaks
- #' @param doseColors colors to use for different plots
- #' @param timeBreaks where time points are assigned
- #' @param timeLabels labels for time points
- #'
- #' @return grob object with ggplot
- #'
- #' @export
- #'
- #' @importFrom rlang .data
- #'
- get.ggplot<-function(df,dfRef,var,timeVar='visitid',doseVar='dose',nVar='total',
- doseBreaks=base::c(60,62,100),doseLabels=base::c('60 Gy','62 Gy','all'),doseColors=base::c('red','magenta','blue'),
- timeBreaks=base::c(0,1,3,4),timeLabels=base::c('beforeRT','at20','at3mo','at12mo')){
- df<-add.conf.int(df,var,nVar)
- dfRef<-add.conf.int(dfRef,var,nVar)
- ghigh<-base::sprintf("%s_high",var)
- glow<-base::sprintf("%s_low",var)
- ggplot2::ggplot()+
- ggplot2::geom_ribbon(ggplot2::aes(x=.data[[timeVar]],ymin=.data[[glow]],ymax=.data[[ghigh]]),dfRef,fill='grey70')+
- ggplot2::geom_ribbon(ggplot2::aes(x=.data[[timeVar]],ymin=.data[[glow]],ymax=.data[[ghigh]],fill=base::as.factor(.data[[doseVar]])),df,alpha=0.2)+
- ggplot2::geom_line(ggplot2::aes(x=.data[[timeVar]],y=.data[[var]]),dfRef)+
- ggplot2::geom_line(ggplot2::aes(x=.data[[timeVar]],y=.data[[var]],color=base::as.factor(.data[[doseVar]])),df)+
- ggplot2::scale_color_manual(name = "Dose", breaks = doseBreaks, values = doseColors,labels=doseLabels)+
- ggplot2::scale_fill_manual(name = "Dose", breaks = doseBreaks, values = doseColors,labels=doseLabels)+
- ggplot2::theme(legend.position="bottom")+
- ggplot2::xlab('Evaluation visit')+
- ggplot2::scale_x_continuous(breaks=timeBreaks,limits=c(0,4),labels=timeLabels)+
- ggplot2::ylab('Portion with toxicity')
-
-
- }
- #' Get table of values (as ggplot graphic object)
- #'
- #' @param df data frame with reference data
- #' @param dfRef reference data frame
- #' @param var target variable to be shown
- #' @param timeVar variable measuring time
- #' @param doseVar variable for selecting datasets
- #' @param nVar variable where counts are stored
- #' @param doseBreaks which doses are in df
- #' @param doseLabels how to label doses in legend
- #' @param timeBreaks where time points are assigned
- #' @param dfAdd additional data frame not checked for statistics (NULL ignored)
- #'
- #' @return grobj with values laid out as text
- #'
- #' @export
- get.ggtext<-function(df,dfRef,var,timeVar='visitid',doseVar='dose',nVar='total',
- doseBreaks=c(59.9,60,62,100),doseLabels=c('ref','60 Gy','62 Gy','all'),
- timeBreaks=c(0,1,3,4),dfAdd=NULL){
- df<-add.p.values.fisher(df,dfRef,var,timeVar,nVar)
- sf<-base::sprintf('%s_sF',var)
- dfRef[,sf]<-base::rep('',nrow(dfRef))
- colSel<-base::c(timeVar,labelVar,sf,doseVar)
- df1<-base::rbind(df[,colSel],dfRef[,colSel])
- if (!is.null(dfAdd)){
- dfAdd[,sf]<-base::rep('',nrow(dfAdd))
- df1<-base::rbind(df1[,colSel],dfAdd[,colSel])
- }
- #set labels
- labelVar=sprintf('%sLabel',var)
- if (! labelVar %in% colnames(df1)){
- df1[,labelVar]=base::paste0(base::round(df1[, var], digits = 2))
- }
- ggplot2::ggplot()+
- ggplot2::geom_text(ggplot2::aes(x=.data[[timeVar]],y=base::as.factor(.data[[doseVar]]),
- label=base::paste0(.data[[labelVar]],.data[[sf]]),
- hjust=0.5, vjust=0.5),size=5,df1)+
- ggplot2::scale_x_continuous(breaks=timeBreaks,limits=base::c(0,4))+
- ggplot2::scale_y_discrete(breaks=doseBreaks,labels=doseLabels)+
- ggplot2::theme_classic(base_size=20) +
- ggplot2::theme(axis.line = ggplot2::element_blank(),axis.ticks = ggplot2::element_blank(),axis.title.y = ggplot2::element_blank(), axis.title.x = ggplot2::element_blank(),axis.text.x = ggplot2::element_text(color="white"))+
- ggplot2::theme(plot.title = ggplot2::element_text(size=11, hjust =-0.06))+
- ggplot2::theme(panel.background = ggplot2::element_rect(fill = "white"))
- }
|