#' Add binomial confidence intervals to var, fraction estimates, evaluated over nVar participants #' #' @param df a data frame that contains var and nVar columns #' @param var variable to calculate CI for, should be a fraction 0..1 #' @param nVar variable that contains number of participants #' #' @return data frame with added var_low and var_high columns containing low and high CI95 values #' #' @export add.conf.int<-function(df,var,nVar='total'){ v<-df[,var] n<-df[,nVar] y1<-base::rep(0, base::length(v)) y2<-base::rep(0, base::length(v)) for (i in 1:base::length(v)){ z<-stats::binom.test(base::as.integer(v[i]*n[i]),n[i],0.5,"two.sided",0.95) y1[i]<-z$conf.int[1] y2[i]<-z$conf.int[2] } df[,base::sprintf('%s_low',var)]=y1 df[,base::sprintf('%s_high',var)]=y2 df } #' Add values from reference (for statistical comparison) #' #' @param df data frame with a timeVar, target variable var and count variable nVar #' @param dfRef data frame of same shape as df, considered reference for experiment #' @param var a variable to set reference values for #' @param timeVar variable containg ids of time points, where data will be merged #' @param nVar variable containing counts #' #' @return data frame df with new coluns var_ref, var_nRef #' #' @export add.Ref<-function(df,dfRef,var,timeVar='visitid',nVar='total'){ pRef<-base::sprintf('%s_ref',var) df[,pRef]<-base::rep(0, base::nrow(df)) df[,'nRef']<-base::rep(0, base::nrow(df)) for (i in 1:base::nrow(df)){ df[i,pRef]<-dfRef[dfRef[,timeVar]==df[i,timeVar],var] df[i,'nRef']<-dfRef[dfRef[,timeVar]==df[i,timeVar],nVar] } df } #' Add statistical significance to df/dfRef difference using binomial distribution #' #' @param df data frame with target variable var, timeVar and count variable nVar #' @param dfRef a reference data frame of the same shape as df #' @param var the target variable #' @param timeVar variable indicating comparable visits #' @param nVar count variable #' #' @return data frame with var_p column of calculated signficance of difference (binom.test) and boolean var_s column notifying statistical significance at 0.05 level #' #' @export add.p.values<-function(df,dfRef,var,timeVar='visitid',nVar='total'){ df<-add.Ref(df,dfRef,var,timeVar,nVar) pName<-base::sprintf('%s_ref',var) pRef<-df[,pName] v<-df[,var] n<-df[,nVar] p<-base::rep(0, base::nrow(df)) for (i in 1:base::nrow(df)){ z<-stats::binom.test(base::as.integer(v[i]*n[i]),n[i],pRef[i],"two.sided",0.95) p[i]<-z$p.value } df[,base::sprintf('%s_p',var)]<-p df[,base::sprintf('%s_s',var)]<-df[,base::sprintf('%s_p',var)]<0.05 df } #' Add statistical significance to df/dfRef difference using Fisher exact method #' #' @param df data frame with target variable var, timeVar and count variable nVar #' @param dfRef a reference data frame of the same shape as df #' @param var the target variable #' @param timeVar variable indicating comparable visits #' @param nVar count variable #' #' @return data frame with var_pF column of calculated signficance of difference (fisher.test) and var_sF, coded (+<0.05,*<0.01,**<0.001) statistical significance #' #' @export add.p.values.fisher<-function(df,dfRef,var,timeVar='visitid',nVar='total'){ df<-add.Ref(df,dfRef,var,timeVar,nVar) pRef<-df[,base::sprintf('%s_ref',var)] nRef<-df[,'nRef'] v<-df[,var] n<-df[,nVar] p<-base::rep(0, base::nrow(df)) for (i in 1:base::nrow(df)){ dat <- base::data.frame(tox = base::c(pRef[i]*nRef[i],v[i]*n[i]), noTox = base::c((1-pRef[i])*nRef[i], (1-v[i])*n[i]), row.names = base::c("ref", "hypoAfrica"), stringsAsFactors = FALSE) z<-stats::fisher.test(dat) p[i]<-z$p.value } df[,base::sprintf('%s_pF',var)]<-p df[,base::sprintf('%s_sF',var)]<-'' df[df[,base::sprintf('%s_pF',var)]<0.05,base::sprintf('%s_sF',var)]<-'(+)' df[df[,base::sprintf('%s_pF',var)]<0.01,base::sprintf('%s_sF',var)]<-'(*)' df[df[,base::sprintf('%s_pF',var)]<0.001,base::sprintf('%s_sF',var)]<-'(**)' df } #' Plot comparison of measured and reference fractions #' #' @param df data frame of measured fractions #' @param dfRef data frame of reference fractions #' @param var target fraction variable #' @param timeVar variable measuring time #' @param doseVar variable for selecting datasets #' @param nVar variable where counts are stored #' @param doseBreaks values to break plots to #' @param doseLabels labels for legend for breaks #' @param doseColors colors to use for different plots #' @param timeBreaks where time points are assigned #' @param timeLabels labels for time points #' #' @return grob object with ggplot #' #' @export #' #' @importFrom rlang .data #' get.ggplot<-function(df,dfRef,var,timeVar='visitid',doseVar='dose',nVar='total', doseBreaks=base::c(60,62,100),doseLabels=base::c('60 Gy','62 Gy','all'),doseColors=base::c('red','magenta','blue'), timeBreaks=base::c(0,1,3,4),timeLabels=base::c('beforeRT','at20','at3mo','at12mo')){ df<-add.conf.int(df,var,nVar) dfRef<-add.conf.int(dfRef,var,nVar) ghigh<-base::sprintf("%s_high",var) glow<-base::sprintf("%s_low",var) ggplot2::ggplot()+ ggplot2::geom_ribbon(ggplot2::aes(x=.data[[timeVar]],ymin=.data[[glow]],ymax=.data[[ghigh]]),dfRef,fill='grey70')+ ggplot2::geom_ribbon(ggplot2::aes(x=.data[[timeVar]],ymin=.data[[glow]],ymax=.data[[ghigh]],fill=base::as.factor(.data[[doseVar]])),df,alpha=0.2)+ ggplot2::geom_line(ggplot2::aes(x=.data[[timeVar]],y=.data[[var]]),dfRef)+ ggplot2::geom_line(ggplot2::aes(x=.data[[timeVar]],y=.data[[var]],color=base::as.factor(.data[[doseVar]])),df)+ ggplot2::scale_color_manual(name = "Dose", breaks = doseBreaks, values = doseColors,labels=doseLabels)+ ggplot2::scale_fill_manual(name = "Dose", breaks = doseBreaks, values = doseColors,labels=doseLabels)+ ggplot2::theme(legend.position="bottom")+ ggplot2::xlab('Evaluation visit')+ ggplot2::scale_x_continuous(breaks=timeBreaks,limits=c(0,4),labels=timeLabels)+ ggplot2::ylab('Portion with toxicity') } #' Get table of values (as ggplot graphic object) #' #' @param df data frame with reference data #' @param dfRef reference data frame #' @param var target variable to be shown #' @param timeVar variable measuring time #' @param doseVar variable for selecting datasets #' @param nVar variable where counts are stored #' @param labelVar variable to print (if default NULL, var is used directly) #' @param doseBreaks which doses are in df #' @param doseLabels how to label doses in legend #' @param timeBreaks where time points are assigned #' @param dfAdd additional data frame not checked for statistics (NULL ignored) #' #' @return grobj with values laid out as text #' #' @export get.ggtext<-function(df,dfRef,var,timeVar='visitid',doseVar='dose',nVar='total', labelVar=NULL, doseBreaks=c(59.9,60,62,100),doseLabels=c('ref','60 Gy','62 Gy','all'), timeBreaks=c(0,1,3,4),dfAdd=NULL){ df<-add.p.values.fisher(df,dfRef,var,timeVar,nVar) sf<-base::sprintf('%s_sF',var) dfRef[,sf]<-base::rep('',nrow(dfRef)) nd=0 if (base::is.null(labelVar)){ labelVar=var nd=2 } colSel<-base::c(timeVar,labelVar,sf,doseVar) df1<-base::rbind(df[,colSel],dfRef[,colSel]) if (!is.null(dfAdd)){ dfAdd[,sf]<-base::rep('',nrow(dfAdd)) df1<-base::rbind(df1[,colSel],dfAdd[,colSel]) } ggplot2::ggplot()+ ggplot2::geom_text(ggplot2::aes(x=.data[[timeVar]],y=base::as.factor(.data[[doseVar]]), label=base::paste0(base::round(.data[[labelVar]], digits = nd),.data[[sf]]), hjust=0.5, vjust=0.5),size=5,df1)+ ggplot2::scale_x_continuous(breaks=timeBreaks,limits=base::c(0,4))+ ggplot2::scale_y_discrete(breaks=doseBreaks,labels=doseLabels)+ ggplot2::theme_classic(base_size=20) + ggplot2::theme(axis.line = ggplot2::element_blank(),axis.ticks = ggplot2::element_blank(),axis.title.y = ggplot2::element_blank(), axis.title.x = ggplot2::element_blank(),axis.text.x = ggplot2::element_text(color="white"))+ ggplot2::theme(plot.title = ggplot2::element_text(size=11, hjust =-0.06))+ ggplot2::theme(panel.background = ggplot2::element_rect(fill = "white")) }