Browse Source

Adding hypoAfrica routines

Andrej 3 days ago
parent
commit
9ba9335de2
9 changed files with 383 additions and 1 deletions
  1. 2 1
      DESCRIPTION
  2. 7 0
      NAMESPACE
  3. 192 0
      R/hypoAfrica.R
  4. 25 0
      man/add.Ref.Rd
  5. 21 0
      man/add.conf.int.Rd
  6. 25 0
      man/add.p.values.Rd
  7. 25 0
      man/add.p.values.fisher.Rd
  8. 46 0
      man/get.ggplot.Rd
  9. 40 0
      man/get.ggtext.Rd

+ 2 - 1
DESCRIPTION

@@ -14,7 +14,8 @@ Imports:
     base,
     graphics,
     survival,
-    stats
+    stats,
+    rlang
 Suggests: 
     ggplot2,
     ggsurvfit

+ 7 - 0
NAMESPACE

@@ -1,9 +1,15 @@
 # Generated by roxygen2: do not edit by hand
 
+export(add.Ref)
+export(add.conf.int)
+export(add.p.values)
+export(add.p.values.fisher)
 export(convert.to.date)
 export(convert.to.sul)
 export(cox.hazard)
 export(cox.univariate)
+export(get.ggplot)
+export(get.ggtext)
 export(kaplan.meier)
 export(kaplan.meier.plot.gg)
 export(kaplan.meier.stats)
@@ -22,3 +28,4 @@ export(simple.sAUC)
 export(simple.sAUC2)
 export(simple.sAUCapprox)
 export(sumWithNA)
+importFrom(rlang,.data)

+ 192 - 0
R/hypoAfrica.R

@@ -0,0 +1,192 @@
+
+#' Add binomial confidence intervals to var, fraction estimates, evaluated over nVar participants
+#'
+#' @param df a data frame that contains var and nVar columns
+#' @param var variable to calculate CI for, should be a fraction 0..1
+#' @param nVar variable that contains number of participants
+#'
+#' @return data frame with added var_low and var_high columns containing low and high CI95 values
+#'
+#' @export
+
+add.conf.int<-function(df,var,nVar='total'){
+   v<-df[,var]
+   n<-df[,nVar]
+   y1<-base::rep(0, base::length(v))
+   y2<-base::rep(0, base::length(v))
+   for (i in 1:base::length(v)){
+      z<-stats::binom.test(base::as.integer(v[i]*n[i]),n[i],0.5,"two.sided",0.95)
+      y1[i]<-z$conf.int[1]
+      y2[i]<-z$conf.int[2]
+   }
+   
+   df[,base::sprintf('%s_low',var)]=y1
+   df[,base::sprintf('%s_high',var)]=y2
+   df
+}
+
+#' Add values from reference (for statistical comparison)
+#'
+#' @param df data frame with a timeVar, target variable var and count variable nVar
+#' @param dfRef data frame of same shape as df, considered reference for experiment
+#' @param var a variable to set reference values for
+#' @param timeVar variable containg ids of time points, where data will be merged
+#' @param nVar variable containing counts
+#'
+#' @return data frame df with new coluns var_ref, var_nRef
+#'
+#' @export
+
+add.Ref<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
+   pRef<-base::sprintf('%s_ref',var)
+   df[,pRef]<-base::rep(0, base::nrow(df))
+   df[,'nRef']<-base::rep(0, base::nrow(df))
+   for (i in 1:base::nrow(df)){
+      df[i,pRef]<-dfRef[dfRef[,timeVar]==df[i,timeVar],var]
+      df[i,'nRef']<-dfRef[dfRef[,timeVar]==df[i,timeVar],nVar]
+   }
+   df
+}
+
+#' Add statistical significance to df/dfRef difference using binomial distribution
+#'
+#' @param df data frame with target variable var, timeVar and count variable nVar
+#' @param dfRef a reference data frame of the same shape as df
+#' @param var the target variable
+#' @param timeVar variable indicating comparable visits 
+#' @param nVar count variable
+#'
+#' @return data frame with var_p column of calculated signficance of difference (binom.test) and boolean var_s column notifying statistical significance at 0.05 level
+#'
+#' @export
+
+add.p.values<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
+   df<-add.Ref(df,dfRef,var,timeVar,nVar)
+   pName<-base::sprintf('%s_ref',var)
+   pRef<-df[,pName]
+   v<-df[,var]
+   n<-df[,nVar]
+   p<-base::rep(0, base::nrow(df))
+   for (i in 1:base::nrow(df)){
+      z<-stats::binom.test(base::as.integer(v[i]*n[i]),n[i],pRef[i],"two.sided",0.95)
+      p[i]<-z$p.value
+   }
+   df[,base::sprintf('%s_p',var)]<-p
+   df[,base::sprintf('%s_s',var)]<-df[,base::sprintf('%s_p',var)]<0.05
+   df
+}
+
+#' Add statistical significance to df/dfRef difference using Fisher exact method
+#'
+#' @param df data frame with target variable var, timeVar and count variable nVar
+#' @param dfRef a reference data frame of the same shape as df
+#' @param var the target variable
+#' @param timeVar variable indicating comparable visits 
+#' @param nVar count variable
+#'
+#' @return data frame with var_pF column of calculated signficance of difference (fisher.test) and var_sF, coded (+<0.05,*<0.01,**<0.001) statistical significance 
+#'
+#' @export
+
+add.p.values.fisher<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
+   
+   df<-add.Ref(df,dfRef,var,timeVar,nVar)
+   pRef<-df[,base::sprintf('%s_ref',var)]
+   nRef<-df[,'nRef']
+   v<-df[,var]
+   n<-df[,'total']
+   p<-base::rep(0, base::nrow(df))
+   for (i in 1:base::nrow(df)){
+   	  dat <- base::data.frame(tox = base::c(pRef[i]*nRef[i],v[i]*n[i]),
+  	  	noTox = base::c((1-pRef[i])*nRef[i], (1-v[i])*n[i]),
+  	  	row.names = base::c("ref", "hypoAfrica"),
+	  	stringsAsFactors = FALSE)
+      z<-stats::fisher.test(dat)
+      p[i]<-z$p.value
+   }
+   df[,base::sprintf('%s_pF',var)]<-p
+   df[,base::sprintf('%s_sF',var)]<-''
+   df[df[,base::sprintf('%s_pF',var)]<0.05,base::sprintf('%s_sF',var)]<-'(+)'
+   df[df[,base::sprintf('%s_pF',var)]<0.01,base::sprintf('%s_sF',var)]<-'(*)'
+   df[df[,base::sprintf('%s_pF',var)]<0.001,base::sprintf('%s_sF',var)]<-'(**)'
+   df
+}
+
+#' Plot comparison of measured and reference fractions
+#'
+#' @param df data frame of measured fractions
+#' @param dfRef data frame of reference fractions
+#' @param var target fraction variable
+#' @param timeVar variable measuring time
+#' @param doseVar variable for selecting datasets
+#' @param doseBreaks values to break plots to
+#' @param doseLabels labels for legend for breaks
+#' @param doseColors colors to use for different plots
+#' @param timeBreaks where time points are assigned
+#' @param timeLabels labels for time points
+#'
+#' @return grob object with ggplot
+#'
+#' @export 
+#'
+#' @importFrom rlang .data
+#'
+get.ggplot<-function(df,dfRef,var,timeVar='visitid',doseVar='dose',
+   doseBreaks=base::c(60,62,100),doseLabels=base::c('60 Gy','62 Gy','all'),doseColors=base::c('red','magenta','blue'),
+   timeBreaks=base::c(0,1,3,4),timeLabels=base::c('beforeRT','at20','at3mo','at12mo')){
+   df<-add.conf.int(df,var)
+   dfRef<-add.conf.int(dfRef,var)
+
+   ghigh<-base::sprintf("%s_high",var)
+   glow<-base::sprintf("%s_low",var)
+   xV<-.data[[timeVar]]
+   ggplot2::ggplot()+
+	ggplot2::geom_ribbon(ggplot2::aes(x=xV,ymin=.data[[glow]],ymax=.data[[ghigh]]),dfRef,fill='grey70')+
+	ggplot2::geom_ribbon(ggplot2::aes(x=xV,ymin=.data[[glow]],ymax=.data[[ghigh]],fill=base::as.factor(.data[[doseVar]])),df,alpha=0.2)+
+	ggplot2::geom_line(ggplot2::aes(x=xV,y=.data[[var]]),dfRef)+
+	ggplot2::geom_line(ggplot2::aes(x=xV,y=.data[[var]],color=base::as.factor(.data[[doseVar]])),df)+
+	ggplot2::scale_color_manual(name = "Dose", breaks = doseBreaks, values = doseColors,labels=doseLabels)+
+	ggplot2::scale_fill_manual(name = "Dose", breaks = doseBreaks, values = doseColors,labels=doseLabels)+
+	ggplot2::theme(legend.position="bottom")+
+   ggplot2::xlab('Evaluation visit')+
+   ggplot2::scale_x_continuous(breaks=timeBreaks,limits=c(0,4),labels=timeLabels)+
+   ggplot2::ylab('Portion with toxicity')
+   
+   
+}
+
+#' Get table of values (as ggplot graphic object)
+#'
+#' @param df data frame with reference data
+#' @param dfRef reference data frame 
+#' @param var target variable to be shown
+#' @param timeVar variable measuring time
+#' @param doseVar variable for selecting datasets
+#' @param doseBreaks which doses are in df
+#' @param doseLabels how to label doses in legend
+#' @param timeBreaks where time points are assigned
+#'
+#' @return grobj with values laid out as text
+#'
+#' @export
+
+get.ggtext<-function(df,dfRef,var,timeVar='visitid',doseVar='dose',
+   doseBreaks=c(59.9,60,62,100),doseLabels=c('ref','60 Gy','62 Gy','all'),
+   timeBreaks=c(0,1,3,4)){
+
+   df<-add.p.values.fisher(df,dfRef,var)
+   sf<-base::sprintf('%s_sF',var)
+   dfRef[,sf]<-base::rep('',nrow(dfRef))
+
+   colSel<-base::c('visitid',var,sf,'dose')
+   df1<-base::rbind(df[,colSel],dfRef[,colSel])
+	ggplot2::ggplot()+
+	ggplot2::geom_text(ggplot2::aes(x=.data[[timeVar]],y=base::as.factor(.data[[doseVar]]),
+      label=base::paste0(base::round(.data[[var]], digits = 2),.data[[sf]]),hjust=0.5, vjust=0.5),size=5,df1)+
+	ggplot2::scale_x_continuous(breaks=timeBreaks,limits=base::c(0,4))+
+	ggplot2::scale_y_discrete(breaks=doseBreaks,labels=doseLabels)+
+	ggplot2::theme_classic(base_size=20) +
+	ggplot2::theme(axis.line = ggplot2::element_blank(),axis.ticks  = ggplot2::element_blank(),axis.title.y  = ggplot2::element_blank(), axis.title.x  = ggplot2::element_blank(),axis.text.x = ggplot2::element_text(color="white"))+
+	ggplot2::theme(plot.title = ggplot2::element_text(size=11, hjust =-0.06))+
+	ggplot2::theme(panel.background = ggplot2::element_rect(fill = "white"))
+}

+ 25 - 0
man/add.Ref.Rd

@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hypoAfrica.R
+\name{add.Ref}
+\alias{add.Ref}
+\title{Add values from reference (for statistical comparison)}
+\usage{
+add.Ref(df, dfRef, var, timeVar = "visitid", nVar = "total")
+}
+\arguments{
+\item{df}{data frame with a timeVar, target variable var and count variable nVar}
+
+\item{dfRef}{data frame of same shape as df, considered reference for experiment}
+
+\item{var}{a variable to set reference values for}
+
+\item{timeVar}{variable containg ids of time points, where data will be merged}
+
+\item{nVar}{variable containing counts}
+}
+\value{
+data frame df with new coluns var_ref, var_nRef
+}
+\description{
+Add values from reference (for statistical comparison)
+}

+ 21 - 0
man/add.conf.int.Rd

@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hypoAfrica.R
+\name{add.conf.int}
+\alias{add.conf.int}
+\title{Add binomial confidence intervals to var, fraction estimates, evaluated over nVar participants}
+\usage{
+add.conf.int(df, var, nVar = "total")
+}
+\arguments{
+\item{df}{a data frame that contains var and nVar columns}
+
+\item{var}{variable to calculate CI for, should be a fraction 0..1}
+
+\item{nVar}{variable that contains number of participants}
+}
+\value{
+data frame with added var_low and var_high columns containing low and high CI95 values
+}
+\description{
+Add binomial confidence intervals to var, fraction estimates, evaluated over nVar participants
+}

+ 25 - 0
man/add.p.values.Rd

@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hypoAfrica.R
+\name{add.p.values}
+\alias{add.p.values}
+\title{Add statistical significance to df/dfRef difference using binomial distribution}
+\usage{
+add.p.values(df, dfRef, var, timeVar = "visitid", nVar = "total")
+}
+\arguments{
+\item{df}{data frame with target variable var, timeVar and count variable nVar}
+
+\item{dfRef}{a reference data frame of the same shape as df}
+
+\item{var}{the target variable}
+
+\item{timeVar}{variable indicating comparable visits}
+
+\item{nVar}{count variable}
+}
+\value{
+data frame with var_p column of calculated signficance of difference (binom.test) and boolean var_s column notifying statistical significance at 0.05 level
+}
+\description{
+Add statistical significance to df/dfRef difference using binomial distribution
+}

+ 25 - 0
man/add.p.values.fisher.Rd

@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hypoAfrica.R
+\name{add.p.values.fisher}
+\alias{add.p.values.fisher}
+\title{Add statistical significance to df/dfRef difference using Fisher exact method}
+\usage{
+add.p.values.fisher(df, dfRef, var, timeVar = "visitid", nVar = "total")
+}
+\arguments{
+\item{df}{data frame with target variable var, timeVar and count variable nVar}
+
+\item{dfRef}{a reference data frame of the same shape as df}
+
+\item{var}{the target variable}
+
+\item{timeVar}{variable indicating comparable visits}
+
+\item{nVar}{count variable}
+}
+\value{
+data frame with var_pF column of calculated signficance of difference (fisher.test) and var_sF, coded (+<0.05,*<0.01,**<0.001) statistical significance
+}
+\description{
+Add statistical significance to df/dfRef difference using Fisher exact method
+}

+ 46 - 0
man/get.ggplot.Rd

@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hypoAfrica.R
+\name{get.ggplot}
+\alias{get.ggplot}
+\title{Plot comparison of measured and reference fractions}
+\usage{
+get.ggplot(
+  df,
+  dfRef,
+  var,
+  timeVar = "visitid",
+  doseVar = "dose",
+  doseBreaks = base::c(60, 62, 100),
+  doseLabels = base::c("60 Gy", "62 Gy", "all"),
+  doseColors = base::c("red", "magenta", "blue"),
+  timeBreaks = base::c(0, 1, 3, 4),
+  timeLabels = base::c("beforeRT", "at20", "at3mo", "at12mo")
+)
+}
+\arguments{
+\item{df}{data frame of measured fractions}
+
+\item{dfRef}{data frame of reference fractions}
+
+\item{var}{target fraction variable}
+
+\item{timeVar}{variable measuring time}
+
+\item{doseVar}{variable for selecting datasets}
+
+\item{doseBreaks}{values to break plots to}
+
+\item{doseLabels}{labels for legend for breaks}
+
+\item{doseColors}{colors to use for different plots}
+
+\item{timeBreaks}{where time points are assigned}
+
+\item{timeLabels}{labels for time points}
+}
+\value{
+grob object with ggplot
+}
+\description{
+Plot comparison of measured and reference fractions
+}

+ 40 - 0
man/get.ggtext.Rd

@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hypoAfrica.R
+\name{get.ggtext}
+\alias{get.ggtext}
+\title{Get table of values (as ggplot graphic object)}
+\usage{
+get.ggtext(
+  df,
+  dfRef,
+  var,
+  timeVar = "visitid",
+  doseVar = "dose",
+  doseBreaks = c(59.9, 60, 62, 100),
+  doseLabels = c("ref", "60 Gy", "62 Gy", "all"),
+  timeBreaks = c(0, 1, 3, 4)
+)
+}
+\arguments{
+\item{df}{data frame with reference data}
+
+\item{dfRef}{reference data frame}
+
+\item{var}{target variable to be shown}
+
+\item{timeVar}{variable measuring time}
+
+\item{doseVar}{variable for selecting datasets}
+
+\item{doseBreaks}{which doses are in df}
+
+\item{doseLabels}{how to label doses in legend}
+
+\item{timeBreaks}{where time points are assigned}
+}
+\value{
+grobj with values laid out as text
+}
+\description{
+Get table of values (as ggplot graphic object)
+}