hypoAfrica.R 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. #' Add binomial confidence intervals to var, fraction estimates, evaluated over nVar participants
  2. #'
  3. #' @param df a data frame that contains var and nVar columns
  4. #' @param var variable to calculate CI for, should be a fraction 0..1
  5. #' @param nVar variable that contains number of participants
  6. #'
  7. #' @return data frame with added var_low and var_high columns containing low and high CI95 values
  8. #'
  9. #' @export
  10. add.conf.int<-function(df,var,nVar='total'){
  11. v<-df[,var]
  12. n<-df[,nVar]
  13. y1<-base::rep(0, base::length(v))
  14. y2<-base::rep(0, base::length(v))
  15. for (i in 1:base::length(v)){
  16. z<-stats::binom.test(base::as.integer(v[i]*n[i]),n[i],0.5,"two.sided",0.95)
  17. y1[i]<-z$conf.int[1]
  18. y2[i]<-z$conf.int[2]
  19. }
  20. df[,base::sprintf('%s_low',var)]=y1
  21. df[,base::sprintf('%s_high',var)]=y2
  22. df
  23. }
  24. #' Add values from reference (for statistical comparison)
  25. #'
  26. #' @param df data frame with a timeVar, target variable var and count variable nVar
  27. #' @param dfRef data frame of same shape as df, considered reference for experiment
  28. #' @param var a variable to set reference values for
  29. #' @param timeVar variable containg ids of time points, where data will be merged
  30. #' @param nVar variable containing counts
  31. #'
  32. #' @return data frame df with new coluns var_ref, var_nRef
  33. #'
  34. #' @export
  35. add.Ref<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
  36. pRef<-base::sprintf('%s_ref',var)
  37. df[,pRef]<-base::rep(0, base::nrow(df))
  38. df[,'nRef']<-base::rep(0, base::nrow(df))
  39. for (i in 1:base::nrow(df)){
  40. df[i,pRef]<-dfRef[dfRef[,timeVar]==df[i,timeVar],var]
  41. df[i,'nRef']<-dfRef[dfRef[,timeVar]==df[i,timeVar],nVar]
  42. }
  43. df
  44. }
  45. #' Add statistical significance to df/dfRef difference using binomial distribution
  46. #'
  47. #' @param df data frame with target variable var, timeVar and count variable nVar
  48. #' @param dfRef a reference data frame of the same shape as df
  49. #' @param var the target variable
  50. #' @param timeVar variable indicating comparable visits
  51. #' @param nVar count variable
  52. #'
  53. #' @return data frame with var_p column of calculated signficance of difference (binom.test) and boolean var_s column notifying statistical significance at 0.05 level
  54. #'
  55. #' @export
  56. add.p.values<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
  57. df<-add.Ref(df,dfRef,var,timeVar,nVar)
  58. pName<-base::sprintf('%s_ref',var)
  59. pRef<-df[,pName]
  60. v<-df[,var]
  61. n<-df[,nVar]
  62. p<-base::rep(0, base::nrow(df))
  63. for (i in 1:base::nrow(df)){
  64. z<-stats::binom.test(base::as.integer(v[i]*n[i]),n[i],pRef[i],"two.sided",0.95)
  65. p[i]<-z$p.value
  66. }
  67. df[,base::sprintf('%s_p',var)]<-p
  68. df[,base::sprintf('%s_s',var)]<-df[,base::sprintf('%s_p',var)]<0.05
  69. df
  70. }
  71. #' Add statistical significance to df/dfRef difference using Fisher exact method
  72. #'
  73. #' @param df data frame with target variable var, timeVar and count variable nVar
  74. #' @param dfRef a reference data frame of the same shape as df
  75. #' @param var the target variable
  76. #' @param timeVar variable indicating comparable visits
  77. #' @param nVar count variable
  78. #'
  79. #' @return data frame with var_pF column of calculated signficance of difference (fisher.test) and var_sF, coded (+<0.05,*<0.01,**<0.001) statistical significance
  80. #'
  81. #' @export
  82. add.p.values.fisher<-function(df,dfRef,var,timeVar='visitid',nVar='total'){
  83. df<-add.Ref(df,dfRef,var,timeVar,nVar)
  84. pRef<-df[,base::sprintf('%s_ref',var)]
  85. nRef<-df[,'nRef']
  86. v<-df[,var]
  87. n<-df[,nVar]
  88. p<-base::rep(0, base::nrow(df))
  89. for (i in 1:base::nrow(df)){
  90. dat <- base::data.frame(tox = base::c(pRef[i]*nRef[i],v[i]*n[i]),
  91. noTox = base::c((1-pRef[i])*nRef[i], (1-v[i])*n[i]),
  92. row.names = base::c("ref", "hypoAfrica"),
  93. stringsAsFactors = FALSE)
  94. z<-stats::fisher.test(dat)
  95. p[i]<-z$p.value
  96. }
  97. df[,base::sprintf('%s_pF',var)]<-p
  98. df[,base::sprintf('%s_sF',var)]<-''
  99. df[df[,base::sprintf('%s_pF',var)]<0.05,base::sprintf('%s_sF',var)]<-'(+)'
  100. df[df[,base::sprintf('%s_pF',var)]<0.01,base::sprintf('%s_sF',var)]<-'(*)'
  101. df[df[,base::sprintf('%s_pF',var)]<0.001,base::sprintf('%s_sF',var)]<-'(**)'
  102. df
  103. }
  104. #' Plot comparison of measured and reference fractions
  105. #'
  106. #' @param df data frame of measured fractions
  107. #' @param dfRef data frame of reference fractions
  108. #' @param var target fraction variable
  109. #' @param timeVar variable measuring time
  110. #' @param doseVar variable for selecting datasets
  111. #' @param nVar variable where counts are stored
  112. #' @param doseBreaks values to break plots to
  113. #' @param doseLabels labels for legend for breaks
  114. #' @param doseColors colors to use for different plots
  115. #' @param timeBreaks where time points are assigned
  116. #' @param timeLabels labels for time points
  117. #'
  118. #' @return grob object with ggplot
  119. #'
  120. #' @export
  121. #'
  122. #' @importFrom rlang .data
  123. #'
  124. get.ggplot<-function(df,dfRef,var,timeVar='visitid',doseVar='dose',nVar='total',
  125. doseBreaks=base::c(60,62,100),doseLabels=base::c('60 Gy','62 Gy','all'),doseColors=base::c('red','magenta','blue'),
  126. timeBreaks=base::c(0,1,3,4),timeLabels=base::c('beforeRT','at20','at3mo','at12mo')){
  127. df<-add.conf.int(df,var,nVar)
  128. dfRef<-add.conf.int(dfRef,var,nVar)
  129. ghigh<-base::sprintf("%s_high",var)
  130. glow<-base::sprintf("%s_low",var)
  131. ggplot2::ggplot()+
  132. ggplot2::geom_ribbon(ggplot2::aes(x=.data[[timeVar]],ymin=.data[[glow]],ymax=.data[[ghigh]]),dfRef,fill='grey70')+
  133. ggplot2::geom_ribbon(ggplot2::aes(x=.data[[timeVar]],ymin=.data[[glow]],ymax=.data[[ghigh]],fill=base::as.factor(.data[[doseVar]])),df,alpha=0.2)+
  134. ggplot2::geom_line(ggplot2::aes(x=.data[[timeVar]],y=.data[[var]]),dfRef)+
  135. ggplot2::geom_line(ggplot2::aes(x=.data[[timeVar]],y=.data[[var]],color=base::as.factor(.data[[doseVar]])),df)+
  136. ggplot2::scale_color_manual(name = "Dose", breaks = doseBreaks, values = doseColors,labels=doseLabels)+
  137. ggplot2::scale_fill_manual(name = "Dose", breaks = doseBreaks, values = doseColors,labels=doseLabels)+
  138. ggplot2::theme(legend.position="bottom")+
  139. ggplot2::xlab('Evaluation visit')+
  140. ggplot2::scale_x_continuous(breaks=timeBreaks,limits=c(0,4),labels=timeLabels)+
  141. ggplot2::ylab('Portion with toxicity')
  142. }
  143. #' Get table of values (as ggplot graphic object)
  144. #'
  145. #' @param df data frame with reference data
  146. #' @param dfRef reference data frame
  147. #' @param var target variable to be shown
  148. #' @param timeVar variable measuring time
  149. #' @param doseVar variable for selecting datasets
  150. #' @param nVar variable where counts are stored
  151. #' @param doseBreaks which doses are in df
  152. #' @param doseLabels how to label doses in legend
  153. #' @param timeBreaks where time points are assigned
  154. #' @param dfAdd additional data frame not checked for statistics (NULL ignored)
  155. #'
  156. #' @return grobj with values laid out as text
  157. #'
  158. #' @export
  159. get.ggtext<-function(df,dfRef,var,timeVar='visitid',doseVar='dose',nVar='total',
  160. doseBreaks=c(59.9,60,62,100),doseLabels=c('ref','60 Gy','62 Gy','all'),
  161. timeBreaks=c(0,1,3,4),dfAdd=NULL){
  162. df<-add.p.values.fisher(df,dfRef,var,timeVar,nVar)
  163. sf<-base::sprintf('%s_sF',var)
  164. dfRef[,sf]<-base::rep('',nrow(dfRef))
  165. colSel<-base::c(timeVar,labelVar,sf,doseVar)
  166. df1<-base::rbind(df[,colSel],dfRef[,colSel])
  167. if (!is.null(dfAdd)){
  168. dfAdd[,sf]<-base::rep('',nrow(dfAdd))
  169. df1<-base::rbind(df1[,colSel],dfAdd[,colSel])
  170. }
  171. #set labels
  172. labelVar=sprintf('%sLabel',var)
  173. if (! labelVar %in% colnames(df1)){
  174. df1[,labelVar]=base::paste0(base::round(df1[, var], digits = 2))
  175. }
  176. ggplot2::ggplot()+
  177. ggplot2::geom_text(ggplot2::aes(x=.data[[timeVar]],y=base::as.factor(.data[[doseVar]]),
  178. label=base::paste0(.data[[labelVar]],.data[[sf]]),
  179. hjust=0.5, vjust=0.5),size=5,df1)+
  180. ggplot2::scale_x_continuous(breaks=timeBreaks,limits=base::c(0,4))+
  181. ggplot2::scale_y_discrete(breaks=doseBreaks,labels=doseLabels)+
  182. ggplot2::theme_classic(base_size=20) +
  183. ggplot2::theme(axis.line = ggplot2::element_blank(),axis.ticks = ggplot2::element_blank(),axis.title.y = ggplot2::element_blank(), axis.title.x = ggplot2::element_blank(),axis.text.x = ggplot2::element_text(color="white"))+
  184. ggplot2::theme(plot.title = ggplot2::element_text(size=11, hjust =-0.06))+
  185. ggplot2::theme(panel.background = ggplot2::element_rect(fill = "white"))
  186. }