stats.R 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. #' Construct a string to report mean and SD
  2. #' @param v a vector of values
  3. #' @param nD number of decimals to print
  4. #'
  5. #' @return a combined string
  6. #'
  7. #' @export
  8. get.mean<-function(v,nD=0){
  9. x=base::sprintf('%.0f (%.0f)',base::mean(v,na.rm=TRUE),stats::sd(v,na.rm=TRUE))
  10. if (nD==1){
  11. x=base::sprintf('%.1f (%.1f)',base::mean(v,na.rm=TRUE),stats::sd(v,na.rm=TRUE))
  12. }
  13. if (nD==2){
  14. x=base::sprintf('%.2f (%.2f)',base::mean(v,na.rm=TRUE),stats::sd(v,na.rm=TRUE))
  15. }
  16. x
  17. }
  18. #' Construct a string to report median and range
  19. #'
  20. #' @param v a vector of values
  21. #' @param nD number of decimals to print
  22. #'
  23. #' @return a combined string
  24. #'
  25. #' @export
  26. get.median<-function(v,nD=0){
  27. x=base::sprintf('%.0f (%.0f-%.0f)',stats::median(v,na.rm=TRUE),base::min(v,na.rm=TRUE),base::max(v,na.rm=TRUE))
  28. if (nD==1){
  29. x=base::sprintf('%.1f (%.1f-%.1f)',stats::median(v,na.rm=TRUE),base::min(v,na.rm=TRUE),base::max(v,na.rm=TRUE))
  30. }
  31. if (nD==2){
  32. x=base::sprintf('%.2f (%.2f-%.2f)',stats::median(v,na.rm=TRUE),base::min(v,na.rm=TRUE),base::max(v,na.rm=TRUE))
  33. }
  34. x
  35. }
  36. #' Construct a string to report median and quartiles
  37. #'
  38. #' @param v a vector of values
  39. #'
  40. #' @return a combined string
  41. #'
  42. #' @export
  43. get.medianQ<-function(v){
  44. q=base::c(0.25,0.75)
  45. qv=stats::quantile(v,probs=q,na.rm=TRUE)
  46. base::print(qv[1])
  47. base::sprintf('%.0f (%.0f-%.0f)',stats::median(v,na.rm=TRUE),qv[1],qv[2])
  48. }
  49. #' Construct a string to report count and portion that match comma separated string list
  50. #'
  51. #' @param v a vector of values
  52. #' @param val comma separated list of values
  53. #'
  54. #' @return a combined string
  55. #'
  56. #' @export
  57. get.portion<-function(v,val){
  58. vals<-base::as.integer(base::strsplit(base::as.character(val),',')[[1]])
  59. n1=base::length(v[base::is.element(v,vals)])
  60. base::sprintf('%.0f (%.0f %%)',n1,100*n1/base::length(v))
  61. }
  62. #' Report count of elements in v that match comma separated string list
  63. #'
  64. #' @param v a vector of values
  65. #' @param val comma separated list of values
  66. #'
  67. #' @return a combined string
  68. #'
  69. #' @export
  70. get.events<-function(v,val){
  71. vals<-base::as.integer(base::strsplit(base::as.character(val),',')[[1]])
  72. base::length(v[base::is.element(v,vals)])
  73. }
  74. #' Repeat function and merge output in a vector
  75. #'
  76. #' @param func function to perform, with elements v and a key k
  77. #' @param v parameter of the func
  78. #' @param keys a set of k values for the func
  79. #'
  80. #' @return a combined output
  81. #'
  82. #' @export
  83. get.series<-function(func,v,keys=c()){
  84. out=base::c()
  85. for (k in keys){
  86. out=base::c(out,func(v,k))
  87. }
  88. out
  89. }
  90. #' Construct a contingency table
  91. #'
  92. #' @param df data frame with entries selected by an outcome variable
  93. #' @param df1 complementary data frame to df
  94. #' @param var variable to test dependency for
  95. #' @param keys possible values of var
  96. #'
  97. #' @return contingency table as a data frame, rows are keys, and columns are v for df and v1 for df1
  98. #'
  99. #' @export
  100. get.contingency<-function(df,df1,var,keys){
  101. v<-base::c()
  102. v1<-base::c()
  103. #for (k in names(z)){
  104. #keys=z[[k]][,'Key']
  105. v<-base::c(v,get.series(get.events,df[,var],keys))
  106. v1<-base::c(v1,get.series(get.events,df1[,var],keys))
  107. if (base::length(keys)==1){
  108. v<-base::c(v,base::nrow(df)-v[1])
  109. v1<-base::c(v1,base::nrow(df1)-v1[1])
  110. }
  111. base::data.frame(v=v,v1=v1)
  112. }
  113. #' Determine statistical significance of df/df1 splitting for variable using chi-square test
  114. #'
  115. #' @param df data frame with entries selected by an outcome variable
  116. #' @param df1 complementary data frame to df
  117. #' @param var variable to test dependency for
  118. #' @param keys possible values of var
  119. #'
  120. #' @return chisq.test output
  121. #'
  122. #' @export
  123. get.chisq<-function(df,df1,var,keys){
  124. cf<-get.contingency(df,df1,var,keys)
  125. stats::chisq.test(cf,simulate.p.value=TRUE)$p.value
  126. }
  127. #' Determine statistical significance of df/df1 splitting for variable using FIsher's exact test
  128. #'
  129. #' @param df data frame with entries selected by an outcome variable
  130. #' @param df1 complementary data frame to df
  131. #' @param var variable to test dependency for
  132. #' @param keys possible values of var
  133. #'
  134. #' @return fisher.test output
  135. #'
  136. #' @export
  137. get.fisher<-function(df,df1,var,keys){
  138. cf<-get.contingency(df,df1,var,keys)
  139. stats::fisher.test(cf,simulate.p.value=TRUE)$p.value
  140. }
  141. #' Determine statistical significance of df/df1 splitting for a continous variable (MWU/Wilcox)
  142. #'
  143. #' @param df data frame with entries selected by an outcome variable
  144. #' @param df1 complementary data frame to df
  145. #' @param var variable to test dependency for
  146. #'
  147. #' @return wilcox.test output
  148. #'
  149. #' @export
  150. get.u<-function(df,df1,var){
  151. stats::wilcox.test(df[,var],df1[,var])$p.value
  152. }
  153. #' Determine statistical significance of df/df1 splitting for a continous variable (T-test)
  154. #'
  155. #' @param df data frame with entries selected by an outcome variable
  156. #' @param df1 complementary data frame to df
  157. #' @param var variable to test dependency for
  158. #'
  159. #' @return wilcox.test output
  160. #'
  161. #' @export
  162. get.t<-function(df,df1,var){
  163. stats::t.test(df[,var],df1[,var])$p.value
  164. }