stats.R 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. #' Construct a string to report mean and SD
  2. #' @param v a vector of values
  3. #' @param nD number of decimals to print
  4. #'
  5. #' @return a combined string
  6. #'
  7. #' @export
  8. get.mean<-function(v,nD=0){
  9. x=base::sprintf('%.0f (%.0f)',base::mean(v,na.rm=TRUE),stats::sd(v,na.rm=TRUE))
  10. if (nD==1){
  11. x=base::sprintf('%.1f (%.1f)',base::mean(v,na.rm=TRUE),stats::sd(v,na.rm=TRUE))
  12. }
  13. if (nD==2){
  14. x=base::sprintf('%.2f (%.2f)',base::mean(v,na.rm=TRUE),stats::sd(v,na.rm=TRUE))
  15. }
  16. x
  17. }
  18. #' Construct a string to report median and range
  19. #'
  20. #' @param v a vector of values
  21. #' @param nD number of decimals to print
  22. #'
  23. #' @return a combined string
  24. #'
  25. #' @export
  26. get.median<-function(v,nD=0){
  27. base::sprintf('%.0f (%.0f-%.0f)',stats::median(v,na.rm=TRUE),base::min(v,na.rm=TRUE),base::max(v,na.rm=TRUE))
  28. if (nD==1){
  29. base::sprintf('%.1f (%.1f-%.1f)',stats::median(v,na.rm=TRUE),base::min(v,na.rm=TRUE),base::max(v,na.rm=TRUE))
  30. }
  31. if (nD==2){
  32. base::sprintf('%.2f (%.2f-%.2f)',stats::median(v,na.rm=TRUE),base::min(v,na.rm=TRUE),base::max(v,na.rm=TRUE))
  33. }
  34. }
  35. #' Construct a string to report median and quartiles
  36. #'
  37. #' @param v a vector of values
  38. #'
  39. #' @return a combined string
  40. #'
  41. #' @export
  42. get.medianQ<-function(v){
  43. q=base::c(0.25,0.75)
  44. qv=stats::quantile(v,probs=q,na.rm=TRUE)
  45. base::print(qv[1])
  46. base::sprintf('%.0f (%.0f-%.0f)',stats::median(v,na.rm=TRUE),qv[1],qv[2])
  47. }
  48. #' Construct a string to report count and portion that match comma separated string list
  49. #'
  50. #' @param v a vector of values
  51. #' @param val comma separated list of values
  52. #'
  53. #' @return a combined string
  54. #'
  55. #' @export
  56. get.portion<-function(v,val){
  57. vals<-base::as.integer(base::strsplit(base::as.character(val),',')[[1]])
  58. n1=base::length(v[base::is.element(v,vals)])
  59. base::sprintf('%.0f (%.0f %%)',n1,100*n1/base::length(v))
  60. }
  61. #' Report count of elements in v that match comma separated string list
  62. #'
  63. #' @param v a vector of values
  64. #' @param val comma separated list of values
  65. #'
  66. #' @return a combined string
  67. #'
  68. #' @export
  69. get.events<-function(v,val){
  70. vals<-base::as.integer(base::strsplit(base::as.character(val),',')[[1]])
  71. base::length(v[base::is.element(v,vals)])
  72. }
  73. #' Repeat function and merge output in a vector
  74. #'
  75. #' @param func function to perform, with elements v and a key k
  76. #' @param v parameter of the func
  77. #' @param keys a set of k values for the func
  78. #'
  79. #' @return a combined output
  80. #'
  81. #' @export
  82. get.series<-function(func,v,keys=c()){
  83. out=base::c()
  84. for (k in keys){
  85. out=base::c(out,func(v,k))
  86. }
  87. out
  88. }
  89. #' Construct a contingency table
  90. #'
  91. #' @param df data frame with entries selected by an outcome variable
  92. #' @param df1 complementary data frame to df
  93. #' @param var variable to test dependency for
  94. #' @param keys possible values of var
  95. #'
  96. #' @return contingency table as a data frame, rows are keys, and columns are v for df and v1 for df1
  97. #'
  98. #' @export
  99. get.contingency<-function(df,df1,var,keys){
  100. v<-base::c()
  101. v1<-base::c()
  102. #for (k in names(z)){
  103. #keys=z[[k]][,'Key']
  104. v<-base::c(v,get.series(get.events,df[,var],keys))
  105. v1<-base::c(v1,get.series(get.events,df1[,var],keys))
  106. if (base::length(keys)==1){
  107. v<-base::c(v,base::nrow(df)-v[1])
  108. v1<-base::c(v1,base::nrow(df1)-v1[1])
  109. }
  110. base::data.frame(v=v,v1=v1)
  111. }
  112. #' Determine statistical significance of df/df1 splitting for variable using chi-square test
  113. #'
  114. #' @param df data frame with entries selected by an outcome variable
  115. #' @param df1 complementary data frame to df
  116. #' @param var variable to test dependency for
  117. #' @param keys possible values of var
  118. #'
  119. #' @return chisq.test output
  120. #'
  121. #' @export
  122. get.chisq<-function(df,df1,var,keys){
  123. cf<-get.contingency(df,df1,var,keys)
  124. stats::chisq.test(cf,simulate.p.value=TRUE)$p.value
  125. }
  126. #' Determine statistical significance of df/df1 splitting for variable using FIsher's exact test
  127. #'
  128. #' @param df data frame with entries selected by an outcome variable
  129. #' @param df1 complementary data frame to df
  130. #' @param var variable to test dependency for
  131. #' @param keys possible values of var
  132. #'
  133. #' @return fisher.test output
  134. #'
  135. #' @export
  136. get.fisher<-function(df,df1,var,keys){
  137. cf<-get.contingency(df,df1,var,keys)
  138. stats::fisher.test(cf,simulate.p.value=TRUE)$p.value
  139. }
  140. #' Determine statistical significance of df/df1 splitting for a continous variable (MWU/Wilcox)
  141. #'
  142. #' @param df data frame with entries selected by an outcome variable
  143. #' @param df1 complementary data frame to df
  144. #' @param var variable to test dependency for
  145. #'
  146. #' @return wilcox.test output
  147. #'
  148. #' @export
  149. get.u<-function(df,df1,var){
  150. stats::wilcox.test(df[,var],df1[,var])$p.value
  151. }
  152. #' Determine statistical significance of df/df1 splitting for a continous variable (T-test)
  153. #'
  154. #' @param df data frame with entries selected by an outcome variable
  155. #' @param df1 complementary data frame to df
  156. #' @param var variable to test dependency for
  157. #'
  158. #' @return wilcox.test output
  159. #'
  160. #' @export
  161. get.t<-function(df,df1,var){
  162. stats::t.test(df[,var],df1[,var])$p.value
  163. }