stats.R 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  #' Construct a string to report mean and SD
  #'
  #' Formats the mean and standard deviation of `v` as "mean (sd)". Missing
  #' values are removed before both statistics are computed.
  #'
  #' @param v a numeric vector of values
  #' @param nD number of decimals to print; coerced to an integer, negative
  #'   values are treated as 0
  #'
  #' @return a single string of the form "mean (sd)"
  #'
  #' @export
  get.mean <- function(v, nD = 0) {
    digits <- base::as.integer(nD)
    if (base::length(digits) != 1 || base::is.na(digits)) {
      base::stop("nD must be a single whole number", call. = FALSE)
    }
    # Negative precision is meaningless; fall back to whole numbers.
    digits <- base::max(digits, 0L)
    # Build the format from nD so every precision is honoured, not only 0 and 2.
    fmt <- base::sprintf("%%.%df (%%.%df)", digits, digits)
    base::sprintf(fmt, base::mean(v, na.rm = TRUE), stats::sd(v, na.rm = TRUE))
  }
  #' Construct a string to report median and range
  #'
  #' @param v a numeric vector of values; missing values are ignored
  #'
  #' @return a single string of the form "median (min-max)", each number
  #'   printed without decimals
  #'
  #' @export
  get.median <- function(v) {
    centre <- stats::median(v, na.rm = TRUE)
    lowest <- base::min(v, na.rm = TRUE)
    highest <- base::max(v, na.rm = TRUE)
    base::sprintf('%.0f (%.0f-%.0f)', centre, lowest, highest)
  }
  #' Construct a string to report median and quartiles
  #'
  #' Missing values are removed before the median and the 25% / 75% quantiles
  #' are computed. The function has no side effects; in particular it prints
  #' nothing to the console.
  #'
  #' @param v a numeric vector of values
  #'
  #' @return a single string of the form "median (Q1-Q3)", each number
  #'   printed without decimals
  #'
  #' @export
  get.medianQ <- function(v) {
    quartiles <- stats::quantile(v, probs = base::c(0.25, 0.75), na.rm = TRUE)
    base::sprintf(
      '%.0f (%.0f-%.0f)',
      stats::median(v, na.rm = TRUE),
      quartiles[1],
      quartiles[2]
    )
  }
  #' Construct a string to report count and portion of matching values
  #'
  #' `val` is converted to character, split on commas and parsed as integers;
  #' elements of `v` equal to any of those integers are counted. The
  #' percentage is taken relative to the full length of `v`.
  #'
  #' @param v a vector of values
  #' @param val comma separated list of integer values, e.g. "1,2"
  #'
  #' @return a single string of the form "count (percent %)"
  #'
  #' @export
  get.portion <- function(v, val) {
    tokens <- base::strsplit(base::as.character(val), ',')[[1]]
    wanted <- base::as.integer(tokens)
    hits <- base::sum(base::is.element(v, wanted))
    base::sprintf('%.0f (%.0f %%)', hits, 100 * hits / base::length(v))
  }
  #' Report count of elements in v that match comma separated string list
  #'
  #' `val` is converted to character, split on commas and parsed as integers;
  #' the result is the number of elements of `v` equal to any of them.
  #'
  #' @param v a vector of values
  #' @param val comma separated list of integer values, e.g. "1,2"
  #'
  #' @return the number of matching elements, as an integer
  #'
  #' @export
  get.events <- function(v, val) {
    tokens <- base::strsplit(base::as.character(val), ',')[[1]]
    wanted <- base::as.integer(tokens)
    base::sum(base::is.element(v, wanted))
  }
  #' Repeat function and merge output in a vector
  #'
  #' Calls `func(v, k)` once for every `k` in `keys`, in order, and
  #' concatenates the results with `c()`.
  #'
  #' @param func function to perform, called as `func(v, k)`
  #' @param v first argument passed to every call of `func`
  #' @param keys a set of k values for the func
  #'
  #' @return the concatenated results; `NULL` when `keys` is empty
  #'
  #' @export
  get.series <- function(func, v, keys = c()) {
    collected <- NULL
    for (key in keys) {
      piece <- func(v, key)
      collected <- base::c(collected, piece)
    }
    collected
  }
  #' Construct a contingency table
  #'
  #' Counts, for each key, the entries of column `var` that match it, once in
  #' `df` and once in `df1`. When exactly one key is given, a second row is
  #' appended holding the remaining rows of each data frame (row count minus
  #' the matching count), so the table always has at least two rows.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return contingency table as a data frame, one row per key, with
  #'   column v holding the counts for df and column v1 those for df1
  #'
  #' @export
  get.contingency <- function(df, df1, var, keys) {
    counts <- base::c(get.series(get.events, df[, var], keys))
    counts1 <- base::c(get.series(get.events, df1[, var], keys))
    single_key <- base::length(keys) == 1
    if (single_key) {
      # Add the complement row: everything that did not match the only key.
      counts <- base::c(counts, base::nrow(df) - counts[1])
      counts1 <- base::c(counts1, base::nrow(df1) - counts1[1])
    }
    base::data.frame(v = counts, v1 = counts1)
  }
  #' Determine statistical significance of df/df1 splitting for variable
  #' using chi-square test
  #'
  #' Builds the contingency table with `get.contingency()` and runs
  #' `stats::chisq.test()` on it with `simulate.p.value = TRUE`.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return the p-value reported by chisq.test
  #'
  #' @export
  get.chisq <- function(df, df1, var, keys) {
    outcome <- stats::chisq.test(
      get.contingency(df, df1, var, keys),
      simulate.p.value = TRUE
    )
    outcome$p.value
  }
  #' Determine statistical significance of df/df1 splitting for variable
  #' using Fisher's exact test
  #'
  #' Builds the contingency table with `get.contingency()` and runs
  #' `stats::fisher.test()` on it with `simulate.p.value = TRUE`.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return the p-value reported by fisher.test
  #'
  #' @export
  get.fisher <- function(df, df1, var, keys) {
    outcome <- stats::fisher.test(
      get.contingency(df, df1, var, keys),
      simulate.p.value = TRUE
    )
    outcome$p.value
  }
  #' Determine statistical significance of df/df1 splitting for a continuous
  #' variable (MWU/Wilcox)
  #'
  #' Runs `stats::wilcox.test()` on column `var` of `df` against the same
  #' column of `df1`.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #'
  #' @return the p-value reported by wilcox.test
  #'
  #' @export
  get.u <- function(df, df1, var) {
    selected <- df[, var]
    complement <- df1[, var]
    stats::wilcox.test(selected, complement)$p.value
  }
  #' Determine statistical significance of df/df1 splitting for a continuous
  #' variable (T-test)
  #'
  #' Runs `stats::t.test()` on column `var` of `df` against the same column
  #' of `df1`.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #'
  #' @return the p-value reported by t.test
  #'
  #' @export
  get.t <- function(df, df1, var) {
    selected <- df[, var]
    complement <- df1[, var]
    stats::t.test(selected, complement)$p.value
  }