stats.R 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  #' Construct a string to report mean and SD
  #'
  #' Formats the mean and standard deviation of `v` (missing values removed)
  #' as `"mean (sd)"`, printing both numbers with `nD` decimals.
  #'
  #' @param v a vector of values
  #' @param nD number of decimals to print. Any non-negative whole number is
  #'   honoured; fractional values are truncated, and values that cannot be
  #'   read as a non-negative number fall back to 0 decimals.
  #'
  #' @return a combined string of the form `"mean (sd)"`
  #'
  #' @export
  get.mean <- function(v, nD = 0) {
    # Normalise the requested precision to a single non-negative integer so it
    # can be spliced into the format string safely.
    digits <- base::suppressWarnings(base::as.integer(nD[1]))
    if (base::length(digits) != 1 || base::is.na(digits) || digits < 0) {
      digits <- 0L
    }
    # "%%" yields a literal "%", so digits = 2 produces "%.2f (%.2f)".
    fmt <- base::sprintf("%%.%df (%%.%df)", digits, digits)
    base::sprintf(fmt, base::mean(v, na.rm = TRUE), stats::sd(v, na.rm = TRUE))
  }
  #' Construct a string to report median and range
  #'
  #' Missing values are ignored. All three numbers are printed without
  #' decimals as `"median (min-max)"`.
  #'
  #' @param v a vector of values
  #'
  #' @return a combined string of the form `"median (min-max)"`
  #'
  #' @export
  get.median <- function(v) {
    centre <- stats::median(v, na.rm = TRUE)
    lowest <- base::min(v, na.rm = TRUE)
    highest <- base::max(v, na.rm = TRUE)
    base::sprintf('%.0f (%.0f-%.0f)', centre, lowest, highest)
  }
  #' Construct a string to report median and quartiles
  #'
  #' Missing values are ignored. The median and the 25% / 75% quantiles are
  #' printed without decimals as `"median (Q1-Q3)"`. The function has no side
  #' effects: nothing is printed to the console.
  #'
  #' @param v a vector of values
  #'
  #' @return a combined string of the form `"median (Q1-Q3)"`
  #'
  #' @export
  get.medianQ <- function(v) {
    quartiles <- stats::quantile(v, probs = base::c(0.25, 0.75), na.rm = TRUE)
    # `[[` extracts the bare numbers, dropping the "25%" / "75%" names.
    base::sprintf(
      "%.0f (%.0f-%.0f)",
      stats::median(v, na.rm = TRUE), quartiles[[1]], quartiles[[2]]
    )
  }
  #' Construct a string to report count and portion that match comma separated string list
  #'
  #' `val` is converted to text, split on commas and each piece is read as an
  #' integer. The result reports how many elements of `v` equal one of those
  #' integers, and that count as a percentage of `length(v)`.
  #'
  #' @param v a vector of values
  #' @param val comma separated list of values
  #'
  #' @return a combined string of the form `"count (percent %)"`
  #'
  #' @export
  get.portion <- function(v, val) {
    tokens <- base::strsplit(base::as.character(val), ',')[[1]]
    wanted <- base::as.integer(tokens)
    # Summing the membership mask counts the matching elements.
    hits <- base::sum(base::is.element(v, wanted))
    share <- 100 * hits / base::length(v)
    base::sprintf('%.0f (%.0f %%)', hits, share)
  }
  #' Report count of elements in v that match comma separated string list
  #'
  #' `val` is converted to text, split on commas and each piece is read as an
  #' integer; elements of `v` equal to any of those integers are counted.
  #'
  #' @param v a vector of values
  #' @param val comma separated list of values
  #'
  #' @return the number of matching elements, as an integer
  #'
  #' @export
  get.events <- function(v, val) {
    key_text <- base::as.character(val)
    wanted <- base::as.integer(base::strsplit(key_text, ',')[[1]])
    is_event <- base::is.element(v, wanted)
    base::sum(is_event)
  }
  #' Repeat function and merge output in a vector
  #'
  #' Calls `func(v, k)` once for every `k` in `keys`, in order, and
  #' concatenates the results with `c()`.
  #'
  #' @param func function to perform, with elements v and a key k
  #' @param v parameter of the func
  #' @param keys a set of k values for the func
  #'
  #' @return a combined output; `NULL` when `keys` is empty
  #'
  #' @export
  get.series <- function(func, v, keys = c()) {
    # Collect every result first and concatenate once, instead of re-copying a
    # growing vector on each iteration. as.vector() reduces `keys` to a plain
    # vector so that, for example, factor keys are visited by their labels.
    pieces <- base::lapply(base::as.vector(keys), function(k) func(v, k))
    # unname() keeps names of `keys` from leaking into the arguments of c().
    base::do.call(base::c, base::unname(pieces))
  }
  #' Construct a contingency table
  #'
  #' Counts, for each key, how many entries of column `var` match it in `df`
  #' and in `df1`. When exactly one key is given, a second row is appended
  #' holding the remaining rows of each data frame (row count minus matches).
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return contingency table as a data frame, rows are keys, and columns are v for df and v1 for df1
  #'
  #' @export
  get.contingency <- function(df, df1, var, keys) {
    selected <- get.series(get.events, df[, var], keys)
    complement <- get.series(get.events, df1[, var], keys)
    single_key <- base::length(keys) == 1
    if (single_key) {
      # A lone key is contrasted with "everything else" in each data frame.
      selected <- base::c(selected, base::nrow(df) - selected[1])
      complement <- base::c(complement, base::nrow(df1) - complement[1])
    }
    base::data.frame(v = selected, v1 = complement)
  }
  #' Determine statistical significance of df/df1 splitting for variable using chi-square test
  #'
  #' Builds the contingency table with `get.contingency()` and passes it to
  #' `stats::chisq.test()` with `simulate.p.value = TRUE`.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return the p-value reported by chisq.test
  #'
  #' @export
  get.chisq <- function(df, df1, var, keys) {
    counts <- get.contingency(df, df1, var, keys)
    outcome <- stats::chisq.test(counts, simulate.p.value = TRUE)
    outcome$p.value
  }
  #' Determine statistical significance of df/df1 splitting for variable using Fisher's exact test
  #'
  #' Builds the contingency table with `get.contingency()` and passes it to
  #' `stats::fisher.test()` with `simulate.p.value = TRUE`.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return the p-value reported by fisher.test
  #'
  #' @export
  get.fisher <- function(df, df1, var, keys) {
    counts <- get.contingency(df, df1, var, keys)
    outcome <- stats::fisher.test(counts, simulate.p.value = TRUE)
    outcome$p.value
  }
  #' Determine statistical significance of df/df1 splitting for a continuous variable (MWU/Wilcox)
  #'
  #' Compares column `var` of `df` against the same column of `df1` with
  #' `stats::wilcox.test()` using its default settings.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #'
  #' @return the p-value reported by wilcox.test
  #'
  #' @export
  get.u <- function(df, df1, var) {
    selected <- df[, var]
    complement <- df1[, var]
    outcome <- stats::wilcox.test(selected, complement)
    outcome$p.value
  }
  #' Determine statistical significance of df/df1 splitting for a continuous variable (T-test)
  #'
  #' Compares column `var` of `df` against the same column of `df1` with
  #' `stats::t.test()` using its default settings.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #'
  #' @return the p-value reported by t.test
  #'
  #' @export
  get.t <- function(df, df1, var) {
    selected <- df[, var]
    complement <- df1[, var]
    outcome <- stats::t.test(selected, complement)
    outcome$p.value
  }