# stats.R

  #' Format the mean and standard deviation of a vector as one string
  #'
  #' Missing values are dropped before either statistic is computed. Both
  #' numbers are printed with no decimal places.
  #'
  #' @param v a vector of values
  #'
  #' @return a string of the form "mean (sd)"
  #'
  #' @export
  get.mean <- function(v) {
    centre <- base::mean(v, na.rm = TRUE)
    spread <- stats::sd(v, na.rm = TRUE)
    base::sprintf('%.0f (%.0f)', centre, spread)
  }
  #' Format the median and full range of a vector as one string
  #'
  #' Missing values are dropped before the median, minimum and maximum are
  #' computed. All three numbers are printed with no decimal places.
  #'
  #' @param v a vector of values
  #'
  #' @return a string of the form "median (min-max)"
  #'
  #' @export
  get.median <- function(v) {
    middle <- stats::median(v, na.rm = TRUE)
    lowest <- base::min(v, na.rm = TRUE)
    highest <- base::max(v, na.rm = TRUE)
    base::sprintf('%.0f (%.0f-%.0f)', middle, lowest, highest)
  }
  #' Format the median and quartiles of a vector as one string
  #'
  #' Missing values are dropped before the median and the 25th/75th
  #' percentiles are computed. All three numbers are printed with no decimal
  #' places. The function has no side effects: nothing is printed to the
  #' console.
  #'
  #' @param v a vector of values
  #'
  #' @return a string of the form "median (Q1-Q3)"
  #'
  #' @export
  get.medianQ <- function(v) {
    quartiles <- stats::quantile(v, probs = base::c(0.25, 0.75), na.rm = TRUE)
    # `[[` extracts the bare numbers, leaving the "25%"/"75%" names behind.
    base::sprintf(
      '%.0f (%.0f-%.0f)',
      stats::median(v, na.rm = TRUE),
      quartiles[[1]],
      quartiles[[2]]
    )
  }
  #' Format the count and percentage of elements matching a list of values
  #'
  #' `val` is converted to character, split on commas and coerced to integer;
  #' the elements of `v` found among those integers are counted. The
  #' percentage is taken relative to `length(v)`, so missing entries in `v`
  #' are part of the denominator.
  #'
  #' @param v a vector of values
  #' @param val comma separated list of (integer) values, e.g. "1,2"
  #'
  #' @return a string of the form "count (percent %)"
  #'
  #' @export
  get.portion <- function(v, val) {
    tokens <- base::strsplit(base::as.character(val), ',')[[1]]
    wanted <- base::as.integer(tokens)
    hits <- base::length(v[base::is.element(v, wanted)])
    share <- 100 * hits / base::length(v)
    base::sprintf('%.0f (%.0f %%)', hits, share)
  }
  #' Count the elements of a vector that match a list of values
  #'
  #' `val` is converted to character, split on commas and coerced to integer;
  #' the elements of `v` found among those integers are counted.
  #'
  #' @param v a vector of values
  #' @param val comma separated list of (integer) values, e.g. "1,2"
  #'
  #' @return the number of matching elements, as an integer
  #'
  #' @export
  get.events <- function(v, val) {
    tokens <- base::strsplit(base::as.character(val), ',')[[1]]
    wanted <- base::as.integer(tokens)
    matched <- v[base::is.element(v, wanted)]
    base::length(matched)
  }
  #' Apply a function once per key and concatenate the results
  #'
  #' Calls `func(v, k)` for every `k` in `keys`, in order, and joins the
  #' individual results with `c()`.
  #'
  #' @param func function taking the values `v` and a single key `k`
  #' @param v first argument passed to every call of `func`
  #' @param keys the set of `k` values to iterate over
  #'
  #' @return the concatenated outputs, or `NULL` when `keys` is empty
  #'
  #' @export
  get.series <- function(func, v, keys = c()) {
    collected <- NULL
    for (key in keys) {
      piece <- func(v, key)
      collected <- base::c(collected, piece)
    }
    collected
  }
  #' Build a contingency table of key counts for two data frames
  #'
  #' For each entry of `keys`, the matching values of column `var` are counted
  #' in `df` and in `df1` (via `get.series()` and `get.events()`). When only a
  #' single key is supplied, a second row is appended holding the remaining
  #' rows (`nrow()` minus the count), so the table always has two outcomes.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return contingency table as a data frame; rows follow `keys`, column `v`
  #'   holds the counts for `df` and column `v1` the counts for `df1`
  #'
  #' @export
  get.contingency <- function(df, df1, var, keys) {
    counts <- get.series(get.events, df[, var], keys)
    counts1 <- get.series(get.events, df1[, var], keys)
    single_key <- base::length(keys) == 1
    if (single_key) {
      # Complement row: everything in each data frame that did not match.
      counts <- base::c(counts, base::nrow(df) - counts[1])
      counts1 <- base::c(counts1, base::nrow(df1) - counts1[1])
    }
    base::data.frame(v = counts, v1 = counts1)
  }
  #' Chi-square test p-value for the df/df1 split of a categorical variable
  #'
  #' Builds the contingency table with `get.contingency()` and runs
  #' `stats::chisq.test()` on it with `simulate.p.value = TRUE`.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return the p-value reported by `chisq.test`
  #'
  #' @export
  get.chisq <- function(df, df1, var, keys) {
    tbl <- get.contingency(df, df1, var, keys)
    outcome <- stats::chisq.test(tbl, simulate.p.value = TRUE)
    outcome$p.value
  }
  #' Fisher's exact test p-value for the df/df1 split of a categorical variable
  #'
  #' Builds the contingency table with `get.contingency()` and runs
  #' `stats::fisher.test()` on it with `simulate.p.value = TRUE`.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #' @param keys possible values of var
  #'
  #' @return the p-value reported by `fisher.test`
  #'
  #' @export
  get.fisher <- function(df, df1, var, keys) {
    tbl <- get.contingency(df, df1, var, keys)
    outcome <- stats::fisher.test(tbl, simulate.p.value = TRUE)
    outcome$p.value
  }
  #' Mann-Whitney U (Wilcoxon) p-value for the df/df1 split of a continuous variable
  #'
  #' Compares column `var` of `df` against the same column of `df1` using
  #' `stats::wilcox.test()` with its default arguments.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #'
  #' @return the p-value reported by `wilcox.test`
  #'
  #' @export
  get.u <- function(df, df1, var) {
    selected <- df[, var]
    complement <- df1[, var]
    outcome <- stats::wilcox.test(selected, complement)
    outcome$p.value
  }
  #' T-test p-value for the df/df1 split of a continuous variable
  #'
  #' Compares column `var` of `df` against the same column of `df1` using
  #' `stats::t.test()` with its default arguments.
  #'
  #' @param df data frame with entries selected by an outcome variable
  #' @param df1 complementary data frame to df
  #' @param var variable to test dependency for
  #'
  #' @return the p-value reported by `t.test`
  #'
  #' @export
  get.t <- function(df, df1, var) {
    selected <- df[, var]
    complement <- df1[, var]
    outcome <- stats::t.test(selected, complement)
    outcome$p.value
  }