modifyData.R 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  #' Add two columns of a data frame, substituting a fixed value for NA
  #'
  #' Missing entries in either input column are replaced by `valIfNA`
  #' before the element-wise sum is stored in `outVar`.
  #'
  #' @param df data frame
  #' @param var1 name of the first column to add
  #' @param var2 name of the second column to add
  #' @param outVar name of the column that receives the sum
  #' @param valIfNA value used in place of NA in either input column
  #'
  #' @return `df` with column `outVar` added or overwritten
  #' @export
  #'
  #' @examples
  #' scans <- data.frame(lesionmtv41 = c(1, NA), metastasesmtv41 = c(NA, 2))
  #' sumWithNA(scans, "lesionmtv41", "metastasesmtv41", "totalmtv41", 0)
  sumWithNA <- function(df, var1 = "lesionmtv41", var2 = "metastasesmtv41",
                        outVar = "totalmtv41", valIfNA = 0) {
    # Local helper: copy of `values` with every NA swapped for valIfNA.
    fillMissing <- function(values) {
      base::replace(values, base::is.na(values), valIfNA)
    }
    df[, outVar] <- fillMissing(df[, var1]) + fillMissing(df[, var2])
    df
  }
  #' Replace missing values in a single column
  #'
  #' @param df data frame
  #' @param var name of the column to modify
  #' @param valIfNA value written wherever the column is NA
  #'
  #' @return `df` with the NA entries of `var` replaced by `valIfNA`
  #'
  #' @export
  mapNA <- function(df, var, valIfNA = 0) {
    # Rows in which the selected column has no value.
    isMissing <- base::is.na(df[, var])
    df[isMissing, var] <- valIfNA
    df
  }
  #' Remove rows that have a missing value in any of the selected variables
  #'
  #' A row is kept only when every column listed in `vars` is non-NA.
  #' Columns not listed in `vars` are ignored when deciding which rows
  #' to keep. An empty `vars` returns all rows.
  #'
  #' @param x data frame
  #' @param vars vector of variable names as strings
  #'
  #' @return data frame with the rows removed where any of `vars` was NA;
  #'   the result stays a data frame even when `x` has a single column
  #'
  #' @export
  remove.NA <- function(x, vars) {
    # Accumulate a single row mask instead of re-subsetting per variable.
    keep <- base::rep(TRUE, base::nrow(x))
    for (v in vars) {
      # drop = TRUE yields the bare column; an unknown column still errors.
      keep <- keep & !base::is.na(x[, v, drop = TRUE])
    }
    # drop = FALSE: without it a one-column data frame collapses to a vector.
    x[keep, , drop = FALSE]
  }
  #' Map status at last evaluation to status at a fixed cutoff time
  #'
  #' Every row starts as alive (1). Rows evaluated before `cutoff` are then
  #' reclassified from the recorded status: 1 or 2 becomes censored (2) and
  #' 3 becomes dead (0); any other recorded status keeps the value 1. Rows
  #' evaluated at or after `cutoff` count as alive whatever was recorded.
  #' Rows whose outcome cannot be decided (missing time, or evaluated before
  #' `cutoff` with a missing status) are set to NA.
  #'
  #' @param df data frame
  #' @param cutoff time instance where OS is evaluated, same units as timeVar
  #' @param timeVar name of column where times of status evaluation are
  #'   recorded (from treatment start)
  #' @param osVar name of column with status at evaluation
  #'   (1-alive/progress free, 2-censored, 3-dead/w/disease)
  #' @param targetVar name of target variable holding status at cutoff
  #'   (0-dead, 1-alive, 2-censored, NA-undetermined)
  #'
  #' @return updated data frame with targetVar
  #' @export
  selectValid <- function(df, cutoff = 2, timeVar = "years_to_event",
                          osVar = "st_osMAP", targetVar = "osAtCutoff") {
    evalTime <- df[, timeVar, drop = TRUE]
    status <- df[, osVar, drop = TRUE]
    beforeCutoff <- evalTime < cutoff

    # Default: evaluated at or after the cutoff, hence alive at the cutoff.
    outcome <- base::rep(1, base::nrow(df))
    # which() drops NA comparisons, so missing inputs cannot break indexing.
    # alive or censored before the cutoff -> censored
    outcome[base::which(beforeCutoff & status == 1)] <- 2
    outcome[base::which(beforeCutoff & status == 2)] <- 2
    # dead before the cutoff -> dead
    outcome[base::which(beforeCutoff & status == 3)] <- 0
    # Unknown time, or early evaluation with unknown status: cannot decide.
    undecided <- base::is.na(evalTime) | (beforeCutoff & base::is.na(status))
    outcome[undecided] <- NA

    df[[targetVar]] <- outcome
    df
  }
  #' Remap/change value of variable according to a lookup table
  #'
  #' Values of `origVar` are converted to character and looked up by name in
  #' `valueMap`; values without a matching name map to NA. The lookup is
  #' always by name, also when `origVar` is numeric or a factor.
  #'
  #' @param df data frame
  #' @param origVar name of original variable
  #' @param mapVar name of variable with updated values
  #' @param valueMap mapping of variables from old to new, named list
  #'   where names() are values to be found in data frame and values()
  #'   contain new values
  #' @param num ensure output value is numeric; when TRUE every mapped value
  #'   must be a single number (or NA), otherwise an error is raised
  #'
  #' @return data frame with updated column
  #' @export
  remapVariable <- function(df, origVar, mapVar, valueMap, num = TRUE) {
    if (!origVar %in% colnames(df)) {
      base::stop(base::sprintf("Variable '%s' not found in data frame", origVar))
    }
    # Character keys force lookup by name; numeric or factor values would
    # otherwise index valueMap by position.
    keys <- base::as.character(df[[origVar]])
    if (num) {
      # vapply ensures output type consistency; USE.NAMES = FALSE keeps the
      # keys from being attached as names to the new column.
      df[[mapVar]] <- base::vapply(keys, getNewValue, map = valueMap,
                                   FUN.VALUE = numeric(1), USE.NAMES = FALSE)
    } else {
      # Output type depends on valueMap, so let sapply simplify.
      df[[mapVar]] <- base::sapply(keys, getNewValue, map = valueMap,
                                   USE.NAMES = FALSE)
    }
    df
  }

  # Look up a single value in a named map.
  # x is coerced to character so the lookup is by name, never by position.
  # Returns map[[x]] when x is one of names(map), NA otherwise.
  getNewValue <- function(x, map) {
    key <- base::as.character(x)
    if (key %in% base::names(map)) {
      map[[key]]
    } else {
      NA
    }
  }
  #' Compute time from start to event in years
  #'
  #' The difference between `eventColumn` and `startColumn` is taken in days
  #' and divided by 365.25.
  #'
  #' @param df data frame
  #' @param duration name of the output column
  #' @param eventColumn name of column with dates of evaluation visits
  #'   (as.Date)
  #' @param startColumn name of column with dates of treatment start or
  #'   diagnosis (as.Date)
  #'
  #' @return `df` with column `duration` added or overwritten
  #'
  #' @export
  setEventTime <- function(df, duration = "years_to_event",
                           eventColumn = "d_os",
                           startColumn = "rtstartdate1") {
    daysPerYear <- 365.25
    startDates <- df[, startColumn]
    eventDates <- df[, eventColumn]
    elapsed <- base::difftime(eventDates, startDates, units = "days")
    df[, duration] <- base::as.numeric(elapsed) / daysPerYear
    df
  }
  #' Parse a column into Date values using a given format
  #'
  #' @param x data frame
  #' @param var name of the column to convert
  #' @param format date format string, passed on to the as.Date R function
  #'
  #' @return `x` with column `var` replaced by the result of as.Date
  #'
  #' @export
  convert.to.date <- function(x, var, format = "%Y-%m-%d") {
    parsed <- base::as.Date(x[, var], format = format)
    x[, var] <- parsed
    x
  }