modifyData.R 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. #' Sum columns in data frame df where some of the entries might be NA
  2. #'
  3. #' @param df data frame
  4. #' @param var1 first column
  5. #' @param var2 second column
  6. #' @param outVar which variable to store sum into
  7. #' @param valIfNA which value to use for NA
  8. #'
  9. #' @return updated data frame
  10. #' @export
  11. #'
  12. ## examples df<-sumWithNA(df,'lesionmtv41','metastasesmtv41','totalmtv41',0)
  13. sumWithNA<-function(df,var1='lesionmtv41',var2='metastasesmtv41',outVar='totalmtv41',valIfNA=0){
  14. v1=df[,var1]
  15. v2=df[,var2]
  16. v1[is.na(v1)]=valIfNA
  17. v2[is.na(v2)]=valIfNA
  18. df[,outVar]=v1+v2
  19. df
  20. }
  21. #' Map NA to value
  22. #'
  23. #' @param df data frame
  24. #' @param var variable to modify
  25. #' @param valIfNA change from NA to this value
  26. #'
  27. #' @return updated data frame
  28. #'
  29. #' @export
  30. mapNA<-function(df,var,valIfNA=0){
  31. df[is.na(df[,var]),var]=valIfNA
  32. df
  33. }
  34. #' Map variable time OS status to status at cutoff
  35. #'
  36. #' @param df data frame
  37. #' @param cutoff time instance where OS is evaluated, same units as timeVar
  38. #' @param timeVar name of column where times of status evaluation are recorded (from treatment start)
  39. #' @param osVar name of column with status at evaluation (1-alive/progress free, 2-censored, 3-dead/w/disease)
  40. #' @param targetVar name of target variable holding status at cutoff (0-dead, 1-alive, 2-censored)
  41. #'
  42. #' @return updated data frame with targetVar
  43. #' @export
  44. selectValid<-function(df,cutoff=2,timeVar='years_to_event',osVar='st_osMAP',targetVar='osAtCutoff'){
  45. #if alive and ytoevent>c -> alive
  46. #if censored and ytoe>c -> alive
  47. #if dod and ytovent>c -> alive
  48. df[,targetVar]=1
  49. #if alive and ytoe<c -> censored
  50. df[(df[,timeVar]<cutoff) & df[,osVar] == 1,targetVar]=2
  51. #if censored and ytoe<c -> censored
  52. df[df[,timeVar]<cutoff & df[,osVar] == 2,targetVar]=2
  53. #if dod and ytovent<c -> dead
  54. df[df[,timeVar]<cutoff & df[,osVar] == 3,targetVar]=0
  55. df
  56. }
  57. #' Remap/change value of variable according to a lookup table
  58. #'
  59. #' @param df data frame
  60. #' @param origVar name of original variable
  61. #' @param mapVar name of variable with updated values
  62. #' @param valueMap mapping of variables from old to new, named list
  63. #' where names() are values to be found in data frame and values() contain new values
  64. #' @param num ensure output value is numeric
  65. #'
  66. #' @return data frame with updated column
  67. #' @export
  68. remapVariable <- function(df, origVar, mapVar, valueMap, num=TRUE) {
  69. if (!origVar %in% colnames(df)) {
  70. base::stop(base::sprintf("Variable '%s' not found in data frame", origVar))
  71. }
  72. if (num){
  73. # Ensures output type consistency
  74. df[[mapVar]] <- base::vapply(base::as.character(df[[origVar]]), getNewValue, map=valueMap,FUN.VALUE = numeric(1))
  75. }
  76. else
  77. df[[mapVar]] <- base::sapply(df[[origVar]], getNewValue, map=valueMap)
  78. df
  79. }
  80. getNewValue<-function(x,map){
  81. if (x %in% base::names(map))
  82. map[[x]]
  83. else
  84. NA
  85. }
  86. #'Set event time
  87. #'
  88. #' @param df data frame
  89. #' @param duration name of the output column
  90. #' @param eventColumn dates of evaluation visits (as.Date)
  91. #' @param startColumn dates of treatment start or diagnosis (as.Date)
  92. #'
  93. #' @return updated data frame
  94. #'
  95. #' @export
  96. setEventTime<-function(df,duration='years_to_event',eventColumn='d_os',startColumn='rtstartdate1'){
  97. t0=df[,startColumn]
  98. t1=df[,eventColumn]
  99. df[,duration] <-base::as.numeric(base::difftime(t1, t0, units = "days")) / 365.25
  100. df
  101. }