# modifyData.R
# Helpers for deriving and remapping columns of a data frame.
  1. #' Sum columns in data frame df where some of the entries might be NA
  2. #'
  3. #' @param df data frame
  4. #' @param var1 first column
  5. #' @param var2 second column
  6. #' @param outVar which variable to store sum into
  7. #' @param valIfNA which value to use for NA
  8. #'
  9. #' @return updated data frame
  10. #' @export
  11. #'
  12. ## examples df<-sumWithNA(df,'lesionmtv41','metastasesmtv41','totalmtv41',0)
  13. sumWithNA<-function(df,var1='lesionmtv41',var2='metastasesmtv41',outVar='totalmtv41',valIfNA=0){
  14. v1=df[,var1]
  15. v2=df[,var2]
  16. v1[is.na(v1)]=valIfNA
  17. v2[is.na(v2)]=valIfNA
  18. df[,outVar]=v1+v2
  19. df
  20. }
  21. #' Map variable time OS status to status at cutoff
  22. #'
  23. #' @param df data frame
  24. #' @param cutoff time instance where OS is evaluated, same units as timeVar
  25. #' @param timeVar name of column where times of status evaluation are recorded (from treatment start)
  26. #' @param osVar name of column with status at evaluation (1-alive/progress free, 2-censored, 3-dead/w/disease)
  27. #' @param targetVar name of target variable holding status at cutoff (0-dead, 1-alive, 2-censored)
  28. #'
  29. #' @return updated data frame with targetVar
  30. #' @export
  31. selectValid<-function(df,cutoff=2,timeVar='years_to_event',osVar='st_osMAP',targetVar='osAtCutoff'){
  32. #if alive and ytoevent>c -> alive
  33. #if censored and ytoe>c -> alive
  34. #if dod and ytovent>c -> alive
  35. df[,targetVar]=1
  36. #if alive and ytoe<c -> censored
  37. df[(df[,timeVar]<cutoff) & df[,osVar] == 1,targetVar]=2
  38. #if censored and ytoe<c -> censored
  39. df[df[,timeVar]<cutoff & df[,osVar] == 2,targetVar]=2
  40. #if dod and ytovent<c -> dead
  41. df[df[,timeVar]<cutoff & df[,osVar] == 3,targetVar]=0
  42. df
  43. }
  44. #' Remap/change value of variable according to a lookup table
  45. #'
  46. #' @param df data frame
  47. #' @param origVar name of original variable
  48. #' @param mapVar name of variable with updated values
  49. #' @param valueMap mapping of variables from old to new, named list
  50. #' where names() are values to be found in data frame and values() contain new values
  51. #' @param num ensure output value is numeric
  52. #'
  53. #' @return data frame with updated column
  54. #' @export
  55. remapVariable <- function(df, origVar, mapVar, valueMap, num=TRUE) {
  56. if (!origVar %in% colnames(df)) {
  57. base::stop(base::sprintf("Variable '%s' not found in data frame", origVar))
  58. }
  59. if (num){
  60. # Ensures output type consistency
  61. df[[mapVar]] <- base::vapply(base::as.character(df[[origVar]]), getNewValue, map=valueMap,FUN.VALUE = numeric(1))
  62. }
  63. else
  64. df[[mapVar]] <- base::sapply(df[[origVar]], getNewValue, map=valueMap)
  65. df
  66. }
  67. getNewValue<-function(x,map){
  68. if (x %in% base::names(map))
  69. map[[x]]
  70. else
  71. NA
  72. }