| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303 | #' Create a ROC curve#'#' @param labels vector of outcomes#' @param scores vector of variable values#' @param decreasing ordering of scores#'#' @return data frame with columns TPR, FPR, labels, scores, V #' V is combination of V0 (negative cases) and V1 (positive cases)#' V0 is portion of positive cases with value larger than score#' V1 is portion of negative cases with value below score#'#' @exportsimple.roc <- function(labels, scores, decreasing = TRUE) {  # Odstranimo NA vrednosti iz labels in scores  valid_data = stats::complete.cases(labels, scores)  labels = labels[valid_data]  scores = scores[valid_data]  labels <- labels[order(scores, decreasing=decreasing)]  scores <- scores[order(scores, decreasing=decreasing)]  labels = base::as.logical(labels)  x = base::data.frame(TPR = base::cumsum(labels) / base::sum(labels),                 FPR = base::cumsum(!labels) / base::sum(!labels),                 labels, scores)  s1 = scores[labels]  s0 = scores[!labels]  #V0 are 0 components of test, for each test-negative case find number of positive cases with value above it (ideally, count is equal to n1, number of positive cases)  V0 = base::sapply(s0, psi01, s=s1)  #V1 are 1 components of test; for each test-positive case find number of negative cases with score below its score (ideally, count is equal to n0, number of negative cases)  V1 = base::sapply(s1, psi10, s=s0)  n0=base::length(s0)  n1=base::length(s1)  #convert V0 to portions, to [0,1] range  V0=V0/n1  #convert V1 to portions, to [0,1] range  V1=V1/n0  V = labels  V[labels] <- V1  V[!labels] <- V0  x$V = V  x}psi01 <- function(y, s) {  v1 = base::sum(s > y, na.rm = TRUE)  v2 = base::sum(s == y, na.rm = TRUE)  v1 + 0.5 * v2}psi10 <- function(x, s) {  v1 = base::sum(s < x, na.rm = TRUE)  v2 = base::sum(s == x, na.rm = TRUE)  v1 + 0.5 * v2}#' Calculate AUC#' @param roc as returned by simple.roc#'#' @return AUC (numeric)#'#' @exportsimple.getAUC <- function(roc) {  V0 = roc[!roc$labels, 'V']  mean(V0)}#' Calculate metrics associated with ROC #'#' @param ROC roc as returned by simple.roc#'#' @return a list with elemnts: : FPR, TPR, threshold, FPR CI95:min , FPR CI95: max, TPR CI95:min, TPR CI95:max#' all evaluated at optimum point (Youden index)#'#' @exportsimple.compute_roc_metrics <- function(ROC) {#determine optimal point on ROC curve using Youden index#ROC is the output of simple.roc#output is a list : tpr, tpr CI95:min , tpr CI95: max, fpr, fpr CI95:min, fpr CI95:max, value of score,#all evaluated at optimum point,       n=base::sum(ROC$labels)   m=base::sum(!ROC$labels)   nt=base::length(ROC$TPR)   dist<-base::abs(ROC$TPR-ROC$FPR)   imax<-base::which.max(dist)   hTPR<-stats::prop.test(x=ROC$TPR[imax]*n,n=n,conf.level=0.95,correct=FALSE)   hFPR<-stats::prop.test(x=ROC$FPR[imax]*m,n=m,conf.level=0.95,correct=FALSE)        return(base::list(    FPR = ROC$FPR[imax],    TPR = ROC$TPR[imax],    threshold = ROC$scores[imax],    lFPR = hFPR$conf.int[1],    hFPR = hFPR$conf.int[2],    lTPR = hTPR$conf.int[1],    hTPR = hTPR$conf.int[2]  ))}#' Calculate covariance of AUC (deLonghi)#'#' @param roc roc as calculated by simple.roc#'#' @return standard deviation of AUC#'#' @exportsimple.sAUC <- function(roc) {  V1 = roc[roc$labels, 'V']  AUC = base::mean(V1)  V0 = roc[!roc$labels, 'V']  n0 = base::length(V0)  n1 = base::length(V1)  S0 = base::sum((V0 - AUC) * (V0 - AUC)) / (n0 - 1)  S1 = base::sum((V1 - AUC) * (V1 - AUC)) / (n1 - 1)  base::sqrt(S0 / n0 + S1 / n1)}#' Calculate covariance of AUC (approximation)#'#' @param roc roc as calculated by simple.roc#'#' @return standard deviation of AUC#'#' @exportsimple.sAUCapprox <- function(roc) {  n = base::length(roc$labels)  auc = simple.getAUC(roc)  auc_se = base::sqrt((auc * (1 - auc) + (n - 1) * (auc - 0.5)^2) / n)  auc_se}#' Check whether two AUCs are statistically significantly different#'#' @param rocA roc of the first test as returned by simple.roc#' @param rocB roc of the second test as returned by simple.roc#'#' @return p of the test#'#' @exportsimple.sAUC2<-function(rocA,rocB){   #NCSS   #https://www.ncss.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Comparing_Two_ROC_Curves-Paired_Design.pdf   #calculate combined variance of two predictions on the same dataset   #and give p-value that their performance is the same (or, with low p, that it is different)   V0A=rocA[!rocB$labels,'V']   V0B=rocB[!rocB$labels,'V']   V1A=rocA[rocA$labels,'V']   V1B=rocB[rocB$labels,'V']   AUCA=base::mean(V0A)   AUCB=base::mean(V0B)   n0=base::length(V0A)#the same as V0B   n1=base::length(V1A)#the same as V1B   base::print(base::sprintf('A=%f B=%f',AUCA,AUCB))   #variance of the 0 component, A   S0A=base::sum((V0A-AUCA)*(V0A-AUCA))/(n0-1)   #variance of 1 component, A   S1A=base::sum((V1A-AUCA)*(V1A-AUCA))/(n1-1)   #variance of A   SA=S0A/n0+S1A/n1   #variance of the 0 component, B   S0B=base::sum((V0B-AUCB)*(V0B-AUCB))/(n0-1)   #variance of 1 component, B   S1B=base::sum((V1B-AUCB)*(V1B-AUCB))/(n1-1)   #variance of B   SB=S0B/n0+S1B/n1   #covariance 0 component   S0AB=base::sum((V0A-AUCA)*(V0B-AUCB))/(n0-1)   #covariance 1 component   S1AB=base::sum((V1A-AUCA)*(V1B-AUCB))/(n1-1)   #covariance   SAB=S0AB/n0+S1AB/n1   S=SA+SB-2*SAB   #is there a significant difference   z=base::abs(AUCA-AUCB)/base::sqrt(S)   p=2*stats::pnorm(z,mean=0,sd=1,lower.tail=FALSE)   #is A larger than B   #z=(AUCA-AUCB)/sqrt(S)   #p=pnorm(z,mean=0,sd=1,lower.tail=FALSE)   base::print(base::sprintf('SA2=%f SB2=%f SAB2=%f S2=%f S=%f z=%f p=%f',SA,SB,SAB,S,base::sqrt(S),z,p))   p}#' Plot a ROC curve with associated annotations#'#'@param df data frame#'@param var variable to use to stratify patients#'@param col color of the line drawn#'@param x x coordinate of legend#'@param y y coordinate of legend#'@param unit - what unit to associate to thrshold on legend (ml)#'@param precise number of decimal places to use when reporting opt threshold, TRUE:2, FALSE:0#'@param target column that holds binary outcomes#'#'@return list object with items: roc object as created by simple.roc, thr optimal threshold, legend_text text to be put on final legend#'@exportsimple.plotROC<-function(df,var,col="black",x=0.65,y=0.1,unit="ml",precise=FALSE,target='alive'){	roc=simple.roc(df[,target],df[,var])   auc=simple.getAUC(roc)   #calculate sAUC (sigma AUC, see DeLonghi)   sAUC=simple.sAUC(roc)   #approximate version of sAUC for historical reasons   sAUCapprox <- simple.sAUCapprox(roc)   #determine optimum point for test use (Youden index)   roc_metrics <- simple.compute_roc_metrics(roc)   #report sensitivity/specificity/CI95 at opt threshold   print(sprintf('[%s] Opt: sens %.2f (%.2f,%.2f), spec %.2f (%.2f,%.2f)',      var,roc_metrics$TPR,roc_metrics$lTPR,roc_metrics$hTPR,      1-roc_metrics$FPR,1-roc_metrics$hFPR,1-roc_metrics$lFPR))   legend_text <- sprintf("[%s] AUC: %.2f (+- %.2f/%.2f), OPT THR: %.2f",                            var, auc, sAUC, sAUCapprox, roc_metrics$threshold)   #draw opt point   graphics::points(roc_metrics$FPR,roc_metrics$TPR,pch=1,col=col,cex=2)   graphics::lines(roc$FPR,roc$TPR,col=col)   base::list(roc=roc,thr=roc_metrics$threshold,legend_text=legend_text)}#'Plot an assembly of ROCs#'#'@param df data frame#'@param vars vector of variables#'@param cols vector of color names#'@param x coordinate for legend#'@param y coordinate for legend#'@param unit unit for threshold in labels#'@param precise number of decimal places to use when reporting opt threshold, TRUE:2, FALSE:0#'@param target column that holds binary outcomes#'#'@return ggplot2 graphical object#'#'@exportsimple.plotROCgg<-function(df,vars,cols,x=0.7,y=0.3,unit="ml",precise="FALSE",target="alive"){   if (!requireNamespace('ggplot2',quiet=TRUE)){      print('ggplot2 not available. Use simple.plotROC function')      return(NULL)   }   #ggplot alternative   cvalues<-base::c()   colors_used<-base::c()   i=1   g<-ggplot2::ggplot()   for (var in vars) {      if (var %in% base::names(df)) {      # Pred izračunom ROC odstranimo vrstice z NA vrednostmi         df<-mapNA(df,var,0)         roc=simple.roc(df[,target],df[,var]) #!! defuses evaluation of variable. https://rlang.r-lib.org/reference/topic-inject.html         col=cols[i]         roc_metrics <- simple.compute_roc_metrics(roc)         auc=simple.getAUC(roc)         sAUC=simple.sAUC(roc)         lab <- base::sprintf("[%s] AUC: %.2f (+- %.2f), OPT THR: %.2f",                           var, auc, sAUC, roc_metrics$threshold)         g<-g+ggplot2::geom_line(ggplot2::aes(x=!!roc$FPR,y=!!roc$TPR,color=!!lab))         aes_p=ggplot2::aes(x=!!roc_metrics$FPR,y=!!roc_metrics$TPR,color=!!lab)         g<-g+ggplot2::geom_point(aes_p,size=4,shape=1,show.legend=FALSE)         colors_used<-base::c(colors_used,col)         cvalues<-base::c(cvalues,lab)          base::print(sprintf('%d/%d',base::length(colors_used),base::length(cvalues)))         i=i+1      }   }   base::names(colors_used)<-cvalues   g+ggplot2::xlab('1-specificity')+      ggplot2::ylab('sensitivity')+      ggplot2::scale_color_manual(name='Variables',values=colors_used)+      ggplot2::guides(color = ggplot2::guide_legend(position = "inside"))+      myTheme()+      ggplot2::theme(legend.position.inside=base::c(x,y))}myTheme<-function(){   ggplot2::theme(            axis.text=ggplot2::element_text(size=14),            axis.title=ggplot2::element_text(size=16,face="bold"),            legend.title=ggplot2::element_text(size=16,face="bold"),            legend.text=ggplot2::element_text(size=12),            plot.title=ggplot2::element_text(size=16,face="bold",hjust=0.5))}
 |