#' Create a ROC curve #' #' @param labels vector of outcomes #' @param scores vector of variable values #' @param decreasing ordering of scores #' #' @return data frame with columns TPR, FPR, labels, scores, V #' V is combination of V0 (negative cases) and V1 (positive cases) #' V0 is portion of positive cases with value larger than score #' V1 is portion of negative cases with value below score #' #' @export simple.roc <- function(labels, scores, decreasing = TRUE) { # Odstranimo NA vrednosti iz labels in scores valid_data = stats::complete.cases(labels, scores) labels = labels[valid_data] scores = scores[valid_data] labels <- labels[order(scores, decreasing=decreasing)] scores <- scores[order(scores, decreasing=decreasing)] labels = base::as.logical(labels) x = base::data.frame(TPR = base::cumsum(labels) / base::sum(labels), FPR = base::cumsum(!labels) / base::sum(!labels), labels, scores) s1 = scores[labels] s0 = scores[!labels] #V0 are 0 components of test, for each test-negative case find number of positive cases with value above it (ideally, count is equal to n1, number of positive cases) V0 = base::sapply(s0, psi01, s=s1) #V1 are 1 components of test; for each test-positive case find number of negative cases with score below its score (ideally, count is equal to n0, number of negative cases) V1 = base::sapply(s1, psi10, s=s0) n0=base::length(s0) n1=base::length(s1) #convert V0 to portions, to [0,1] range V0=V0/n1 #convert V1 to portions, to [0,1] range V1=V1/n0 V = labels V[labels] <- V1 V[!labels] <- V0 x$V = V x } psi01 <- function(y, s) { v1 = base::sum(s > y, na.rm = TRUE) v2 = base::sum(s == y, na.rm = TRUE) v1 + 0.5 * v2 } psi10 <- function(x, s) { v1 = base::sum(s < x, na.rm = TRUE) v2 = base::sum(s == x, na.rm = TRUE) v1 + 0.5 * v2 } #' Calculate AUC #' @param roc as returned by simple.roc #' #' @return AUC (numeric) #' #' @export simple.getAUC <- function(roc) { V0 = roc[!roc$labels, 'V'] mean(V0) } #' Calculate metrics associated with ROC #' #' @param ROC roc as returned by simple.roc #' #' @return a list with elemnts: : FPR, TPR, threshold, FPR CI95:min , FPR CI95: max, TPR CI95:min, TPR CI95:max #' all evaluated at optimum point (Youden index) #' #' @export simple.compute_roc_metrics <- function(ROC) { #determine optimal point on ROC curve using Youden index #ROC is the output of simple.roc #output is a list : tpr, tpr CI95:min , tpr CI95: max, fpr, fpr CI95:min, fpr CI95:max, value of score, #all evaluated at optimum point, n=base::sum(ROC$labels) m=base::sum(!ROC$labels) nt=base::length(ROC$TPR) dist<-base::abs(ROC$TPR-ROC$FPR) imax<-base::which.max(dist) hTPR<-stats::prop.test(x=ROC$TPR[imax]*n,n=n,conf.level=0.95,correct=FALSE) hFPR<-stats::prop.test(x=ROC$FPR[imax]*m,n=m,conf.level=0.95,correct=FALSE) return(base::list( FPR = ROC$FPR[imax], TPR = ROC$TPR[imax], threshold = ROC$scores[imax], lFPR = hFPR$conf.int[1], hFPR = hFPR$conf.int[2], lTPR = hTPR$conf.int[1], hTPR = hTPR$conf.int[2] )) } #' Calculate covariance of AUC (deLonghi) #' #' @param roc roc as calculated by simple.roc #' #' @return standard deviation of AUC #' #' @export simple.sAUC <- function(roc) { V1 = roc[roc$labels, 'V'] AUC = base::mean(V1) V0 = roc[!roc$labels, 'V'] n0 = base::length(V0) n1 = base::length(V1) S0 = base::sum((V0 - AUC) * (V0 - AUC)) / (n0 - 1) S1 = base::sum((V1 - AUC) * (V1 - AUC)) / (n1 - 1) base::sqrt(S0 / n0 + S1 / n1) } #' Calculate covariance of AUC (approximation) #' #' @param roc roc as calculated by simple.roc #' #' @return standard deviation of AUC #' #' @export simple.sAUCapprox <- function(roc) { n = base::length(roc$labels) auc = simple.getAUC(roc) auc_se = base::sqrt((auc * (1 - auc) + (n - 1) * (auc - 0.5)^2) / n) auc_se } #' Check whether two AUCs are statistically significantly different #' #' @param rocA roc of the first test as returned by simple.roc #' @param rocB roc of the second test as returned by simple.roc #' #' @return p of the test #' #' @export simple.sAUC2<-function(rocA,rocB){ #NCSS #https://www.ncss.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Comparing_Two_ROC_Curves-Paired_Design.pdf #calculate combined variance of two predictions on the same dataset #and give p-value that their performance is the same (or, with low p, that it is different) V0A=rocA[!rocB$labels,'V'] V0B=rocB[!rocB$labels,'V'] V1A=rocA[rocA$labels,'V'] V1B=rocB[rocB$labels,'V'] AUCA=base::mean(V0A) AUCB=base::mean(V0B) n0=base::length(V0A)#the same as V0B n1=base::length(V1A)#the same as V1B base::print(base::sprintf('A=%f B=%f',AUCA,AUCB)) #variance of the 0 component, A S0A=base::sum((V0A-AUCA)*(V0A-AUCA))/(n0-1) #variance of 1 component, A S1A=base::sum((V1A-AUCA)*(V1A-AUCA))/(n1-1) #variance of A SA=S0A/n0+S1A/n1 #variance of the 0 component, B S0B=base::sum((V0B-AUCB)*(V0B-AUCB))/(n0-1) #variance of 1 component, B S1B=base::sum((V1B-AUCB)*(V1B-AUCB))/(n1-1) #variance of B SB=S0B/n0+S1B/n1 #covariance 0 component S0AB=base::sum((V0A-AUCA)*(V0B-AUCB))/(n0-1) #covariance 1 component S1AB=base::sum((V1A-AUCA)*(V1B-AUCB))/(n1-1) #covariance SAB=S0AB/n0+S1AB/n1 S=SA+SB-2*SAB #is there a significant difference z=base::abs(AUCA-AUCB)/base::sqrt(S) p=2*stats::pnorm(z,mean=0,sd=1,lower.tail=FALSE) #is A larger than B #z=(AUCA-AUCB)/sqrt(S) #p=pnorm(z,mean=0,sd=1,lower.tail=FALSE) base::print(base::sprintf('SA2=%f SB2=%f SAB2=%f S2=%f S=%f z=%f p=%f',SA,SB,SAB,S,base::sqrt(S),z,p)) p } #' Plot a ROC curve with associated annotations #' #'@param df data frame #'@param var variable to use to stratify patients #'@param col color of the line drawn #'@param x x coordinate of legend #'@param y y coordinate of legend #'@param unit - what unit to associate to thrshold on legend (ml) #'@param precise number of decimal places to use when reporting opt threshold, TRUE:2, FALSE:0 #'@param target column that holds binary outcomes #' #'@return list object with items: roc object as created by simple.roc, thr optimal threshold, legend_text text to be put on final legend #'@export simple.plotROC<-function(df,var,col="black",x=0.65,y=0.1,unit="ml",precise=FALSE,target='alive'){ roc=simple.roc(df[,target],df[,var]) auc=simple.getAUC(roc) #calculate sAUC (sigma AUC, see DeLonghi) sAUC=simple.sAUC(roc) #approximate version of sAUC for historical reasons sAUCapprox <- simple.sAUCapprox(roc) #determine optimum point for test use (Youden index) roc_metrics <- simple.compute_roc_metrics(roc) #report sensitivity/specificity/CI95 at opt threshold print(sprintf('[%s] Opt: sens %.2f (%.2f,%.2f), spec %.2f (%.2f,%.2f)', var,roc_metrics$TPR,roc_metrics$lTPR,roc_metrics$hTPR, 1-roc_metrics$FPR,1-roc_metrics$hFPR,1-roc_metrics$lFPR)) legend_text <- sprintf("[%s] AUC: %.2f (+- %.2f/%.2f), OPT THR: %.2f", var, auc, sAUC, sAUCapprox, roc_metrics$threshold) #draw opt point graphics::points(roc_metrics$FPR,roc_metrics$TPR,pch=1,col=col,cex=2) graphics::lines(roc$FPR,roc$TPR,col=col) base::list(roc=roc,thr=roc_metrics$threshold,legend_text=legend_text) } #'Plot an assembly of ROCs #' #'@param df data frame #'@param vars vector of variables #'@param cols vector of color names #'@param x coordinate for legend #'@param y coordinate for legend #'@param unit unit for threshold in labels #'@param precise number of decimal places to use when reporting opt threshold, TRUE:2, FALSE:0 #'@param target column that holds binary outcomes #' #'@return ggplot2 graphical object #' #'@export simple.plotROCgg<-function(df,vars,cols,x=0.7,y=0.3,unit="ml",precise="FALSE",target="alive"){ if (!requireNamespace('ggplot2',quiet=TRUE)){ print('ggplot2 not available. Use simple.plotROC function') return(NULL) } #ggplot alternative cvalues<-base::c() colors_used<-base::c() i=0 g<-ggplot2::ggplot() for (var in vars) { if (var %in% base::names(df)) { # Pred izračunom ROC odstranimo vrstice z NA vrednostmi df<-mapNA(df,var,0) roc=simple.roc(df[,target],df[,var]) #!! defuses evaluation of variable. https://rlang.r-lib.org/reference/topic-inject.html col=cols[i] roc_metrics <- simple.compute_roc_metrics(roc) auc=simple.getAUC(roc) sAUC=simple.sAUC(roc) lab <- base::sprintf("[%s] AUC: %.2f (+- %.2f), OPT THR: %.2f", var, auc, sAUC, roc_metrics$threshold) g<-g+ggplot2::geom_line(ggplot2::aes(x=!!roc$FPR,y=!!roc$TPR,color=!!lab)) aes_p=ggplot2::aes(x=!!roc_metrics$FPR,y=!!roc_metrics$TPR,color=!!lab) g<-g+ggplot2::geom_point(aes_p,size=4,shape=1,show.legend=FALSE) colors_used<-base::c(colors_used,col) cvalues<-base::c(cvalues,lab) i=i+1 } } base::print(sprintf('%d/%d',base::length(colors_used),base::length(cvalues))) base::names(colors_used)<-cvalues g+ggplot2::xlab('1-specificity')+ ggplot2::ylab('sensitivity')+ ggplot2::scale_color_manual(name='Variables',values=colors_used)+ ggplot2::guides(color = ggplot2::guide_legend(position = "inside"))+ ggplot2::theme(legend.position.inside=base::c(x,y)) }