studen
/
rNIX


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
							#' Create a ROC curve
#'
#' @param labels vector of outcomes
#' @param scores vector of variable values
#' @param decreasing ordering of scores
#'
#' @return data frame with columns TPR, FPR, labels, scores, V 
#' V is combination of V0 (negative cases) and V1 (positive cases)
#' V0 is portion of positive cases with value larger than score
#' V1 is portion of negative cases with value below score
#'
#' @export

simple.roc <- function(labels, scores, decreasing = TRUE) {
  # Odstranimo NA vrednosti iz labels in scores
  valid_data = stats::complete.cases(labels, scores)
  labels = labels[valid_data]
  scores = scores[valid_data]

  labels <- labels[order(scores, decreasing=decreasing)]
  scores <- scores[order(scores, decreasing=decreasing)]

  labels = base::as.logical(labels)

  x = base::data.frame(TPR = base::cumsum(labels) / base::sum(labels),
                 FPR = base::cumsum(!labels) / base::sum(!labels),
                 labels, scores)

  s1 = scores[labels]
  s0 = scores[!labels]
  #V0 are 0 components of test, for each test-negative case find number of positive cases with value above it (ideally, count is equal to n1, number of positive cases)
  V0 = base::sapply(s0, psi01, s=s1)
  #V1 are 1 components of test; for each test-positive case find number of negative cases with score below its score (ideally, count is equal to n0, number of negative cases)
  V1 = base::sapply(s1, psi10, s=s0)
  n0=base::length(s0)
  n1=base::length(s1)
  #convert V0 to portions, to [0,1] range
  V0=V0/n1
  #convert V1 to portions, to [0,1] range
  V1=V1/n0

  V = labels
  V[labels] <- V1
  V[!labels] <- V0
  x$V = V
  x
}

psi01 <- function(y, s) {
  v1 = base::sum(s > y, na.rm = TRUE)
  v2 = base::sum(s == y, na.rm = TRUE)
  v1 + 0.5 * v2
}

psi10 <- function(x, s) {
  v1 = base::sum(s < x, na.rm = TRUE)
  v2 = base::sum(s == x, na.rm = TRUE)
  v1 + 0.5 * v2
}

#' Calculate AUC
#' @param roc as returned by simple.roc
#'
#' @return AUC (numeric)
#'
#' @export
simple.getAUC <- function(roc) {
  V0 = roc[!roc$labels, 'V']
  mean(V0)
}

#' Calculate metrics associated with ROC 
#'
#' @param ROC roc as returned by simple.roc
#'
#' @return a list with elemnts: : FPR, TPR, threshold, FPR CI95:min , FPR CI95: max, TPR CI95:min, TPR CI95:max
#' all evaluated at optimum point (Youden index)
#'
#' @export

simple.compute_roc_metrics <- function(ROC) {

#determine optimal point on ROC curve using Youden index
#ROC is the output of simple.roc
#output is a list : tpr, tpr CI95:min , tpr CI95: max, fpr, fpr CI95:min, fpr CI95:max, value of score,
#all evaluated at optimum point, 
   
   n=base::sum(ROC$labels)
   m=base::sum(!ROC$labels)
   nt=base::length(ROC$TPR)
   dist<-base::abs(ROC$TPR-ROC$FPR)
   imax<-base::which.max(dist)
   hTPR<-stats::prop.test(x=ROC$TPR[imax]*n,n=n,conf.level=0.95,correct=FALSE)
   hFPR<-stats::prop.test(x=ROC$FPR[imax]*m,n=m,conf.level=0.95,correct=FALSE)
   
  
   return(base::list(
    FPR = ROC$FPR[imax],
    TPR = ROC$TPR[imax],
    threshold = ROC$scores[imax],
    lFPR = hFPR$conf.int[1],
    hFPR = hFPR$conf.int[2],
    lTPR = hTPR$conf.int[1],
    hTPR = hTPR$conf.int[2]
  ))
}

#' Calculate covariance of AUC (deLonghi)
#'
#' @param roc roc as calculated by simple.roc
#'
#' @return standard deviation of AUC
#'
#' @export

simple.sAUC <- function(roc) {
  V1 = roc[roc$labels, 'V']
  AUC = base::mean(V1)
  V0 = roc[!roc$labels, 'V']
  n0 = base::length(V0)
  n1 = base::length(V1)

  S0 = base::sum((V0 - AUC) * (V0 - AUC)) / (n0 - 1)
  S1 = base::sum((V1 - AUC) * (V1 - AUC)) / (n1 - 1)
  base::sqrt(S0 / n0 + S1 / n1)
}

#' Calculate covariance of AUC (approximation)
#'
#' @param roc roc as calculated by simple.roc
#'
#' @return standard deviation of AUC
#'
#' @export

simple.sAUCapprox <- function(roc) {
  n = base::length(roc$labels)
  auc = simple.getAUC(roc)
  auc_se = base::sqrt((auc * (1 - auc) + (n - 1) * (auc - 0.5)^2) / n)
  auc_se
}

#' Check whether two AUCs are statistically significantly different
#'
#' @param rocA roc of the first test as returned by simple.roc
#' @param rocB roc of the second test as returned by simple.roc
#'
#' @return p of the test
#'
#' @export

simple.sAUC2<-function(rocA,rocB){
   #NCSS
   #https://www.ncss.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Comparing_Two_ROC_Curves-Paired_Design.pdf
   #calculate combined variance of two predictions on the same dataset
   #and give p-value that their performance is the same (or, with low p, that it is different)
   V0A=rocA[!rocB$labels,'V']
   V0B=rocB[!rocB$labels,'V']
   V1A=rocA[rocA$labels,'V']
   V1B=rocB[rocB$labels,'V']
   AUCA=base::mean(V0A)
   AUCB=base::mean(V0B)
   n0=base::length(V0A)#the same as V0B
   n1=base::length(V1A)#the same as V1B
   base::print(base::sprintf('A=%f B=%f',AUCA,AUCB))
   #variance of the 0 component, A
   S0A=base::sum((V0A-AUCA)*(V0A-AUCA))/(n0-1)
   #variance of 1 component, A
   S1A=base::sum((V1A-AUCA)*(V1A-AUCA))/(n1-1)
   #variance of A
   SA=S0A/n0+S1A/n1
   #variance of the 0 component, B
   S0B=base::sum((V0B-AUCB)*(V0B-AUCB))/(n0-1)
   #variance of 1 component, B
   S1B=base::sum((V1B-AUCB)*(V1B-AUCB))/(n1-1)
   #variance of B
   SB=S0B/n0+S1B/n1
   #covariance 0 component
   S0AB=base::sum((V0A-AUCA)*(V0B-AUCB))/(n0-1)
   #covariance 1 component
   S1AB=base::sum((V1A-AUCA)*(V1B-AUCB))/(n1-1)
   #covariance
   SAB=S0AB/n0+S1AB/n1
   S=SA+SB-2*SAB
   #is there a significant difference
   z=base::abs(AUCA-AUCB)/base::sqrt(S)
   p=2*stats::pnorm(z,mean=0,sd=1,lower.tail=FALSE)

   #is A larger than B
   #z=(AUCA-AUCB)/sqrt(S)
   #p=pnorm(z,mean=0,sd=1,lower.tail=FALSE)

   base::print(base::sprintf('SA2=%f SB2=%f SAB2=%f S2=%f S=%f z=%f p=%f',SA,SB,SAB,S,base::sqrt(S),z,p))
   p

}