three_parameter_screening.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. import numpy as np
  2. from numpy.random import Generator
  3. from numpy.typing import NDArray
  4. from scipy import stats
  5. from scipy.optimize import minimize
  6. # region 3 Key Parameters
  7. def sensitivity_function(t, b0, b1, t_bar):
  8. """
  9. Calculate the sensitivity function β(t).
  10. Parameters
  11. ----------
  12. t : float
  13. Time variable.
  14. b0 : float
  15. Coefficient b0.
  16. b1 : float
  17. Coefficient b1.
  18. t_bar : float
  19. Average age at entry in the study group.
  20. average_life_expectancy : float
  21. Average life expectancy in years.
  22. Returns
  23. -------
  24. float
  25. Sensitivity at time t.
  26. """
  27. return 1 / (1 + np.exp(-b0 - b1 * (t - t_bar)))
  28. def transition_probability_function(t, w_max, mu, sigma):
  29. """
  30. Calculate the transition probability function w(t).
  31. Parameters
  32. ----------
  33. t : float
  34. Time variable, must be greater than 0.
  35. w_max : float
  36. Maximum weight.
  37. mu : float
  38. Mean of the log-normal distribution.
  39. sigma : float
  40. Standard deviation of the log-normal distribution.
  41. Returns
  42. -------
  43. float
  44. Transition probability at time t.
  45. """
  46. if t <= 0:
  47. raise ValueError("t must be greater than 0")
  48. return w_max * (1 / (np.sqrt(2 * np.pi) * sigma * t)) * np.exp(-((np.log(t) - mu) ** 2) / (2 * sigma ** 2))
  49. def sojourn_time_function(x, kappa, rho):
  50. """
  51. Calculate the sojourn time function q(x).
  52. Parameters
  53. ----------
  54. x : float
  55. Time variable.
  56. kappa : float
  57. Shape parameter.
  58. rho : float
  59. Scale parameter.
  60. Returns
  61. -------
  62. float
  63. Sojourn time at x.
  64. """
  65. return (kappa * x ** (kappa - 1) * rho ** kappa) / ((1 + (x * rho) ** kappa) ** 2)
  66. # endregion
  67. # region Random Variates
  68. def transition_survival_rvs(rng: Generator, w: float, size: int = 1) -> np.ndarray[np.float64]:
  69. """
  70. Generate random variates of transition survival.
  71. Parameters
  72. ----------
  73. rng : np.random.Generator
  74. Random number generator.
  75. w : float
  76. Scale parameter that represents the probability of transition
  77. from healthy state to preclinical state.
  78. size : int, optional
  79. Number of random variates to generate.
  80. Returns
  81. -------
  82. np.ndarray[np.float64]
  83. Times when a population transitions from healthy state to preclinical state.
  84. """
  85. return rng.uniform(low=0, high=1, size=size) / w
  86. def transition_survival_rvs_alt(rng: Generator, t: float, a: float, size: int = 1) -> np.ndarray[np.float64]:
  87. """
  88. Generate random variates of transition survival.
  89. Alternative implementation of transition_survival_rvs that returns np.nan for times that are greater than T.
  90. Parameters
  91. ----------
  92. rng : np.random.Generator
  93. Random number generator.
  94. t : float
  95. Life expectancy.
  96. a : float
  97. Lifetime risk.
  98. size : int, optional
  99. Number of random variates to generate.
  100. Returns
  101. -------
  102. np.ndarray[np.float64]
  103. Times when a population transitions from healthy state to preclinical state.
  104. """
  105. times = rng.uniform(low=0, high=1, size=size) / a
  106. times[times < 1] = np.nan
  107. return times * t
  108. def sojourn_survival_rvs(rng: Generator, mu: float, t_w: np.ndarray[np.float64]) -> np.ndarray[np.float64]:
  109. """
  110. Generate random variates of sojourn survival.
  111. Parameters
  112. ----------
  113. rng : np.random.Generator
  114. Random number generator.
  115. mu : float
  116. Scale parameter that represents the probability of transition
  117. from preclinical state to detectable state.
  118. t_w : float
  119. Scale parameter that indicates the time when the person transitioned
  120. from healthy state to preclinical state.
  121. Returns
  122. -------
  123. float
  124. Time when a person transitions from preclinical state to cancer state.
  125. """
  126. return -mu * np.log(1-rng.uniform(low=0, high=1, size=len(t_w)))
  127. # region Likelihood
  128. def likelihood_function(D, I, n, s, r, alpha, beta):
  129. """
  130. Calculate the likelihood function.
  131. Parameters
  132. ----------
  133. D : list
  134. List of probabilities of preclinical diagnosis.
  135. I : list
  136. List of probabilities of clinical incidence.
  137. n : list
  138. List of total number of cases for each interval.
  139. s : list
  140. List of number of preclinical diagnoses for each interval.
  141. r : list
  142. List of number of clinical incidences for each interval.
  143. alpha : list
  144. List of alpha values for the product term.
  145. beta : float
  146. The probability of detection.
  147. Returns
  148. -------
  149. float
  150. The likelihood value.
  151. """
  152. L = []
  153. for i in range(len(D)):
  154. qs=np.sum(s[i])
  155. term1 = D[i] ** qs
  156. term2 = I[i] ** r[i]
  157. term3 = (1 - D[i] - I[i]) ** (n[i] - qs - r[i])
  158. product_term = np.prod((alpha / beta) ** s[i] )
  159. L_i = term1 * term2 * term3 * product_term
  160. L.append( L_i )
  161. return L
  162. def mylog(x):
  163. if x==0:
  164. return - np.inf
  165. return np.log(x)
  166. def loglikelihood_function(D, I, n, s, r, alpha, beta):
  167. # Same as likelihood above
  168. L = []
  169. for i in range(len(D)):
  170. qs=np.sum(s[i])
  171. term1 = qs*mylog(D[i])
  172. term2=r[i]*mylog(I[i])
  173. term3 = (n[i]-qs-r[i])*mylog(1 - D[i] - I[i])
  174. product_term=0
  175. if beta!=0:
  176. product_term = np.sum(s[i]*mylog(alpha / beta))
  177. L_i = term1 + term2 + term3 + product_term
  178. L.append( L_i )
  179. return L
  180. def likelihood_function1(pars,n,s,r):
  181. w=pars[0]
  182. mu=pars[1]
  183. beta=pars[2]
  184. def Q(x):
  185. return np.exp(-x/mu)
  186. t=25+np.linspace(0,len(n),len(n)+1)
  187. I=probability_of_clinical_incidence(beta,w,mu,t,Q)
  188. D=probability_of_preclinical_diagnosis(beta,w,mu,t,Q)
  189. qs=np.expand_dims(s,1)
  190. alpha=np.array([beta])
  191. L=loglikelihood_function(D,I,n,s,r,alpha,beta)
  192. return np.sum(L)
  193. def fI(pars,t):
  194. w=pars[0]
  195. mu=pars[1]
  196. beta=pars[2]
  197. def Q(x):
  198. return np.exp(-x/mu)
  199. return probability_of_clinical_incidence(beta,w,mu,t,Q)
  200. def fD(pars,t):
  201. w=pars[0]
  202. mu=pars[1]
  203. beta=pars[2]
  204. def Q(x):
  205. return np.exp(-x/mu)
  206. return probability_of_preclinical_diagnosis(beta,w,mu,t,Q)
  207. def probability_of_clinical_incidence(beta, w, mu, t, Q):
  208. #Calculate the probability of clinical incidence.
  209. # Parameters
  210. # ----------
  211. # w : float
  212. # Weighting factor.
  213. # mu : float
  214. # Mean of the distribution.
  215. # t : list
  216. # List of screening times [0,..,tN] of length N+1 (where N is number intervals)
  217. # Q : function
  218. # Function Q(t) representing the probability distribution.
  219. # beta : float
  220. # The probability of detection.
  221. #
  222. # Returns
  223. # -------
  224. # list
  225. # A list of probabilities I_i for each time interval (of length N, 0 corresponds to interval [t0,t1))
  226. # """
  227. I = []
  228. for i in range(1,len(t)):
  229. sum_terms=[(1 - beta) ** (i - j - 1) * (Q(t[i - 1] - t[j]) - Q(t[i] - t[j])) for j in range(i)]
  230. sum_term = np.sum(sum_terms)
  231. dt=(t[i]-t[i-1])
  232. I_i = w * (dt - beta * mu * sum_term)
  233. I.append(I_i)
  234. return np.array(I)
  235. def probability_of_preclinical_diagnosis(beta, w, mu, t, Q):
  236. """
  237. Calculate the probability of preclinical diagnosis.
  238. Parameters
  239. ----------
  240. beta : float
  241. The probability of detection.
  242. w : float
  243. Weighting factor.
  244. mu : float
  245. Mean of the distribution.
  246. t : list
  247. List of time intervals.
  248. Q : function
  249. Function Q(t) representing the probability distribution.
  250. Returns
  251. -------
  252. list
  253. A list of probabilities D_i for each time interval.
  254. """
  255. D = []
  256. for i in range(1, len(t) ):
  257. if i == 1:
  258. D_i = beta * w * mu
  259. else:
  260. sum_terms=[(1 - beta) ** (i - j - 1) * Q(t[i - 1] - t[j - 1]) for j in range(1, i)]
  261. sum_term = np.sum(sum_terms)
  262. D_i = beta * w * mu * (1 - beta * sum_term)
  263. D.append(D_i)
  264. return np.array(D)
  265. # endregion
  266. # region Optimization
  267. def loss_function(params, real_data, population_size):
  268. sensitivity, specificity, risk, screening_interval, onset_interval = params
  269. simulated_data = analytic_simulation(sensitivity, specificity, risk, screening_interval, onset_interval, population_size)
  270. return np.sum((simulated_data - real_data) ** 2)
  271. def fit_to_real_data(real_data, initial_guess, population_size):
  272. result = minimize(loss_function, initial_guess, args=(real_data, population_size), method='BFGS')
  273. return result.x
  274. # endregion
  275. # region Deprecated
  276. def analytic_simulation(sensitivity, specificity, risk, screening_interval, onset_interval, population_size, average_life_expectancy) -> np.array:
  277. """
  278. Analytic simulation of screening programme.
  279. Parameters
  280. ----------
  281. sensitivity : float
  282. Sensitivity of the screening test.
  283. specificity : float
  284. Specificity of the screening test.
  285. risk : float
  286. Risk of cancer in the population.
  287. screening_interval : float
  288. Interval between screenings in years.
  289. onset_interval : float
  290. Interval between onset of cancer and detection in years.
  291. population_size : int
  292. Size of the population.
  293. Returns
  294. -------
  295. array
  296. A numpy array with the following elements:
  297. - The number of people with cancer.
  298. - The number of people with cancer detected through screening.
  299. - The number of people with cancer that was not detected through screening and was detected through symptoms.
  300. - The number of people without cancer.
  301. - The number of false positives.
  302. """
  303. population = stats.bernoulli.rvs(risk, size=population_size)
  304. detected_probability = stats.geom.cdf(onset_interval * screening_interval, sensitivity)
  305. # cancer
  306. cancer_quantity = sum(population)
  307. screening_population = stats.bernoulli.rvs(p=detected_probability, size=cancer_quantity)
  308. screening_detected_q = sum(screening_population)
  309. interval_cancer_q = len(screening_population) - screening_detected_q
  310. # recall
  311. recall_q = len(population) - cancer_quantity
  312. recall_total = 0
  313. recall_total = np.sum(stats.binom.rvs(n=(int)(average_life_expectancy * screening_interval), p=(1-specificity), size=recall_q))
  314. return np.array([cancer_quantity, screening_detected_q, interval_cancer_q, recall_q, recall_total])
  315. # endregion