Martin Horvat vor 1 Woche
Ursprung
Commit
a2838fc38d

BIN
data/prenticedataa.xlsx


+ 205 - 0
fisher_exact.ipynb

@@ -0,0 +1,205 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "58d00ccd-7950-4367-aec8-2c18aef079d1",
+   "metadata": {},
+   "source": [
+    "# Fisher exact tests\n",
+    "\n",
+    "<br>\n",
+    "Author: Martin Horvat, March 2026\n",
+    "\n",
+    "Ref:\n",
+    "* https://en.wikipedia.org/wiki/Fisher%27s_exact_test\n",
+    "* https://mathworld.wolfram.com/FishersExactTest.html"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9b24f6f9-4bf2-4751-8312-f011afd3422c",
+   "metadata": {},
+   "source": [
+    "## Common"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "362e9a29-2caf-4155-8d90-a591005b4b25",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from scipy.special import gammaln\n",
+    "\n",
+    "def _log_table_prob(a, b, c, d):\n",
+    "    r1, r2 = a + b, c + d\n",
+    "    c1, c2 = a + c, b + d\n",
+    "    n = r1 + r2\n",
+    "    return (\n",
+    "        gammaln(r1 + 1) + gammaln(r2 + 1) + gammaln(c1 + 1) + gammaln(c2 + 1)\n",
+    "        - (gammaln(a + 1) + gammaln(b + 1) + gammaln(c + 1) + gammaln(d + 1) + gammaln(n + 1))\n",
+    "    )\n",
+    "\n",
+    "def calculate_table_prob(a, b, c, d):\n",
+    "    return float(np.exp(_log_table_prob(int(a), int(b), int(c), int(d))))\n",
+    "\n",
+    "def arange_at_margins(r, c):\n",
+    "    amin = max(0, r[0] - c[1])\n",
+    "    amax = min(r[0], c[0])\n",
+    "    return np.arange(amin, amax + 1, dtype=np.int64)\n",
+    "\n",
+    "def fisher_p_value(table, eps: float = 1e-12):\n",
+    "    mat = np.asarray(table, dtype=np.int64)\n",
+    "    if mat.shape != (2, 2) or np.any(mat < 0):\n",
+    "        raise ValueError(\"table must be a 2x2 array of nonnegative counts\")\n",
+    "\n",
+    "    a, b, c_, d = map(int, mat.ravel())\n",
+    "    r = mat.sum(axis=1)  # [r1, r2]\n",
+    "    c = mat.sum(axis=0)  # [c1, c2]\n",
+    "\n",
+    "    a_vals = arange_at_margins(r, c)\n",
+    "\n",
+    "    n = int(r[0] + r[1])\n",
+    "    const = (\n",
+    "        gammaln(int(r[0]) + 1) + gammaln(int(r[1]) + 1)\n",
+    "        + gammaln(int(c[0]) + 1) + gammaln(int(c[1]) + 1)\n",
+    "        - gammaln(n + 1)\n",
+    "    )\n",
+    "\n",
+    "    b_vals = r[0] - a_vals\n",
+    "    c_vals = c[0] - a_vals\n",
+    "    d_vals = r[1] - c_vals\n",
+    "\n",
+    "    logp = const - (\n",
+    "        gammaln(a_vals + 1) + gammaln(b_vals + 1)\n",
+    "        + gammaln(c_vals + 1) + gammaln(d_vals + 1)\n",
+    "    )\n",
+    "\n",
+    "    logp_obs = _log_table_prob(a, b, c_, d)\n",
+    "    observed_p = float(np.exp(logp_obs))\n",
+    "\n",
+    "    mask = logp <= (logp_obs + np.log1p(eps))\n",
+    "    two_sided_p = float(np.exp(logp[mask]).sum())\n",
+    "\n",
+    "    return observed_p, two_sided_p"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3b5109a3-acf0-4285-8cfd-227074cfb015",
+   "metadata": {},
+   "source": [
+    "## Check"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "9d0b1232-ff1a-41da-965e-7f27ed61cd77",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.01048951048951043, 0.11013986013986005, 0.33041958041957936, 0.3671328671328668, 0.1573426573426565, 0.023601398601398604, 0.0008741258741258679]\n",
+      "0.9999999999999976\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check: the hypergeometric probabilities of all feasible tables\n",
+    "# with the observed margins must sum to 1.\n",
+    "table = [[8, 2], [1, 5]]\n",
+    "\n",
+    "# margins\n",
+    "r = np.sum(table, axis = 1)\n",
+    "c = np.sum(table, axis = 0)\n",
+    "\n",
+    "# feasible values of the top-left entry a\n",
+    "avals = arange_at_margins(r, c)\n",
+    "\n",
+    "# the margins fix the other entries: b = r1 - a, c = c1 - a, d = r2 - (c1 - a)\n",
+    "probs = [calculate_table_prob(x, r[0] - x, c[0] - x, r[1] - (c[0] - x)) for x in avals]\n",
+    "\n",
+    "print(probs)\n",
+    "print(sum(probs))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "218076bd-89d8-4305-8656-41cde67de1b4",
+   "metadata": {},
+   "source": [
+    "## Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "5b861593-4786-4b00-bdce-da1710b68af1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Odds Ratio: 20.0000\n",
+      "P-value: 0.0350\n",
+      "\n",
+      "Result is statistically significant (Reject H0)\n",
+      "\n",
+      "Observed Table Probability: 0.0236\n",
+      "Two-Sided P-Value: 0.0350\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 2x2 Contingency Table\n",
+    "#             Success | Failure\n",
+    "# Treatment A:   8    |    2\n",
+    "# Treatment B:   1    |    5\n",
+    "# Only scipy.special.gammaln was imported above, so bring in\n",
+    "# fisher_exact explicitly (bare `scipy.stats` would raise NameError).\n",
+    "from scipy.stats import fisher_exact\n",
+    "\n",
+    "table = [[8, 2], [1, 5]]\n",
+    "\n",
+    "# Perform the test\n",
+    "# returns the odds ratio and the two-sided p-value\n",
+    "odds_ratio, p_value = fisher_exact(table)\n",
+    "\n",
+    "print(f\"Odds Ratio: {odds_ratio:.4f}\")\n",
+    "print(f\"P-value: {p_value:.4f}\")\n",
+    "\n",
+    "# Interpretation\n",
+    "alpha = 0.05\n",
+    "if p_value < alpha:\n",
+    "    print(\"\\nResult is statistically significant (Reject H0)\")\n",
+    "else:\n",
+    "    print(\"\\nResult is not statistically significant (Fail to reject H0)\")\n",
+    "\n",
+    "# Cross-check against the notebook's own implementation\n",
+    "obs_p, final_p = fisher_p_value(table)\n",
+    "print(f\"\\nObserved Table Probability: {obs_p:.4f}\")\n",
+    "print(f\"Two-Sided P-Value: {final_p:.4f}\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Datei-Diff unterdrückt, da er zu groß ist
+ 335 - 0
prentice_criteria.ipynb


BIN
refs/Algorithm643_6497.214326.pdf


BIN
refs/Baker2018.pdf


BIN
refs/Buyse1998.pdf


BIN
refs/Fisher1922.pdf


BIN
refs/Fisher_Exact_Tests_L07.pdf


BIN
refs/Fisher_Exact_Tests_supp21.pdf


BIN
refs/Freedman1992.pdf


BIN
refs/Martin_Notes.jpg


BIN
refs/Prentice1989.pdf


BIN
refs/Raunig2015.pdf


BIN
refs/Robert Presentation_1.pdf


BIN
refs/Wang2002.pdf


+ 94 - 0
src/utils.py

@@ -0,0 +1,94 @@
+import pandas as pd
+import numpy as np
+from scipy.stats import chi2_contingency, fisher_exact
+
+def read_data(filename, cols_in, cols_out, logscale = False):
+    # Read the Excel file `filename` and return (df_data, df_work):
+    # the full raw frame and a working frame restricted to `cols_in`,
+    # renamed positionally to `cols_out`.
+    # NOTE(review): assumes `cols_out` contains a treatment column 'Z'
+    # (and, when logscale=True, a surrogate column 'S') — confirm callers.
+    
+    df_data = pd.read_excel(filename)
+
+    df_work = df_data[cols_in].rename(columns=dict(zip(cols_in, cols_out)))
+    # Encode treatment arm as binary: IPI+NIVO -> 1, all other arms -> 0.
+    # NOTE(review): .map() silently turns any label outside this dict into
+    # NaN — verify the data only contains these four arm labels.
+    df_work['Z'] = df_work['Z'].map({'PEMBRO': 0, 'IPI+NIVO': 1, 'IPI': 0, 'NIVO': 0})
+    
+    # Optionally move the surrogate endpoint to log scale.
+    if logscale: df_work['S']  = np.log(df_work['S'])
+    
+    return df_data, df_work
+
+def tz_analysis(
+    df: pd.DataFrame,
+    t_col: str = "T",
+    z_col: str = "Z",
+    alpha: float = 0.05,
+    chi2_yates: bool = False
+) -> dict:
+    """Test association between two binary columns of ``df``.
+
+    Builds the 2x2 contingency table F[t, z] for ``t_col`` vs ``z_col``
+    (after dropping rows with missing values), derives the empirical
+    joint, marginal and conditional probabilities, and runs two global
+    independence tests: Pearson's chi-squared (optionally with Yates'
+    continuity correction) and the two-sided Fisher exact test.
+
+    Parameters
+    ----------
+    df : frame containing the two columns.
+    t_col, z_col : column names; values must be binary (0/1 or bool).
+    alpha : significance level used for the reject/fail-to-reject flags.
+    chi2_yates : apply Yates' continuity correction in the chi2 test.
+
+    Returns
+    -------
+    dict with keys "meta", "counts", "probabilities", "global_tests".
+
+    Raises
+    ------
+    ValueError if a column is not binary or no rows remain after dropna.
+    """
+    
+    # Drop missing and enforce 0/1
+    d = df[[t_col, z_col]].dropna()
+    T = d[t_col].astype(int)
+    Z = d[z_col].astype(int)
+    
+    if not set(T.unique()).issubset({0, 1}) or not set(Z.unique()).issubset({0, 1}):
+        raise ValueError("T and Z must be binary (0/1 or bool).")
+
+    # Counts F[t,z] with explicit order t=0,1 and z=0,1
+    # (reindex guarantees a full 2x2 even if a level is absent)
+    # https://en.wikipedia.org/wiki/Contingency_table
+    F_df = pd.crosstab(T, Z).reindex(index=[0, 1], columns=[0, 1], fill_value=0)
+    F = F_df.to_numpy(dtype=int)
+    N = int(F.sum())
+    if N == 0:
+        raise ValueError("No data after dropping missing values.")
+
+    # Empirical joint/marginals from your computation
+    P = F / N                 # P(T,Z)
+    PT = P.sum(axis=1)        # P(T)
+    PZ = P.sum(axis=0)        # P(Z)
+
+    # Conditionals P(T|Z) as 2x2: rows t, cols z
+    # P(T=t | Z=z) = P(T=t,Z=z)/P(Z=z)
+    # columns with an empty Z level are set to NaN instead of inf/0-div
+    with np.errstate(divide="ignore", invalid="ignore"):
+        PT_given_Z = P / PZ   # broadcast over columns
+        PT_given_Z[:, PZ == 0] = np.nan
+
+    # Global tests of independence
+    # https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test
+    # https://en.wikipedia.org/wiki/Yates%27s_correction_for_continuity
+    
+    #chi2_yates = np.any(F.ravel() < 5)
+    chi2, p_chi2, dof, expected = chi2_contingency(F, correction=chi2_yates)
+
+    # https://en.wikipedia.org/wiki/Fisher%27s_exact_test
+    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.fisher_exact.html
+    # https://docs.scipy.org/doc/scipy/tutorial/stats/hypothesis_fisher_exact.html
+    # odds ratio 
+    #   OR = (P[1,1] P[0,0])/(P[1,0] P[0,1]) = O(Z=0)/O(Z=1) 
+    # with odds 
+    #   O(Z) = P(T=0|Z)/P(T=1|Z)
+    # Note: 
+    #   OR = 1: No association; the odds of the outcome are the same in both groups (Null Hypothesis).
+    #   'two-sided': Tests if the odds ratio is simply not 1
+
+    odds_ratio, p_fisher = fisher_exact(F, alternative="two-sided")
+
+    # Assemble a single nested result dict so callers get counts,
+    # probabilities and both test outcomes in one pass.
+    return {
+        "meta": {"t_col": t_col, "z_col": z_col, "alpha": float(alpha)},
+        "counts": {"F": F, "N": N},
+        "probabilities": {
+            "P_TZ": P,                 # 2x2 array [t,z]
+            "P_T": PT,                 # length-2 [t]
+            "P_Z": PZ,                 # length-2 [z]
+            "P_T_given_Z": PT_given_Z, # 2x2 array [t,z]
+        },
+        "global_tests": {
+            "chi2": {
+                "stat": float(chi2),
+                "dof": int(dof),
+                "pvalue": float(p_chi2),
+                "expected": expected.astype(float),
+                "reject_alpha": bool(p_chi2 < alpha),
+            },
+            "fisher": {
+                "odds_ratio": float(odds_ratio),   
+                "pvalue": float(p_fisher),
+                "reject_alpha": bool(p_fisher < alpha),
+            },
+        },
+    }

Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.