Martin Horvat vor 1 Woche
Ursprung
Commit
a2838fc38d

BIN
data/prenticedataa.xlsx


+ 205 - 0
fisher_exact.ipynb

@@ -0,0 +1,205 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "58d00ccd-7950-4367-aec8-2c18aef079d1",
+   "metadata": {},
+   "source": [
+    "# Fisher exact tests\n",
+    "\n",
+    "<br>\n",
+    "Author: Martin Horvat, March 2026\n",
+    "\n",
+    "Ref:\n",
+    "* https://en.wikipedia.org/wiki/Fisher%27s_exact_test\n",
+    "* https://mathworld.wolfram.com/FishersExactTest.html"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9b24f6f9-4bf2-4751-8312-f011afd3422c",
+   "metadata": {},
+   "source": [
+    "## Common"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "362e9a29-2caf-4155-8d90-a591005b4b25",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from scipy.special import gammaln\n",
+    "\n",
+    "def _log_table_prob(a, b, c, d):\n",
+    "    r1, r2 = a + b, c + d\n",
+    "    c1, c2 = a + c, b + d\n",
+    "    n = r1 + r2\n",
+    "    return (\n",
+    "        gammaln(r1 + 1) + gammaln(r2 + 1) + gammaln(c1 + 1) + gammaln(c2 + 1)\n",
+    "        - (gammaln(a + 1) + gammaln(b + 1) + gammaln(c + 1) + gammaln(d + 1) + gammaln(n + 1))\n",
+    "    )\n",
+    "\n",
+    "def calculate_table_prob(a, b, c, d):\n",
+    "    return float(np.exp(_log_table_prob(int(a), int(b), int(c), int(d))))\n",
+    "\n",
+    "def arange_at_margins(r, c):\n",
+    "    amin = max(0, r[0] - c[1])\n",
+    "    amax = min(r[0], c[0])\n",
+    "    return np.arange(amin, amax + 1, dtype=np.int64)\n",
+    "\n",
+    "def fisher_p_value(table, eps: float = 1e-12):\n",
+    "    mat = np.asarray(table, dtype=np.int64)\n",
+    "    if mat.shape != (2, 2) or np.any(mat < 0):\n",
+    "        raise ValueError(\"table must be a 2x2 array of nonnegative counts\")\n",
+    "\n",
+    "    a, b, c_, d = map(int, mat.ravel())\n",
+    "    r = mat.sum(axis=1)  # [r1, r2]\n",
+    "    c = mat.sum(axis=0)  # [c1, c2]\n",
+    "\n",
+    "    a_vals = arange_at_margins(r, c)\n",
+    "\n",
+    "    n = int(r[0] + r[1])\n",
+    "    const = (\n",
+    "        gammaln(int(r[0]) + 1) + gammaln(int(r[1]) + 1)\n",
+    "        + gammaln(int(c[0]) + 1) + gammaln(int(c[1]) + 1)\n",
+    "        - gammaln(n + 1)\n",
+    "    )\n",
+    "\n",
+    "    b_vals = r[0] - a_vals\n",
+    "    c_vals = c[0] - a_vals\n",
+    "    d_vals = r[1] - c_vals\n",
+    "\n",
+    "    logp = const - (\n",
+    "        gammaln(a_vals + 1) + gammaln(b_vals + 1)\n",
+    "        + gammaln(c_vals + 1) + gammaln(d_vals + 1)\n",
+    "    )\n",
+    "\n",
+    "    logp_obs = _log_table_prob(a, b, c_, d)\n",
+    "    observed_p = float(np.exp(logp_obs))\n",
+    "\n",
+    "    mask = logp <= (logp_obs + np.log1p(eps))\n",
+    "    two_sided_p = float(np.exp(logp[mask]).sum())\n",
+    "\n",
+    "    return observed_p, two_sided_p"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3b5109a3-acf0-4285-8cfd-227074cfb015",
+   "metadata": {},
+   "source": [
+    "## Check"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "9d0b1232-ff1a-41da-965e-7f27ed61cd77",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.01048951048951043, 0.11013986013986005, 0.33041958041957936, 0.3671328671328668, 0.1573426573426565, 0.023601398601398604, 0.0008741258741258679]\n",
+      "0.9999999999999976\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check: the hypergeometric probabilities of all feasible tables\n",
+    "# with the observed margins must sum to 1.\n",
+    "table = [[8, 2], [1, 5]]\n",
+    "\n",
+    "# margins\n",
+    "r = np.sum(table, axis = 1)\n",
+    "c = np.sum(table, axis = 0)\n",
+    "\n",
+    "# feasible values of the top-left entry a\n",
+    "avals = arange_at_margins(r, c)\n",
+    "\n",
+    "# the margins fix the other entries: b = r1 - a, c = c1 - a, d = r2 - (c1 - a)\n",
+    "probs = [calculate_table_prob(x, r[0] - x, c[0] - x, r[1] - (c[0] - x)) for x in avals]\n",
+    "\n",
+    "print(probs)\n",
+    "print(sum(probs))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "218076bd-89d8-4305-8656-41cde67de1b4",
+   "metadata": {},
+   "source": [
+    "## Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "5b861593-4786-4b00-bdce-da1710b68af1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Odds Ratio: 20.0000\n",
+      "P-value: 0.0350\n",
+      "\n",
+      "Result is statistically significant (Reject H0)\n",
+      "\n",
+      "Observed Table Probability: 0.0236\n",
+      "Two-Sided P-Value: 0.0350\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 2x2 Contingency Table\n",
+    "#             Success | Failure\n",
+    "# Treatment A:   8    |    2\n",
+    "# Treatment B:   1    |    5\n",
+    "# Only scipy.special.gammaln was imported above, so bring in\n",
+    "# fisher_exact explicitly (bare `scipy.stats` would raise NameError).\n",
+    "from scipy.stats import fisher_exact\n",
+    "\n",
+    "table = [[8, 2], [1, 5]]\n",
+    "\n",
+    "# Perform the test\n",
+    "# returns the odds ratio and the two-sided p-value\n",
+    "odds_ratio, p_value = fisher_exact(table)\n",
+    "\n",
+    "print(f\"Odds Ratio: {odds_ratio:.4f}\")\n",
+    "print(f\"P-value: {p_value:.4f}\")\n",
+    "\n",
+    "# Interpretation\n",
+    "alpha = 0.05\n",
+    "if p_value < alpha:\n",
+    "    print(\"\\nResult is statistically significant (Reject H0)\")\n",
+    "else:\n",
+    "    print(\"\\nResult is not statistically significant (Fail to reject H0)\")\n",
+    "\n",
+    "# Cross-check against the notebook's own implementation\n",
+    "obs_p, final_p = fisher_p_value(table)\n",
+    "print(f\"\\nObserved Table Probability: {obs_p:.4f}\")\n",
+    "print(f\"Two-Sided P-Value: {final_p:.4f}\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Datei-Diff unterdrückt, da er zu groß ist
+ 335 - 0
prentice_criteria.ipynb


BIN
refs/Algorithm643_6497.214326.pdf


BIN
refs/Baker2018.pdf


BIN
refs/Buyse1998.pdf


BIN
refs/Fisher1922.pdf


BIN
refs/Fisher_Exact_Tests_L07.pdf


BIN
refs/Fisher_Exact_Tests_supp21.pdf


BIN
refs/Freedman1992.pdf


BIN
refs/Martin_Notes.jpg


BIN
refs/Prentice1989.pdf


BIN
refs/Raunig2015.pdf


BIN
refs/Robert Presentation_1.pdf


BIN
refs/Wang2002.pdf


+ 94 - 0
src/utils.py

@@ -0,0 +1,94 @@
+import pandas as pd
+import numpy as np
+from scipy.stats import chi2_contingency, fisher_exact
+
+def read_data(filename, cols_in, cols_out, logscale = False):
+    # Read the Excel file `filename` and return (df_data, df_work):
+    # the full raw frame and a working frame restricted to `cols_in`,
+    # renamed positionally to `cols_out`.
+    # NOTE(review): assumes `cols_out` contains a treatment column 'Z'
+    # (and, when logscale=True, a surrogate column 'S') — confirm callers.
+    
+    df_data = pd.read_excel(filename)
+
+    df_work = df_data[cols_in].rename(columns=dict(zip(cols_in, cols_out)))
+    # Encode treatment arm as binary: IPI+NIVO -> 1, all other arms -> 0.
+    # NOTE(review): .map() silently turns any label outside this dict into
+    # NaN — verify the data only contains these four arm labels.
+    df_work['Z'] = df_work['Z'].map({'PEMBRO': 0, 'IPI+NIVO': 1, 'IPI': 0, 'NIVO': 0})
+    
+    # Optionally move the surrogate endpoint to log scale.
+    if logscale: df_work['S']  = np.log(df_work['S'])
+    
+    return df_data, df_work
+
+def tz_analysis(
+    df: pd.DataFrame,
+    t_col: str = "T",
+    z_col: str = "Z",
+    alpha: float = 0.05,
+    chi2_yates: bool = False
+) -> dict:
+    """Test association between two binary columns of ``df``.
+
+    Builds the 2x2 contingency table F[t, z] for ``t_col`` vs ``z_col``
+    (after dropping rows with missing values), derives the empirical
+    joint, marginal and conditional probabilities, and runs two global
+    independence tests: Pearson's chi-squared (optionally with Yates'
+    continuity correction) and the two-sided Fisher exact test.
+
+    Parameters
+    ----------
+    df : frame containing the two columns.
+    t_col, z_col : column names; values must be binary (0/1 or bool).
+    alpha : significance level used for the reject/fail-to-reject flags.
+    chi2_yates : apply Yates' continuity correction in the chi2 test.
+
+    Returns
+    -------
+    dict with keys "meta", "counts", "probabilities", "global_tests".
+
+    Raises
+    ------
+    ValueError if a column is not binary or no rows remain after dropna.
+    """
+    
+    # Drop missing and enforce 0/1
+    d = df[[t_col, z_col]].dropna()
+    T = d[t_col].astype(int)
+    Z = d[z_col].astype(int)
+    
+    if not set(T.unique()).issubset({0, 1}) or not set(Z.unique()).issubset({0, 1}):
+        raise ValueError("T and Z must be binary (0/1 or bool).")
+
+    # Counts F[t,z] with explicit order t=0,1 and z=0,1
+    # (reindex guarantees a full 2x2 even if a level is absent)
+    # https://en.wikipedia.org/wiki/Contingency_table
+    F_df = pd.crosstab(T, Z).reindex(index=[0, 1], columns=[0, 1], fill_value=0)
+    F = F_df.to_numpy(dtype=int)
+    N = int(F.sum())
+    if N == 0:
+        raise ValueError("No data after dropping missing values.")
+
+    # Empirical joint/marginals from your computation
+    P = F / N                 # P(T,Z)
+    PT = P.sum(axis=1)        # P(T)
+    PZ = P.sum(axis=0)        # P(Z)
+
+    # Conditionals P(T|Z) as 2x2: rows t, cols z
+    # P(T=t | Z=z) = P(T=t,Z=z)/P(Z=z)
+    # columns with an empty Z level are set to NaN instead of inf/0-div
+    with np.errstate(divide="ignore", invalid="ignore"):
+        PT_given_Z = P / PZ   # broadcast over columns
+        PT_given_Z[:, PZ == 0] = np.nan
+
+    # Global tests of independence
+    # https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test
+    # https://en.wikipedia.org/wiki/Yates%27s_correction_for_continuity
+    
+    #chi2_yates = np.any(F.ravel() < 5)
+    chi2, p_chi2, dof, expected = chi2_contingency(F, correction=chi2_yates)
+
+    # https://en.wikipedia.org/wiki/Fisher%27s_exact_test
+    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.fisher_exact.html
+    # https://docs.scipy.org/doc/scipy/tutorial/stats/hypothesis_fisher_exact.html
+    # odds ratio 
+    #   OR = (P[1,1] P[0,0])/(P[1,0] P[0,1]) = O(Z=0)/O(Z=1) 
+    # with odds 
+    #   O(Z) = P(T=0|Z)/P(T=1|Z)
+    # Note: 
+    #   OR = 1: No association; the odds of the outcome are the same in both groups (Null Hypothesis).
+    #   'two-sided': Tests if the odds ratio is simply not 1
+
+    odds_ratio, p_fisher = fisher_exact(F, alternative="two-sided")
+
+    # Assemble a single nested result dict so callers get counts,
+    # probabilities and both test outcomes in one pass.
+    return {
+        "meta": {"t_col": t_col, "z_col": z_col, "alpha": float(alpha)},
+        "counts": {"F": F, "N": N},
+        "probabilities": {
+            "P_TZ": P,                 # 2x2 array [t,z]
+            "P_T": PT,                 # length-2 [t]
+            "P_Z": PZ,                 # length-2 [z]
+            "P_T_given_Z": PT_given_Z, # 2x2 array [t,z]
+        },
+        "global_tests": {
+            "chi2": {
+                "stat": float(chi2),
+                "dof": int(dof),
+                "pvalue": float(p_chi2),
+                "expected": expected.astype(float),
+                "reject_alpha": bool(p_chi2 < alpha),
+            },
+            "fisher": {
+                "odds_ratio": float(odds_ratio),   
+                "pvalue": float(p_fisher),
+                "reject_alpha": bool(p_fisher < alpha),
+            },
+        },
+    }

Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.