{ "cells": [ { "cell_type": "markdown", "id": "58d00ccd-7950-4367-aec8-2c18aef079d1", "metadata": {}, "source": [ "# Fisher exact tests\n", "\n", "
\n", "Author: Martin Horvat, March 2026\n", "\n", "Ref:\n", "* https://en.wikipedia.org/wiki/Fisher%27s_exact_test\n", "* https://mathworld.wolfram.com/FishersExactTest.html" ] }, { "cell_type": "markdown", "id": "9b24f6f9-4bf2-4751-8312-f011afd3422c", "metadata": {}, "source": [ "## Common" ] }, { "cell_type": "code", "execution_count": 18, "id": "362e9a29-2caf-4155-8d90-a591005b4b25", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from scipy.special import gammaln\n", "\n", "def _log_table_prob(a, b, c, d):\n", " r1, r2 = a + b, c + d\n", " c1, c2 = a + c, b + d\n", " n = r1 + r2\n", " return (\n", " gammaln(r1 + 1) + gammaln(r2 + 1) + gammaln(c1 + 1) + gammaln(c2 + 1)\n", " - (gammaln(a + 1) + gammaln(b + 1) + gammaln(c + 1) + gammaln(d + 1) + gammaln(n + 1))\n", " )\n", "\n", "def calculate_table_prob(a, b, c, d):\n", " return float(np.exp(_log_table_prob(int(a), int(b), int(c), int(d))))\n", "\n", "def arange_at_margins(r, c):\n", " amin = max(0, r[0] - c[1])\n", " amax = min(r[0], c[0])\n", " return np.arange(amin, amax + 1, dtype=np.int64)\n", "\n", "def fisher_p_value(table, eps: float = 1e-12):\n", " mat = np.asarray(table, dtype=np.int64)\n", " if mat.shape != (2, 2) or np.any(mat < 0):\n", " raise ValueError(\"table must be a 2x2 array of nonnegative counts\")\n", "\n", " a, b, c_, d = map(int, mat.ravel())\n", " r = mat.sum(axis=1) # [r1, r2]\n", " c = mat.sum(axis=0) # [c1, c2]\n", "\n", " a_vals = arange_at_margins(r, c)\n", "\n", " n = int(r[0] + r[1])\n", " const = (\n", " gammaln(int(r[0]) + 1) + gammaln(int(r[1]) + 1)\n", " + gammaln(int(c[0]) + 1) + gammaln(int(c[1]) + 1)\n", " - gammaln(n + 1)\n", " )\n", "\n", " b_vals = r[0] - a_vals\n", " c_vals = c[0] - a_vals\n", " d_vals = r[1] - c_vals\n", "\n", " logp = const - (\n", " gammaln(a_vals + 1) + gammaln(b_vals + 1)\n", " + gammaln(c_vals + 1) + gammaln(d_vals + 1)\n", " )\n", "\n", " logp_obs = _log_table_prob(a, b, c_, d)\n", " observed_p = float(np.exp(logp_obs))\n", "\n", " mask = logp <= (logp_obs + np.log1p(eps))\n", " two_sided_p = float(np.exp(logp[mask]).sum())\n", "\n", " return observed_p, two_sided_p" ] }, { "cell_type": "markdown", "id": "3b5109a3-acf0-4285-8cfd-227074cfb015", "metadata": {}, "source": [ "## Check" ] }, { "cell_type": "code", "execution_count": 16, "id": "9d0b1232-ff1a-41da-965e-7f27ed61cd77", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.01048951048951043, 0.11013986013986005, 0.33041958041957936, 0.3671328671328668, 0.1573426573426565, 0.023601398601398604, 0.0008741258741258679]\n", "0.9999999999999976\n" ] } ], "source": [ "# Check \n", "table = [[8, 2], [1, 5]]\n", "\n", "# margins\n", "r = np.sum(table, axis = 1)\n", "c = np.sum(table, axis = 0)\n", "\n", "# values of a\n", "avals = arange_mat_margins(r, c)\n", "\n", "probs = [prob_mat_margins(x, r, c) for x in avals]\n", "\n", "print(probs)\n", "print(sum(probs))" ] }, { "cell_type": "markdown", "id": "218076bd-89d8-4305-8656-41cde67de1b4", "metadata": {}, "source": [ "## Example" ] }, { "cell_type": "code", "execution_count": 19, "id": "5b861593-4786-4b00-bdce-da1710b68af1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Odds Ratio: 20.0000\n", "P-value: 0.0350\n", "\n", "Result is statistically significant (Reject H0)\n", "\n", "Observed Table Probability: 0.0236\n", "Two-Sided P-Value: 0.0350\n" ] } ], "source": [ "# 2x2 Contingency Table\n", "# Success | Failure\n", "# Treatment A: 8 | 2\n", "# Treatment B: 1 | 5\n", "table = [[8, 2], [1, 5]]\n", "\n", "# Perform the test\n", "# returns the odds ratio and the p-value\n", "odds_ratio, p_value = scipy.stats.fisher_exact(table)\n", "\n", "print(f\"Odds Ratio: {odds_ratio:.4f}\")\n", "print(f\"P-value: {p_value:.4f}\")\n", "\n", "# Interpretation\n", "alpha = 0.05\n", "if p_value < alpha:\n", " print(\"\\nResult is statistically significant (Reject H0)\")\n", "else:\n", " print(\"\\nResult is not statistically significant (Fail to reject H0)\")\n", "\n", "obs_p, final_p = fisher_p_value(table)\n", "print(f\"\\nObserved Table Probability: {obs_p:.4f}\")\n", "print(f\"Two-Sided P-Value: {final_p:.4f}\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }