prentice_analysis.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. import pandas as pd
  2. import numpy as np
  3. import plotly.graph_objects as go
  4. from plotly.subplots import make_subplots
  5. import statsmodels.api as sm
  6. from sklearn.metrics import roc_auc_score
  7. from scipy.io import loadmat
  8. # ============================================================
  9. # CONFIGURATION SECTION (Customize Data Paths and Column Names)
  10. # ============================================================
  11. # Replace these with your actual data file paths
  12. suv_data_path = 'path_to_suv_data.xlsx' # Update with the path to your SUV data (Excel)
  13. flags_data_path = 'path_to_flags_data.mat' # Update with the path to your AE flags data (MAT)
  14. # Column names for your dataset (Modify based on your data)
  15. treatment_column = 'Treatment' # Treatment type (Monotherapy, Combo)
  16. suv_column = 'SUV_percentile' # SUV percentiles column
  17. ae_flag_column = 'Lung_AE_Flag' # AE Flag column (0 or 1)
  18. percentile_column = 'percentile' # Percentile column (5, 10, 15, ..., 95)
  19. # ============================================================
  20. # DATA IMPORT SECTION: Load Data
  21. # ============================================================
  22. # Load SUV data (Adjust this if you are using another format, like CSV)
  23. suv_data = pd.read_excel(suv_data_path)
  24. # Load AE flags data from MAT file
  25. flags_data = loadmat(flags_data_path)
  26. ae_flags = flags_data['Lung_AE_Flag'] # Ensure this is the correct key in your MAT file
  27. # ============================================================
  28. # Step 1: Treatment Effect (C1) - β_Z (Logistic Regression)
  29. # ============================================================
  30. def calculate_treatment_effect(data, treatment_column, ae_flag_column):
  31. """
  32. Calculate Treatment Effect (C1) using logistic regression.
  33. """
  34. X = sm.add_constant(data[treatment_column]) # Add intercept term
  35. y = data[ae_flag_column] # Response variable (AE flag)
  36. model = sm.Logit(y, X).fit(disp=False) # Logistic regression
  37. beta_Z = model.params[treatment_column] # Treatment effect (coefficient)
  38. p_value_C1 = model.pvalues[treatment_column] # p-value for treatment effect
  39. return beta_Z, p_value_C1
  40. # Example usage for Treatment Effect (C1)
  41. beta_Z, p_value_C1 = calculate_treatment_effect(suv_data, treatment_column, ae_flag_column)
  42. # ============================================================
  43. # Step 2: Treatment Effect on Biomarker (C2) - α_Z
  44. # ============================================================
  45. def calculate_treatment_on_biomarker(data, treatment_column, suv_column):
  46. """
  47. Calculate Treatment Effect on Biomarker (C2) using linear regression.
  48. """
  49. X = sm.add_constant(data[treatment_column]) # Add intercept term
  50. y = data[suv_column] # SUV values as response
  51. model = sm.OLS(y, X).fit(disp=False) # OLS regression
  52. alpha_Z = model.params[treatment_column] # Treatment effect on the biomarker
  53. p_value_C2 = model.pvalues[treatment_column] # p-value for treatment effect on biomarker
  54. return alpha_Z, p_value_C2
  55. # Example usage for Treatment Effect on Biomarker (C2)
  56. alpha_Z, p_value_C2 = calculate_treatment_on_biomarker(suv_data, treatment_column, suv_column)
  57. # ============================================================
  58. # Step 3: Surrogate Effect (C3) - η_S
  59. # ============================================================
  60. def calculate_surrogate_effect(data, suv_column, ae_flag_column):
  61. """
  62. Calculate the Surrogate Effect (C3) using logistic regression.
  63. """
  64. X = sm.add_constant(data[suv_column]) # Add intercept term
  65. y = data[ae_flag_column] # AE flag as response
  66. model = sm.Logit(y, X).fit(disp=False) # Logistic regression
  67. eta_S = model.params[suv_column] # Surrogate effect (coefficient)
  68. p_value_C3 = model.pvalues[suv_column] # p-value for surrogate effect
  69. return eta_S, p_value_C3
  70. # Example usage for Surrogate Effect (C3)
  71. eta_S, p_value_C3 = calculate_surrogate_effect(suv_data, suv_column, ae_flag_column)
  72. # ============================================================
  73. # Step 4: AUC Calculation (C3)
  74. # ============================================================
  75. def calculate_auc(data, suv_column, ae_flag_column):
  76. """
  77. Calculate AUC for Criterion 3 (C3).
  78. """
  79. auc = roc_auc_score(data[ae_flag_column], data[suv_column]) # AUC calculation
  80. return auc
  81. # Example usage for AUC (C3)
  82. auc_C3 = calculate_auc(suv_data, suv_column, ae_flag_column)
  83. # ============================================================
  84. # Step 5: PE Risk Calculation (C4)
  85. # ============================================================
  86. def calculate_pe_risk(data, treatment_column, suv_column, ae_flag_column):
  87. """
  88. Calculate PE Risk for Criterion 4.
  89. """
  90. X = sm.add_constant(data[suv_column]) # Add intercept term
  91. y = data[ae_flag_column] # AE flag as response
  92. model = sm.Logit(y, X).fit(disp=False) # Logistic regression
  93. PE_risk = model.params[suv_column] # PE Risk calculation (coefficient)
  94. return PE_risk
  95. # Example usage for PE Risk (C4)
  96. PE_risk_C4 = calculate_pe_risk(suv_data, treatment_column, suv_column, ae_flag_column)
  97. # ============================================================
  98. # Step 6: Creating a Clean Table with All Results
  99. # ============================================================
  100. # Combine the results into a DataFrame for easy visualization
  101. results = {
  102. "Percentile": suv_data[percentile_column],
  103. "Treatment Effect (C1)": [beta_Z] * len(suv_data),
  104. "p-value (C1)": [p_value_C1] * len(suv_data),
  105. "Treatment Effect (C2)": [alpha_Z] * len(suv_data),
  106. "p-value (C2)": [p_value_C2] * len(suv_data),
  107. "Surrogate Effect (C3)": [eta_S] * len(suv_data),
  108. "p-value (C3)": [p_value_C3] * len(suv_data),
  109. "AUC (C3)": [auc_C3] * len(suv_data),
  110. "PE Risk (C4)": [PE_risk_C4] * len(suv_data)
  111. }
  112. results_df = pd.DataFrame(results)
  113. # ============================================================
  114. # Step 7: Visualize Results using Plotly (Interactive Plot)
  115. # ============================================================
  116. def create_interactive_plot():
  117. """
  118. Create an interactive plot to visualize Treatment Effects, Surrogate Effects, AUC, and PE Risk across percentiles.
  119. """
  120. # Create a figure with subplots
  121. fig = make_subplots(rows=1, cols=1, subplot_titles=("Treatment Effects and Metrics"))
  122. # Plot the results
  123. fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=alpha_Z, mode="lines+markers", name="Treatment Effect on Biomarker (C2)"))
  124. fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=eta_S, mode="lines+markers", name="Surrogate Effect (C3)"))
  125. fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=auc_C3, mode="lines+markers", name="AUC (C3)"))
  126. fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=PE_risk_C4, mode="lines+markers", name="PE Risk (C4)"))
  127. # Update layout
  128. fig.update_layout(
  129. title="Comparison of Treatment Effects, Surrogate Effects, AUC, and PE Risk Across Percentiles",
  130. xaxis_title="SUV Percentile",
  131. yaxis_title="Metric Value",
  132. width=1000,
  133. height=600
  134. )
  135. fig.show()
  136. # Call the function to visualize the plot
  137. create_interactive_plot()
  138. # ============================================================
  139. # Export Results to CSV and Excel
  140. # ============================================================
  141. results_df.to_csv('results_table.csv', index=False) # Save table as CSV
  142. results_df.to_excel('results_table.xlsx', index=False) # Save table as Excel