| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
- import pandas as pd
- import numpy as np
- import plotly.graph_objects as go
- from plotly.subplots import make_subplots
- import statsmodels.api as sm
- from sklearn.metrics import roc_auc_score
- from scipy.io import loadmat
- # ============================================================
- # CONFIGURATION SECTION (Customize Data Paths and Column Names)
- # ============================================================
- # Replace these with your actual data file paths
- suv_data_path = 'path_to_suv_data.xlsx' # Update with the path to your SUV data (Excel)
- flags_data_path = 'path_to_flags_data.mat' # Update with the path to your AE flags data (MAT)
- # Column names for your dataset (Modify based on your data)
- treatment_column = 'Treatment' # Treatment type (Monotherapy, Combo)
- suv_column = 'SUV_percentile' # SUV percentiles column
- ae_flag_column = 'Lung_AE_Flag' # AE Flag column (0 or 1)
- percentile_column = 'percentile' # Percentile column (5, 10, 15, ..., 95)
- # ============================================================
- # DATA IMPORT SECTION: Load Data
- # ============================================================
- # Load SUV data (Adjust this if you are using another format, like CSV)
- suv_data = pd.read_excel(suv_data_path)
- # Load AE flags data from MAT file
- flags_data = loadmat(flags_data_path)
- ae_flags = flags_data['Lung_AE_Flag'] # Ensure this is the correct key in your MAT file
- # ============================================================
- # Step 1: Treatment Effect (C1) - β_Z (Logistic Regression)
- # ============================================================
- def calculate_treatment_effect(data, treatment_column, ae_flag_column):
- """
- Calculate Treatment Effect (C1) using logistic regression.
- """
- X = sm.add_constant(data[treatment_column]) # Add intercept term
- y = data[ae_flag_column] # Response variable (AE flag)
- model = sm.Logit(y, X).fit(disp=False) # Logistic regression
- beta_Z = model.params[treatment_column] # Treatment effect (coefficient)
- p_value_C1 = model.pvalues[treatment_column] # p-value for treatment effect
- return beta_Z, p_value_C1
- # Example usage for Treatment Effect (C1)
- beta_Z, p_value_C1 = calculate_treatment_effect(suv_data, treatment_column, ae_flag_column)
- # ============================================================
- # Step 2: Treatment Effect on Biomarker (C2) - α_Z
- # ============================================================
- def calculate_treatment_on_biomarker(data, treatment_column, suv_column):
- """
- Calculate Treatment Effect on Biomarker (C2) using linear regression.
- """
- X = sm.add_constant(data[treatment_column]) # Add intercept term
- y = data[suv_column] # SUV values as response
- model = sm.OLS(y, X).fit(disp=False) # OLS regression
- alpha_Z = model.params[treatment_column] # Treatment effect on the biomarker
- p_value_C2 = model.pvalues[treatment_column] # p-value for treatment effect on biomarker
- return alpha_Z, p_value_C2
- # Example usage for Treatment Effect on Biomarker (C2)
- alpha_Z, p_value_C2 = calculate_treatment_on_biomarker(suv_data, treatment_column, suv_column)
- # ============================================================
- # Step 3: Surrogate Effect (C3) - η_S
- # ============================================================
- def calculate_surrogate_effect(data, suv_column, ae_flag_column):
- """
- Calculate the Surrogate Effect (C3) using logistic regression.
- """
- X = sm.add_constant(data[suv_column]) # Add intercept term
- y = data[ae_flag_column] # AE flag as response
- model = sm.Logit(y, X).fit(disp=False) # Logistic regression
- eta_S = model.params[suv_column] # Surrogate effect (coefficient)
- p_value_C3 = model.pvalues[suv_column] # p-value for surrogate effect
- return eta_S, p_value_C3
- # Example usage for Surrogate Effect (C3)
- eta_S, p_value_C3 = calculate_surrogate_effect(suv_data, suv_column, ae_flag_column)
- # ============================================================
- # Step 4: AUC Calculation (C3)
- # ============================================================
- def calculate_auc(data, suv_column, ae_flag_column):
- """
- Calculate AUC for Criterion 3 (C3).
- """
- auc = roc_auc_score(data[ae_flag_column], data[suv_column]) # AUC calculation
- return auc
- # Example usage for AUC (C3)
- auc_C3 = calculate_auc(suv_data, suv_column, ae_flag_column)
- # ============================================================
- # Step 5: PE Risk Calculation (C4)
- # ============================================================
- def calculate_pe_risk(data, treatment_column, suv_column, ae_flag_column):
- """
- Calculate PE Risk for Criterion 4.
- """
- X = sm.add_constant(data[suv_column]) # Add intercept term
- y = data[ae_flag_column] # AE flag as response
- model = sm.Logit(y, X).fit(disp=False) # Logistic regression
- PE_risk = model.params[suv_column] # PE Risk calculation (coefficient)
- return PE_risk
- # Example usage for PE Risk (C4)
- PE_risk_C4 = calculate_pe_risk(suv_data, treatment_column, suv_column, ae_flag_column)
- # ============================================================
- # Step 6: Creating a Clean Table with All Results
- # ============================================================
- # Combine the results into a DataFrame for easy visualization
- results = {
- "Percentile": suv_data[percentile_column],
- "Treatment Effect (C1)": [beta_Z] * len(suv_data),
- "p-value (C1)": [p_value_C1] * len(suv_data),
- "Treatment Effect (C2)": [alpha_Z] * len(suv_data),
- "p-value (C2)": [p_value_C2] * len(suv_data),
- "Surrogate Effect (C3)": [eta_S] * len(suv_data),
- "p-value (C3)": [p_value_C3] * len(suv_data),
- "AUC (C3)": [auc_C3] * len(suv_data),
- "PE Risk (C4)": [PE_risk_C4] * len(suv_data)
- }
- results_df = pd.DataFrame(results)
- # ============================================================
- # Step 7: Visualize Results using Plotly (Interactive Plot)
- # ============================================================
- def create_interactive_plot():
- """
- Create an interactive plot to visualize Treatment Effects, Surrogate Effects, AUC, and PE Risk across percentiles.
- """
- # Create a figure with subplots
- fig = make_subplots(rows=1, cols=1, subplot_titles=("Treatment Effects and Metrics"))
- # Plot the results
- fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=alpha_Z, mode="lines+markers", name="Treatment Effect on Biomarker (C2)"))
- fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=eta_S, mode="lines+markers", name="Surrogate Effect (C3)"))
- fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=auc_C3, mode="lines+markers", name="AUC (C3)"))
- fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=PE_risk_C4, mode="lines+markers", name="PE Risk (C4)"))
- # Update layout
- fig.update_layout(
- title="Comparison of Treatment Effects, Surrogate Effects, AUC, and PE Risk Across Percentiles",
- xaxis_title="SUV Percentile",
- yaxis_title="Metric Value",
- width=1000,
- height=600
- )
-
- fig.show()
- # Call the function to visualize the plot
- create_interactive_plot()
- # ============================================================
- # Export Results to CSV and Excel
- # ============================================================
- results_df.to_csv('results_table.csv', index=False) # Save table as CSV
- results_df.to_excel('results_table.xlsx', index=False) # Save table as Excel
|