"""Evaluate SUV percentile as a surrogate biomarker for lung adverse events.

The script loads SUV data (Excel) and AE flags (MAT), fits the regressions
behind criteria C1-C4, collects the estimates in a table, plots them with
Plotly and exports the table to CSV and Excel.
"""

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import statsmodels.api as sm
from plotly.subplots import make_subplots
from scipy.io import loadmat
from sklearn.metrics import roc_auc_score

# ============================================================
# CONFIGURATION SECTION (Customize Data Paths and Column Names)
# ============================================================
# Replace these with your actual data file paths
suv_data_path = 'path_to_suv_data.xlsx'  # Update with the path to your SUV data (Excel)
flags_data_path = 'path_to_flags_data.mat'  # Update with the path to your AE flags data (MAT)

# Column names for your dataset (Modify based on your data)
# NOTE: statsmodels needs numeric regressors, so the treatment column must be
# numerically coded (e.g. 0 = Monotherapy, 1 = Combo) before it is used below.
treatment_column = 'Treatment'  # Treatment type (Monotherapy, Combo)
suv_column = 'SUV_percentile'  # SUV percentiles column
ae_flag_column = 'Lung_AE_Flag'  # AE Flag column (0 or 1)
percentile_column = 'percentile'  # Percentile column (5, 10, 15, ..., 95)

# Variable name of the AE flag vector inside the MAT file
flags_mat_key = 'Lung_AE_Flag'  # Ensure this is the correct key in your MAT file

# ============================================================
# DATA IMPORT SECTION: Load Data
# ============================================================
# Load SUV data (Adjust this if you are using another format, like CSV)
suv_data = pd.read_excel(suv_data_path)

# Load AE flags data from MAT file
flags_data = loadmat(flags_data_path)
ae_flags = flags_data[flags_mat_key]

# Every model below reads the AE flag from ``suv_data``. When the spreadsheet
# does not already carry that column, attach the flags loaded from the MAT
# file. loadmat returns at-least-2-D arrays, hence the flattening.
# NOTE(review): assumes the MAT vector is row-aligned with the spreadsheet -
# confirm against the data export. A length mismatch raises ValueError.
if ae_flag_column not in suv_data.columns:
    suv_data[ae_flag_column] = np.ravel(ae_flags)


# ============================================================
# Step 1: Treatment Effect (C1) - β_Z (Logistic Regression)
# ============================================================
def calculate_treatment_effect(data, treatment_column, ae_flag_column):
    """Calculate Treatment Effect (C1) using logistic regression.

    Fits ``ae_flag ~ const + treatment`` with ``statsmodels`` ``Logit``.

    Args:
        data: DataFrame holding both columns.
        treatment_column: Name of the (numeric) treatment column.
        ae_flag_column: Name of the binary AE flag column.

    Returns:
        Tuple ``(beta_Z, p_value_C1)``: the treatment coefficient and its
        p-value.
    """
    X = sm.add_constant(data[treatment_column])  # Add intercept term
    y = data[ae_flag_column]  # Response variable (AE flag)
    model = sm.Logit(y, X).fit(disp=False)  # disp=False silences optimizer output
    beta_Z = model.params[treatment_column]
    p_value_C1 = model.pvalues[treatment_column]
    return beta_Z, p_value_C1


# Example usage for Treatment Effect (C1)
beta_Z, p_value_C1 = calculate_treatment_effect(suv_data, treatment_column, ae_flag_column)


# ============================================================
# Step 2: Treatment Effect on Biomarker (C2) - α_Z
# ============================================================
def calculate_treatment_on_biomarker(data, treatment_column, suv_column):
    """Calculate Treatment Effect on Biomarker (C2) using linear regression.

    Fits ``suv ~ const + treatment`` with ordinary least squares.

    Args:
        data: DataFrame holding both columns.
        treatment_column: Name of the (numeric) treatment column.
        suv_column: Name of the SUV column used as the response.

    Returns:
        Tuple ``(alpha_Z, p_value_C2)``: the treatment coefficient and its
        p-value.
    """
    X = sm.add_constant(data[treatment_column])  # Add intercept term
    y = data[suv_column]  # SUV values as response
    # OLS has a closed-form solution; ``disp`` is an optimizer option that
    # only applies to iterative fits such as Logit, so it is not passed here.
    model = sm.OLS(y, X).fit()
    alpha_Z = model.params[treatment_column]
    p_value_C2 = model.pvalues[treatment_column]
    return alpha_Z, p_value_C2


# Example usage for Treatment Effect on Biomarker (C2)
alpha_Z, p_value_C2 = calculate_treatment_on_biomarker(suv_data, treatment_column, suv_column)


# ============================================================
# Step 3: Surrogate Effect (C3) - η_S
# ============================================================
def calculate_surrogate_effect(data, suv_column, ae_flag_column):
    """Calculate the Surrogate Effect (C3) using logistic regression.

    Fits ``ae_flag ~ const + suv`` with ``statsmodels`` ``Logit``.

    Args:
        data: DataFrame holding both columns.
        suv_column: Name of the SUV column used as the regressor.
        ae_flag_column: Name of the binary AE flag column.

    Returns:
        Tuple ``(eta_S, p_value_C3)``: the SUV coefficient and its p-value.
    """
    X = sm.add_constant(data[suv_column])  # Add intercept term
    y = data[ae_flag_column]  # AE flag as response
    model = sm.Logit(y, X).fit(disp=False)
    eta_S = model.params[suv_column]
    p_value_C3 = model.pvalues[suv_column]
    return eta_S, p_value_C3


# Example usage for Surrogate Effect (C3)
eta_S, p_value_C3 = calculate_surrogate_effect(suv_data, suv_column, ae_flag_column)


# ============================================================
# Step 4: AUC Calculation (C3)
# ============================================================
def calculate_auc(data, suv_column, ae_flag_column):
    """Calculate AUC for Criterion 3 (C3).

    Args:
        data: DataFrame holding both columns.
        suv_column: Name of the SUV column used as the score.
        ae_flag_column: Name of the binary AE flag column used as the label.

    Returns:
        The ROC AUC of the SUV score against the AE flag.
    """
    return roc_auc_score(data[ae_flag_column], data[suv_column])


# Example usage for AUC (C3)
auc_C3 = calculate_auc(suv_data, suv_column, ae_flag_column)


# ============================================================
# Step 5: PE Risk Calculation (C4)
# ============================================================
def calculate_pe_risk(data, treatment_column, suv_column, ae_flag_column):
    """Calculate PE Risk for Criterion 4.

    Fits ``ae_flag ~ const + suv`` with ``Logit`` and returns the SUV
    coefficient.

    NOTE(review): this is the same model and the same coefficient as
    ``calculate_surrogate_effect``, and ``treatment_column`` is accepted but
    never used. Confirm whether C4 was meant to use a treatment-adjusted
    model; the computation is left unchanged here.

    Args:
        data: DataFrame holding the columns.
        treatment_column: Unused; kept for interface compatibility.
        suv_column: Name of the SUV column used as the regressor.
        ae_flag_column: Name of the binary AE flag column.

    Returns:
        The fitted coefficient of the SUV column.
    """
    X = sm.add_constant(data[suv_column])  # Add intercept term
    y = data[ae_flag_column]  # AE flag as response
    model = sm.Logit(y, X).fit(disp=False)
    PE_risk = model.params[suv_column]
    return PE_risk


# Example usage for PE Risk (C4)
PE_risk_C4 = calculate_pe_risk(suv_data, treatment_column, suv_column, ae_flag_column)

# ============================================================
# Step 6: Creating a Clean Table with All Results
# ============================================================
# Combine the results into a DataFrame for easy visualization. Each estimate
# is a single scalar, so it is repeated once per row of the percentile column.
n_rows = len(suv_data)
results = {
    "Percentile": suv_data[percentile_column],
    "Treatment Effect (C1)": [beta_Z] * n_rows,
    "p-value (C1)": [p_value_C1] * n_rows,
    "Treatment Effect (C2)": [alpha_Z] * n_rows,
    "p-value (C2)": [p_value_C2] * n_rows,
    "Surrogate Effect (C3)": [eta_S] * n_rows,
    "p-value (C3)": [p_value_C3] * n_rows,
    "AUC (C3)": [auc_C3] * n_rows,
    "PE Risk (C4)": [PE_risk_C4] * n_rows,
}
results_df = pd.DataFrame(results)


# ============================================================
# Step 7: Visualize Results using Plotly (Interactive Plot)
# ============================================================
def create_interactive_plot(results=None):
    """Plot the C2, C3, AUC and C4 metrics against the percentile column.

    Args:
        results: Optional table with the same columns as ``results_df``.
            Defaults to the module-level ``results_df``.

    Displays the figure with ``fig.show()``; returns nothing.
    """
    table = results_df if results is None else results

    # subplot_titles must be a sequence of strings: a bare parenthesised
    # string would be indexed character by character.
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Treatment Effects and Metrics",))

    # (column in the results table, legend label). Scatter needs array-like
    # y values, so the per-row columns are plotted instead of bare scalars.
    traces = (
        ("Treatment Effect (C2)", "Treatment Effect on Biomarker (C2)"),
        ("Surrogate Effect (C3)", "Surrogate Effect (C3)"),
        ("AUC (C3)", "AUC (C3)"),
        ("PE Risk (C4)", "PE Risk (C4)"),
    )
    for column, label in traces:
        fig.add_trace(
            go.Scatter(
                x=table["Percentile"],
                y=table[column],
                mode="lines+markers",
                name=label,
            )
        )

    # Update layout
    fig.update_layout(
        title="Comparison of Treatment Effects, Surrogate Effects, AUC, and PE Risk Across Percentiles",
        xaxis_title="SUV Percentile",
        yaxis_title="Metric Value",
        width=1000,
        height=600,
    )
    fig.show()


# Call the function to visualize the plot
create_interactive_plot()

# ============================================================
# Export Results to CSV and Excel
# ============================================================
results_df.to_csv('results_table.csv', index=False)  # Save table as CSV
results_df.to_excel('results_table.xlsx', index=False)  # Save table as Excel