horvat
/
prentice_criteria


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
							import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statsmodels.api as sm
from sklearn.metrics import roc_auc_score
from scipy.io import loadmat

# ============================================================
# CONFIGURATION SECTION (Customize Data Paths and Column Names)
# ============================================================
# Replace these with your actual data file paths
suv_data_path = 'path_to_suv_data.xlsx'  # Update with the path to your SUV data (Excel)
flags_data_path = 'path_to_flags_data.mat'  # Update with the path to your AE flags data (MAT)

# Column names for your dataset (Modify based on your data)
treatment_column = 'Treatment'  # Treatment type (Monotherapy, Combo)
suv_column = 'SUV_percentile'  # SUV percentiles column
ae_flag_column = 'Lung_AE_Flag'  # AE Flag column (0 or 1)
percentile_column = 'percentile'  # Percentile column (5, 10, 15, ..., 95)

# ============================================================
# DATA IMPORT SECTION: Load Data
# ============================================================
# Load SUV data (Adjust this if you are using another format, like CSV)
suv_data = pd.read_excel(suv_data_path)

# Load AE flags data from MAT file
flags_data = loadmat(flags_data_path)
ae_flags = flags_data['Lung_AE_Flag']  # Ensure this is the correct key in your MAT file

# ============================================================
# Step 1: Treatment Effect (C1) - β_Z (Logistic Regression)
# ============================================================
def calculate_treatment_effect(data, treatment_column, ae_flag_column):
    """
    Calculate Treatment Effect (C1) using logistic regression.
    """
    X = sm.add_constant(data[treatment_column])  # Add intercept term
    y = data[ae_flag_column]  # Response variable (AE flag)
    model = sm.Logit(y, X).fit(disp=False)  # Logistic regression
    beta_Z = model.params[treatment_column]  # Treatment effect (coefficient)
    p_value_C1 = model.pvalues[treatment_column]  # p-value for treatment effect
    return beta_Z, p_value_C1

# Example usage for Treatment Effect (C1)
beta_Z, p_value_C1 = calculate_treatment_effect(suv_data, treatment_column, ae_flag_column)

# ============================================================
# Step 2: Treatment Effect on Biomarker (C2) - α_Z
# ============================================================
def calculate_treatment_on_biomarker(data, treatment_column, suv_column):
    """
    Calculate Treatment Effect on Biomarker (C2) using linear regression.
    """
    X = sm.add_constant(data[treatment_column])  # Add intercept term
    y = data[suv_column]  # SUV values as response
    model = sm.OLS(y, X).fit(disp=False)  # OLS regression
    alpha_Z = model.params[treatment_column]  # Treatment effect on the biomarker
    p_value_C2 = model.pvalues[treatment_column]  # p-value for treatment effect on biomarker
    return alpha_Z, p_value_C2

# Example usage for Treatment Effect on Biomarker (C2)
alpha_Z, p_value_C2 = calculate_treatment_on_biomarker(suv_data, treatment_column, suv_column)

# ============================================================
# Step 3: Surrogate Effect (C3) - η_S
# ============================================================
def calculate_surrogate_effect(data, suv_column, ae_flag_column):
    """
    Calculate the Surrogate Effect (C3) using logistic regression.
    """
    X = sm.add_constant(data[suv_column])  # Add intercept term
    y = data[ae_flag_column]  # AE flag as response
    model = sm.Logit(y, X).fit(disp=False)  # Logistic regression
    eta_S = model.params[suv_column]  # Surrogate effect (coefficient)
    p_value_C3 = model.pvalues[suv_column]  # p-value for surrogate effect
    return eta_S, p_value_C3

# Example usage for Surrogate Effect (C3)
eta_S, p_value_C3 = calculate_surrogate_effect(suv_data, suv_column, ae_flag_column)

# ============================================================
# Step 4: AUC Calculation (C3)
# ============================================================
def calculate_auc(data, suv_column, ae_flag_column):
    """
    Calculate AUC for Criterion 3 (C3).
    """
    auc = roc_auc_score(data[ae_flag_column], data[suv_column])  # AUC calculation
    return auc

# Example usage for AUC (C3)
auc_C3 = calculate_auc(suv_data, suv_column, ae_flag_column)

# ============================================================
# Step 5: PE Risk Calculation (C4)
# ============================================================
def calculate_pe_risk(data, treatment_column, suv_column, ae_flag_column):
    """
    Calculate PE Risk for Criterion 4.
    """
    X = sm.add_constant(data[suv_column])  # Add intercept term
    y = data[ae_flag_column]  # AE flag as response
    model = sm.Logit(y, X).fit(disp=False)  # Logistic regression
    PE_risk = model.params[suv_column]  # PE Risk calculation (coefficient)
    return PE_risk

# Example usage for PE Risk (C4)
PE_risk_C4 = calculate_pe_risk(suv_data, treatment_column, suv_column, ae_flag_column)

# ============================================================
# Step 6: Creating a Clean Table with All Results
# ============================================================
# Combine the results into a DataFrame for easy visualization
results = {
    "Percentile": suv_data[percentile_column],
    "Treatment Effect (C1)": [beta_Z] * len(suv_data),
    "p-value (C1)": [p_value_C1] * len(suv_data),
    "Treatment Effect (C2)": [alpha_Z] * len(suv_data),
    "p-value (C2)": [p_value_C2] * len(suv_data),
    "Surrogate Effect (C3)": [eta_S] * len(suv_data),
    "p-value (C3)": [p_value_C3] * len(suv_data),
    "AUC (C3)": [auc_C3] * len(suv_data),
    "PE Risk (C4)": [PE_risk_C4] * len(suv_data)
}

results_df = pd.DataFrame(results)

# ============================================================
# Step 7: Visualize Results using Plotly (Interactive Plot)
# ============================================================
def create_interactive_plot():
    """
    Create an interactive plot to visualize Treatment Effects, Surrogate Effects, AUC, and PE Risk across percentiles.
    """
    # Create a figure with subplots
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Treatment Effects and Metrics"))

    # Plot the results
    fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=alpha_Z, mode="lines+markers", name="Treatment Effect on Biomarker (C2)"))
    fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=eta_S, mode="lines+markers", name="Surrogate Effect (C3)"))
    fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=auc_C3, mode="lines+markers", name="AUC (C3)"))
    fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=PE_risk_C4, mode="lines+markers", name="PE Risk (C4)"))

    # Update layout
    fig.update_layout(
        title="Comparison of Treatment Effects, Surrogate Effects, AUC, and PE Risk Across Percentiles",
        xaxis_title="SUV Percentile",
        yaxis_title="Metric Value",
        width=1000,
        height=600
    )
    
    fig.show()

# Call the function to visualize the plot
create_interactive_plot()

# ============================================================
# Export Results to CSV and Excel
# ============================================================
results_df.to_csv('results_table.csv', index=False)  # Save table as CSV
results_df.to_excel('results_table.xlsx', index=False)  # Save table as Excel