|
|
@@ -1,118 +1,164 @@
|
|
|
-import numpy as np
|
|
|
-import pandas as pd
|
|
|
-from scipy.io import loadmat
|
|
|
-import statsmodels.api as sm
|
|
|
-
|
|
|
-
|
|
|
-def load_prentice_mat(flags_path, suv_path):
|
|
|
- flags_mat = loadmat(flags_path)
|
|
|
- suv_mat = loadmat(suv_path)
|
|
|
-
|
|
|
- patients_raw = flags_mat["patients"]
|
|
|
- patient_ids = [str(patients_raw[i, 0].squeeze()) for i in range(patients_raw.shape[0])]
|
|
|
-
|
|
|
- flags = flags_mat["flags"]
|
|
|
- days = flags_mat["days"]
|
|
|
- lung = suv_mat["lung_SUVperc_COMBINED"]
|
|
|
-
|
|
|
- return {
|
|
|
- "patient_ids": patient_ids,
|
|
|
- "flags": flags,
|
|
|
- "days": days,
|
|
|
- "lung": lung,
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
-def extract_lung_percentile_max(lung_array, percentile):
|
|
|
- idx = int(percentile) - 1
|
|
|
- x = lung_array[:, :, idx]
|
|
|
- x = np.where(np.isnan(x), -np.inf, x)
|
|
|
- x_max = np.max(x, axis=1)
|
|
|
- x_max[np.isneginf(x_max)] = np.nan
|
|
|
- return x_max
|
|
|
-
|
|
|
-
|
|
|
-def build_analysis_df_from_mat(data, treatment_col, ae_col, percentiles):
|
|
|
- df = pd.DataFrame({
|
|
|
- "Patient_ID": data["patient_ids"],
|
|
|
- "Treatment_raw": data["flags"][:, treatment_col],
|
|
|
- "AE_raw": data["flags"][:, ae_col],
|
|
|
- })
|
|
|
-
|
|
|
- for p in percentiles:
|
|
|
- df[f"Lung_SUV_{p}_max"] = extract_lung_percentile_max(data["lung"], p)
|
|
|
-
|
|
|
- return df
|
|
|
-
|
|
|
-
|
|
|
-def collapse_treatment_to_two_groups(series, mapping):
|
|
|
- return series.map(mapping)
|
|
|
-
|
|
|
-
|
|
|
-def add_constant_safe(x):
|
|
|
- return sm.add_constant(x, has_constant="add")
|
|
|
-
|
|
|
-
|
|
|
-def fit_logit(y, x):
|
|
|
- return sm.Logit(y, add_constant_safe(x)).fit(disp=0)
|
|
|
-
|
|
|
-
|
|
|
-def fit_ols(y, x):
|
|
|
- return sm.OLS(y, add_constant_safe(x)).fit()
|
|
|
-
|
|
|
-
|
|
|
-def null_logit_llf(y):
|
|
|
- return sm.Logit(y, np.ones((len(y), 1))).fit(disp=0).llf
|
|
|
-
|
|
|
-
|
|
|
-def run_prentice_single(df, biomarker_col, treatment_col="Treatment", ae_col="AE", alpha=0.05):
|
|
|
- dfx = df[[treatment_col, ae_col, biomarker_col]].dropna().copy()
|
|
|
-
|
|
|
- Z = dfx[treatment_col].astype(float).values
|
|
|
- T = dfx[ae_col].astype(float).values
|
|
|
- S = dfx[biomarker_col].astype(float).values
|
|
|
-
|
|
|
- m1 = fit_logit(T, Z)
|
|
|
- m2 = fit_ols(S, Z)
|
|
|
- m3 = fit_logit(T, S)
|
|
|
- m4 = fit_logit(T, pd.DataFrame({"Z": Z, "S": S}))
|
|
|
-
|
|
|
- ll_aa = m1.llf
|
|
|
- ll_ab = m4.llf
|
|
|
- ll_bb = null_logit_llf(T)
|
|
|
- pe = (ll_aa - ll_ab) / (ll_aa - ll_bb) if (ll_aa - ll_bb) != 0 else np.nan
|
|
|
-
|
|
|
- p_aa = m1.predict(add_constant_safe(Z))
|
|
|
- p_ab = m4.predict(add_constant_safe(pd.DataFrame({"Z": Z, "S": S})))
|
|
|
- p_bb = np.repeat(np.mean(T), len(T))
|
|
|
-
|
|
|
- aa = np.mean((T - p_aa) ** 2)
|
|
|
- ab = np.mean((T - p_ab) ** 2)
|
|
|
- bb = np.mean((T - p_bb) ** 2)
|
|
|
- pe_risk = (aa - ab) / (aa - bb) if (aa - bb) != 0 else np.nan
|
|
|
-
|
|
|
- return {
|
|
|
- "biomarker": biomarker_col,
|
|
|
- "C1_p": m1.pvalues[1],
|
|
|
- "C2_p": m2.pvalues[1],
|
|
|
- "C3_p": m3.pvalues[1],
|
|
|
- "C4_Z_p": m4.pvalues[1],
|
|
|
- "C4_S_p": m4.pvalues[2],
|
|
|
- "PE": pe,
|
|
|
- "PE_risk": pe_risk,
|
|
|
- "C1": m1.pvalues[1] < alpha,
|
|
|
- "C2": m2.pvalues[1] < alpha,
|
|
|
- "C3": m3.pvalues[1] < alpha,
|
|
|
- "C4": (m4.pvalues[2] < alpha) and (m4.pvalues[1] > alpha),
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
-def run_prentice_multiple(df, biomarker_cols, treatment_col="Treatment", ae_col="AE", alpha=0.05):
|
|
|
- rows = []
|
|
|
- for col in biomarker_cols:
|
|
|
- try:
|
|
|
- out = run_prentice_single(df, col, treatment_col=treatment_col, ae_col=ae_col, alpha=alpha)
|
|
|
- except Exception as e:
|
|
|
- out = {"biomarker": col, "error": str(e)}
|
|
|
- rows.append(out)
|
|
|
- return pd.DataFrame(rows)
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+import plotly.graph_objects as go
|
|
|
+from plotly.subplots import make_subplots
|
|
|
+import statsmodels.api as sm
|
|
|
+from sklearn.metrics import roc_auc_score
|
|
|
+from scipy.io import loadmat
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# CONFIGURATION SECTION (Customize Data Paths and Column Names)
|
|
|
+# ============================================================
|
|
|
+# Replace these with your actual data file paths
|
|
|
+suv_data_path = 'path_to_suv_data.xlsx' # Update with the path to your SUV data (Excel)
|
|
|
+flags_data_path = 'path_to_flags_data.mat' # Update with the path to your AE flags data (MAT)
|
|
|
+
|
|
|
+# Column names for your dataset (Modify based on your data)
|
|
|
+treatment_column = 'Treatment' # Treatment type (Monotherapy, Combo)
|
|
|
+suv_column = 'SUV_percentile' # SUV percentiles column
|
|
|
+ae_flag_column = 'Lung_AE_Flag' # AE Flag column (0 or 1)
|
|
|
+percentile_column = 'percentile' # Percentile column (5, 10, 15, ..., 95)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# DATA IMPORT SECTION: Load Data
|
|
|
+# ============================================================
|
|
|
+# Load SUV data (Adjust this if you are using another format, like CSV)
|
|
|
+suv_data = pd.read_excel(suv_data_path)
|
|
|
+
|
|
|
+# Load AE flags data from MAT file
|
|
|
+flags_data = loadmat(flags_data_path)
|
|
|
+ae_flags = flags_data['Lung_AE_Flag'] # Ensure this is the correct key in your MAT file
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Step 1: Treatment Effect (C1) - β_Z (Logistic Regression)
|
|
|
+# ============================================================
|
|
|
+def calculate_treatment_effect(data, treatment_column, ae_flag_column):
|
|
|
+ """
|
|
|
+ Calculate Treatment Effect (C1) using logistic regression.
|
|
|
+ """
|
|
|
+ X = sm.add_constant(data[treatment_column]) # Add intercept term
|
|
|
+ y = data[ae_flag_column] # Response variable (AE flag)
|
|
|
+ model = sm.Logit(y, X).fit(disp=False) # Logistic regression
|
|
|
+ beta_Z = model.params[treatment_column] # Treatment effect (coefficient)
|
|
|
+ p_value_C1 = model.pvalues[treatment_column] # p-value for treatment effect
|
|
|
+ return beta_Z, p_value_C1
|
|
|
+
|
|
|
+# Example usage for Treatment Effect (C1)
|
|
|
+beta_Z, p_value_C1 = calculate_treatment_effect(suv_data, treatment_column, ae_flag_column)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Step 2: Treatment Effect on Biomarker (C2) - α_Z
|
|
|
+# ============================================================
|
|
|
+def calculate_treatment_on_biomarker(data, treatment_column, suv_column):
|
|
|
+ """
|
|
|
+ Calculate Treatment Effect on Biomarker (C2) using linear regression.
|
|
|
+ """
|
|
|
+ X = sm.add_constant(data[treatment_column]) # Add intercept term
|
|
|
+ y = data[suv_column] # SUV values as response
|
|
|
+ model = sm.OLS(y, X).fit(disp=False) # OLS regression
|
|
|
+ alpha_Z = model.params[treatment_column] # Treatment effect on the biomarker
|
|
|
+ p_value_C2 = model.pvalues[treatment_column] # p-value for treatment effect on biomarker
|
|
|
+ return alpha_Z, p_value_C2
|
|
|
+
|
|
|
+# Example usage for Treatment Effect on Biomarker (C2)
|
|
|
+alpha_Z, p_value_C2 = calculate_treatment_on_biomarker(suv_data, treatment_column, suv_column)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Step 3: Surrogate Effect (C3) - η_S
|
|
|
+# ============================================================
|
|
|
+def calculate_surrogate_effect(data, suv_column, ae_flag_column):
|
|
|
+ """
|
|
|
+ Calculate the Surrogate Effect (C3) using logistic regression.
|
|
|
+ """
|
|
|
+ X = sm.add_constant(data[suv_column]) # Add intercept term
|
|
|
+ y = data[ae_flag_column] # AE flag as response
|
|
|
+ model = sm.Logit(y, X).fit(disp=False) # Logistic regression
|
|
|
+ eta_S = model.params[suv_column] # Surrogate effect (coefficient)
|
|
|
+ p_value_C3 = model.pvalues[suv_column] # p-value for surrogate effect
|
|
|
+ return eta_S, p_value_C3
|
|
|
+
|
|
|
+# Example usage for Surrogate Effect (C3)
|
|
|
+eta_S, p_value_C3 = calculate_surrogate_effect(suv_data, suv_column, ae_flag_column)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Step 4: AUC Calculation (C3)
|
|
|
+# ============================================================
|
|
|
+def calculate_auc(data, suv_column, ae_flag_column):
|
|
|
+ """
|
|
|
+ Calculate AUC for Criterion 3 (C3).
|
|
|
+ """
|
|
|
+ auc = roc_auc_score(data[ae_flag_column], data[suv_column]) # AUC calculation
|
|
|
+ return auc
|
|
|
+
|
|
|
+# Example usage for AUC (C3)
|
|
|
+auc_C3 = calculate_auc(suv_data, suv_column, ae_flag_column)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Step 5: PE Risk Calculation (C4)
|
|
|
+# ============================================================
|
|
|
+def calculate_pe_risk(data, treatment_column, suv_column, ae_flag_column):
|
|
|
+ """
|
|
|
+ Calculate PE Risk for Criterion 4.
|
|
|
+ """
|
|
|
+ X = sm.add_constant(data[suv_column]) # Add intercept term
|
|
|
+ y = data[ae_flag_column] # AE flag as response
|
|
|
+ model = sm.Logit(y, X).fit(disp=False) # Logistic regression
|
|
|
+ PE_risk = model.params[suv_column] # PE Risk calculation (coefficient)
|
|
|
+ return PE_risk
|
|
|
+
|
|
|
+# Example usage for PE Risk (C4)
|
|
|
+PE_risk_C4 = calculate_pe_risk(suv_data, treatment_column, suv_column, ae_flag_column)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Step 6: Creating a Clean Table with All Results
|
|
|
+# ============================================================
|
|
|
+# Combine the results into a DataFrame for easy visualization
|
|
|
+results = {
|
|
|
+ "Percentile": suv_data[percentile_column],
|
|
|
+ "Treatment Effect (C1)": [beta_Z] * len(suv_data),
|
|
|
+ "p-value (C1)": [p_value_C1] * len(suv_data),
|
|
|
+ "Treatment Effect (C2)": [alpha_Z] * len(suv_data),
|
|
|
+ "p-value (C2)": [p_value_C2] * len(suv_data),
|
|
|
+ "Surrogate Effect (C3)": [eta_S] * len(suv_data),
|
|
|
+ "p-value (C3)": [p_value_C3] * len(suv_data),
|
|
|
+ "AUC (C3)": [auc_C3] * len(suv_data),
|
|
|
+ "PE Risk (C4)": [PE_risk_C4] * len(suv_data)
|
|
|
+}
|
|
|
+
|
|
|
+results_df = pd.DataFrame(results)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Step 7: Visualize Results using Plotly (Interactive Plot)
|
|
|
+# ============================================================
|
|
|
+def create_interactive_plot():
|
|
|
+ """
|
|
|
+ Create an interactive plot to visualize Treatment Effects, Surrogate Effects, AUC, and PE Risk across percentiles.
|
|
|
+ """
|
|
|
+ # Create a figure with subplots
|
|
|
+ fig = make_subplots(rows=1, cols=1, subplot_titles=("Treatment Effects and Metrics"))
|
|
|
+
|
|
|
+ # Plot the results
|
|
|
+ fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=alpha_Z, mode="lines+markers", name="Treatment Effect on Biomarker (C2)"))
|
|
|
+ fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=eta_S, mode="lines+markers", name="Surrogate Effect (C3)"))
|
|
|
+ fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=auc_C3, mode="lines+markers", name="AUC (C3)"))
|
|
|
+ fig.add_trace(go.Scatter(x=suv_data[percentile_column], y=PE_risk_C4, mode="lines+markers", name="PE Risk (C4)"))
|
|
|
+
|
|
|
+ # Update layout
|
|
|
+ fig.update_layout(
|
|
|
+ title="Comparison of Treatment Effects, Surrogate Effects, AUC, and PE Risk Across Percentiles",
|
|
|
+ xaxis_title="SUV Percentile",
|
|
|
+ yaxis_title="Metric Value",
|
|
|
+ width=1000,
|
|
|
+ height=600
|
|
|
+ )
|
|
|
+
|
|
|
+ fig.show()
|
|
|
+
|
|
|
+# Call the function to visualize the plot
|
|
|
+create_interactive_plot()
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Export Results to CSV and Excel
|
|
|
+# ============================================================
|
|
|
+results_df.to_csv('results_table.csv', index=False) # Save table as CSV
|
|
|
+results_df.to_excel('results_table.xlsx', index=False) # Save table as Excel
|