From b1be3a8b6fb54c17f3ff948ce053361f3e9fee70 Mon Sep 17 00:00:00 2001 From: nuluh Date: Thu, 16 Oct 2025 12:12:34 +0700 Subject: [PATCH] feat(src): add grid result summary function to export DataFrame to LaTeX --- code/src/representation.py | 68 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 code/src/representation.py diff --git a/code/src/representation.py b/code/src/representation.py new file mode 100644 index 0000000..b39c2f6 --- /dev/null +++ b/code/src/representation.py @@ -0,0 +1,68 @@ +import pandas as pd +import numpy as np + +def grid_result_summary(df: pd.DataFrame, output_path: str, include_efficiency: bool = True, verbose=False) -> None: + """ + Export a DataFrame to a LaTeX file with optional inclusion of the efficiency column. + + Parameters: + ----------- + df : pd.DataFrame + The DataFrame to export. + output_path : str + The file path where the LaTeX file will be saved. + include_efficiency : bool, optional + Whether to include the efficiency column (E_i) in the output. Default is True. + + Returns: + -------- + None + """ + # Define the columns to include + columns: list[str] = [ + "param_pca__n_components", + "param_svc__C", + "param_svc__gamma", + "mean_test_score", + "mean_fit_time" + ] + # Filter DataFrame to include only the best mean_test_score for each param_pca__n_components + df = df.loc[df.groupby('param_pca__n_components')['mean_test_score'].idxmax()][columns] + + # Convert C and gamma to log2 scale + df['param_svc__C'] = np.log2(df['param_svc__C']).astype(int) + df['param_svc__gamma'] = np.log2(df['param_svc__gamma']).astype(int) + + if include_efficiency: + columns.append("time_score_ratio") + df['time_score_ratio'] = df['mean_test_score'] / df['mean_fit_time'] * 1e3 + + # Rename columns for LaTeX formatting + column_names = { + "param_pca__n_components": r"$n_{\text{components}}$", + "param_svc__C": r"$C (\log_2)$", + "param_svc__gamma": r"$\gamma (\log_2)$", + "mean_test_score": r"$S_i$", + "mean_fit_time": r"$T_i$", + "time_score_ratio": r"$E_i (\times 10^{-3})$" + } + + # Filter and rename columns + df_filtered = df[columns].rename(columns=column_names) + + # Export to LaTeX + latex_table = df_filtered.to_latex( + index=False, + float_format="%.5f", + column_format="cccccc" if include_efficiency else "ccccc", + # caption="Best hyperparameters for each PCA component size.", + # label="tab:best_hyperparams" + ) + + # Save to file + with open(output_path, "w") as f: + f.write(latex_table) + + if verbose: + print(latex_table) + print(f"LaTeX table exported to '{output_path}'") \ No newline at end of file