# File: thesis/code/src/representation.py
# (68 lines, 2.2 KiB, Python)

import pandas as pd
import numpy as np
def grid_result_summary(df: pd.DataFrame, output_path: str, include_efficiency: bool = True, verbose=False) -> None:
"""
Export a DataFrame to a LaTeX file with optional inclusion of the efficiency column.
Parameters:
-----------
df : pd.DataFrame
The DataFrame to export.
output_path : str
The file path where the LaTeX file will be saved.
include_efficiency : bool, optional
Whether to include the efficiency column (E_i) in the output. Default is True.
Returns:
--------
None
"""
# Define the columns to include
columns: list[str] = [
"param_pca__n_components",
"param_svc__C",
"param_svc__gamma",
"mean_test_score",
"mean_fit_time"
]
# Filter DataFrame to include only the best mean_test_score for each param_pca__n_components
df = df.loc[df.groupby('param_pca__n_components')['mean_test_score'].idxmax()][columns]
# Convert C and gamma to log2 scale
df['param_svc__C'] = np.log2(df['param_svc__C']).astype(int)
df['param_svc__gamma'] = np.log2(df['param_svc__gamma']).astype(int)
if include_efficiency:
columns.append("time_score_ratio")
df['time_score_ratio'] = df['mean_test_score'] / df['mean_fit_time'] * 1e3
# Rename columns for LaTeX formatting
column_names = {
"param_pca__n_components": r"$n_{\text{components}}$",
"param_svc__C": r"$C (\log_2)$",
"param_svc__gamma": r"$\gamma (\log_2)$",
"mean_test_score": r"$S_i$",
"mean_fit_time": r"$T_i$",
"time_score_ratio": r"$E_i (\times 10^{-3})$"
}
# Filter and rename columns
df_filtered = df[columns].rename(columns=column_names)
# Export to LaTeX
latex_table = df_filtered.to_latex(
index=False,
float_format="%.5f",
column_format="cccccc" if include_efficiency else "ccccc",
# caption="Best hyperparameters for each PCA component size.",
# label="tab:best_hyperparams"
)
# Save to file
with open(output_path, "w") as f:
f.write(latex_table)
if verbose:
print(latex_table)
print(f"LaTeX table exported to '{output_path}'")