feat(src): add grid result summary function to export DataFrame to LaTeX
This commit is contained in:
68
code/src/representation.py
Normal file
68
code/src/representation.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
def grid_result_summary(df: pd.DataFrame, output_path: str, include_efficiency: bool = True, verbose=False) -> None:
|
||||
"""
|
||||
Export a DataFrame to a LaTeX file with optional inclusion of the efficiency column.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pd.DataFrame
|
||||
The DataFrame to export.
|
||||
output_path : str
|
||||
The file path where the LaTeX file will be saved.
|
||||
include_efficiency : bool, optional
|
||||
Whether to include the efficiency column (E_i) in the output. Default is True.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
None
|
||||
"""
|
||||
# Define the columns to include
|
||||
columns: list[str] = [
|
||||
"param_pca__n_components",
|
||||
"param_svc__C",
|
||||
"param_svc__gamma",
|
||||
"mean_test_score",
|
||||
"mean_fit_time"
|
||||
]
|
||||
# Filter DataFrame to include only the best mean_test_score for each param_pca__n_components
|
||||
df = df.loc[df.groupby('param_pca__n_components')['mean_test_score'].idxmax()][columns]
|
||||
|
||||
# Convert C and gamma to log2 scale
|
||||
df['param_svc__C'] = np.log2(df['param_svc__C']).astype(int)
|
||||
df['param_svc__gamma'] = np.log2(df['param_svc__gamma']).astype(int)
|
||||
|
||||
if include_efficiency:
|
||||
columns.append("time_score_ratio")
|
||||
df['time_score_ratio'] = df['mean_test_score'] / df['mean_fit_time'] * 1e3
|
||||
|
||||
# Rename columns for LaTeX formatting
|
||||
column_names = {
|
||||
"param_pca__n_components": r"$n_{\text{components}}$",
|
||||
"param_svc__C": r"$C (\log_2)$",
|
||||
"param_svc__gamma": r"$\gamma (\log_2)$",
|
||||
"mean_test_score": r"$S_i$",
|
||||
"mean_fit_time": r"$T_i$",
|
||||
"time_score_ratio": r"$E_i (\times 10^{-3})$"
|
||||
}
|
||||
|
||||
# Filter and rename columns
|
||||
df_filtered = df[columns].rename(columns=column_names)
|
||||
|
||||
# Export to LaTeX
|
||||
latex_table = df_filtered.to_latex(
|
||||
index=False,
|
||||
float_format="%.5f",
|
||||
column_format="cccccc" if include_efficiency else "ccccc",
|
||||
# caption="Best hyperparameters for each PCA component size.",
|
||||
# label="tab:best_hyperparams"
|
||||
)
|
||||
|
||||
# Save to file
|
||||
with open(output_path, "w") as f:
|
||||
f.write(latex_table)
|
||||
|
||||
if verbose:
|
||||
print(latex_table)
|
||||
print(f"LaTeX table exported to '{output_path}'")
|
||||
Reference in New Issue
Block a user