feat(src): add confusion matrix plotting and label percentage calculation

This commit is contained in:
nuluh
2025-06-24 14:06:56 +07:00
parent 114ab849b9
commit 5041ee3feb

View File

@@ -155,7 +155,7 @@ def train_and_evaluate_model(
except Exception as e: except Exception as e:
result["error"] = f"Training error: {str(e)}" result["error"] = f"Training error: {str(e)}"
return result return result
def plot_confusion_matrix(results_sensor, x_test, y_test): def plot_confusion_matrix(results_sensor, y_test):
""" """
Plot confusion matrices for each model in results_sensor1. Plot confusion matrices for each model in results_sensor1.
@@ -186,14 +186,78 @@ def plot_confusion_matrix(results_sensor, x_test, y_test):
# Iterate through each model result and plot confusion matrix # Iterate through each model result and plot confusion matrix
for i in results_sensor: for i in results_sensor:
model = load(f"D:/thesis/models/{i['sensor']}/{i['model']}.joblib") model = load(f"D:/thesis/models/{i['sensor']}/{i['model']}.joblib")
y_pred = model.predict(x_test) cm = confusion_matrix(y_test, i['y_pred']) # -> ndarray
cm = confusion_matrix(y_test, y_pred) # -> ndarray
# get the class labels # get the class labels
labels = model.classes_ labels = model.classes_
# Plot # Plot
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels) disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues) # You can change colormap disp.plot(cmap=plt.cm.Blues) # You can change colormap
plt.title(f"{i['model']} {i['sensor']} Test") plt.title(f"{i['model']} {i['sensor']} Test")
plt.show() plt.show()
def calculate_label_percentages(labels):
    """
    Calculate and print the percentage distribution of unique labels in a numpy array.

    Each unique label is printed on its own line as ``Label <label>: <pct>%``
    with the percentage rounded to two decimals.

    Parameters:
    labels (np.array): Input array of labels. Any shape is accepted; the
        array is treated as a flat collection of labels.

    Returns:
    None
    """
    # Count occurrences of each unique label (np.unique flattens its input)
    unique, counts = np.unique(labels, return_counts=True)

    # Normalise by the number of elements actually counted. len(labels) is
    # only the first-axis length, which gives wrong percentages for any
    # input that is not 1-D.
    total = counts.sum()
    percentages = (counts / total) * 100

    # Build and print the result string
    result = "\n".join(
        f"Label {label}: {percentage:.2f}%"
        for label, percentage in zip(unique, percentages)
    )
    print(result)
def inference_model(
    models, raw_file, column_question: int = None
):
    """
    Run a saved model on one column of a raw vibration data file and print
    the percentage of each predicted label.

    The selected column is transformed with a short-time Fourier transform
    (1024-sample Hann window, 512-sample overlap, fs=1024) and the magnitude
    of every time segment is passed to the model as one feature row.

    Parameters
    ----------
    models : str or path-like
        Path of the exported model file to load.
    raw_file : str or path-like
        Whitespace-delimited text file with the raw measurements. The first
        10 lines are skipped and the next line is read as the header.
    column_question : int
        Zero-based position of the column in ``raw_file`` to analyse.
        Required, despite the ``None`` default kept for signature
        compatibility.

    Returns
    -------
    None
        The label distribution is printed by ``calculate_label_percentages``.

    Raises
    ------
    ValueError
        If ``column_question`` is not provided.

    Example
    -------
    >>> model = {"SVM": "models/sensor1/SVM.joblib", "SVM with PCA": "models/sensor1/SVM_with_PCA.joblib"}
    >>> inference_model(model["SVM"], "zzzAD1.TXT", column_question=1)
    """
    if column_question is None:
        raise ValueError(
            "column_question must be the integer position of the column to analyse"
        )

    # `hann` is no longer exported from scipy.signal; it lives in
    # scipy.signal.windows.
    from scipy.signal import stft
    from scipy.signal.windows import hann

    # sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True
    df = pd.read_csv(raw_file, sep=r"\s+", skiprows=10, header=0, memory_map=True)
    vibration_data = df.iloc[:, column_question].values

    # Perform STFT
    freqs, _, Zxx = stft(
        vibration_data,
        fs=1024,
        window=hann(1024),
        nperseg=1024,
        noverlap=512,
    )

    # Zxx is (n_freqs, n_segments); after transposing, each row is a segment
    # and each column a frequency bin, so the column labels must come from
    # the frequency axis (not from the number of segments).
    data = pd.DataFrame(
        np.abs(Zxx).T,
        columns=[f"Freq_{f:.2f}" for f in freqs],
    )
    data = data.rename(columns={"Freq_0.00": "00"})  # To match the model input format

    # NOTE(review): `load` is presumably joblib.load, which unpickles the
    # file — only pass model paths from trusted sources.
    model = load(models)  # Load the model from the provided path
    return calculate_label_percentages(model.predict(data))