# Reconstructed from the additions in the patch "feat(src): add confusion
# matrix plotting and label percentage calculation" for
# code/src/ml/model_selection.py.
# NOTE(review): `np`, `pd` and `load` are not defined in the visible hunks;
# presumably they are imported at the top of model_selection.py -- confirm.
# NOTE(review): the same patch also changes plot_confusion_matrix (it drops
# the x_test parameter and uses the stored i['y_pred']). Only fragments of
# that function are visible in the hunks, so it is not reproduced here.


def calculate_label_percentages(labels):
    """
    Print the percentage distribution of the unique labels in an array.

    Parameters
    ----------
    labels : np.ndarray or array-like
        Labels to summarise. Any shape accepted by ``np.unique`` works; the
        input is treated as one flat collection of labels.

    Returns
    -------
    None
        The distribution is printed to stdout, one
        ``Label <value>: <percentage>%`` line per unique label, in the
        sorted order produced by ``np.unique``.
    """
    unique, counts = np.unique(labels, return_counts=True)

    # np.unique flattens its input, so normalise by the total count rather
    # than len(labels), which is only the first dimension of an N-D array.
    percentages = counts / counts.sum() * 100

    result = "\n".join(
        f"Label {label}: {percentage:.2f}%"
        for label, percentage in zip(unique, percentages)
    )
    print(result)


def _stft_features(vibration_data, fs=1024, nperseg=1024, noverlap=512):
    """Return the STFT magnitude of a 1-D signal as a DataFrame with one row
    per time segment and one ``Freq_<Hz>`` column per frequency bin."""
    # Function-scope import, as in the original code. The window is taken
    # from scipy.signal.windows because the scipy.signal.hann alias is no
    # longer available in current SciPy releases.
    from scipy.signal import stft
    from scipy.signal.windows import hann

    freqs, _times, Zxx = stft(
        vibration_data,
        fs=fs,
        window=hann(nperseg),
        nperseg=nperseg,
        noverlap=noverlap,
    )

    # Zxx is (n_freqs, n_segments). After the transpose the columns are the
    # frequency bins, so they are labelled from `freqs`; sizing the labels by
    # the segment count only works when both dimensions happen to be equal.
    data = pd.DataFrame(
        np.abs(Zxx).T,
        columns=[f"Freq_{f:.2f}" for f in freqs],
    )
    # The 0 Hz column is named "00" to match the model input format.
    return data.rename(columns={"Freq_0.00": "00"})


def inference_model(
    models, raw_file, column_question: int = None
):
    """
    Run a saved model on one column of a raw vibration data file and print
    the distribution of the predicted labels.

    The selected column is converted to STFT magnitude features (fs=1024,
    Hann window of 1024 samples, 512 samples overlap), the model is loaded
    from disk, and its predictions are summarised with
    ``calculate_label_percentages``.

    Parameters
    ----------
    models : str or path-like
        Path of the exported model file; passed to ``load``.
    raw_file : str, path-like or file-like
        Whitespace-delimited text file. The first 10 lines are skipped and
        the next line is used as the header.
    column_question : int
        Positional index of the column of ``raw_file`` to analyse. It is
        required; the ``None`` default is kept only so the signature stays
        unchanged.

    Returns
    -------
    None
        The label distribution is printed, not returned.

    Raises
    ------
    ValueError
        If ``column_question`` is None.

    Example
    -------
    >>> model = {"SVM": "models/sensor1/SVM.joblib", "SVM with PCA": "models/sensor1/SVM_with_PCA.joblib"}
    >>> inference_model(model["SVM"], "zzzAD1.TXT", column_question=1)
    """
    if column_question is None:
        raise ValueError(
            "column_question must be the integer index of the column to analyse"
        )

    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True.
    df = pd.read_csv(raw_file, sep=r"\s+", skiprows=10, header=0, memory_map=True)
    vibration_data = df.iloc[:, column_question].values

    data = _stft_features(vibration_data)

    # NOTE(review): `load` is presumably joblib.load, which unpickles the
    # file -- only load model files from trusted sources.
    model = load(models)
    return calculate_label_percentages(model.predict(data))