feat(src): add confusion matrix plotting and label percentage calculation
This commit is contained in:
@@ -155,7 +155,7 @@ def train_and_evaluate_model(
|
||||
except Exception as e:
|
||||
result["error"] = f"Training error: {str(e)}"
|
||||
return result
|
||||
def plot_confusion_matrix(results_sensor, x_test, y_test):
|
||||
def plot_confusion_matrix(results_sensor, y_test):
|
||||
"""
|
||||
Plot confusion matrices for each model in results_sensor1.
|
||||
|
||||
@@ -186,14 +186,78 @@ def plot_confusion_matrix(results_sensor, x_test, y_test):
|
||||
# Iterate through each model result and plot confusion matrix
|
||||
for i in results_sensor:
|
||||
model = load(f"D:/thesis/models/{i['sensor']}/{i['model']}.joblib")
|
||||
y_pred = model.predict(x_test)
|
||||
cm = confusion_matrix(y_test, y_pred) # -> ndarray
|
||||
cm = confusion_matrix(y_test, i['y_pred']) # -> ndarray
|
||||
|
||||
# get the class labels
|
||||
labels = model.classes_
|
||||
|
||||
# Plot
|
||||
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
|
||||
disp.plot(cmap=plt.cm.Blues) # You can change colormap
|
||||
plt.title(f"{i['model']} {i['sensor']} Test")
|
||||
plt.show()
|
||||
plt.show()
|
||||
|
||||
def calculate_label_percentages(labels):
    """
    Calculate and print the percentage distribution of unique labels.

    Parameters
    ----------
    labels : np.ndarray
        Input array of labels (e.g. model predictions).

    Returns
    -------
    None
        The distribution is printed, one ``Label <x>: <p>%`` line per
        unique label, in the sorted order produced by ``np.unique``.
    """
    # Count occurrences of each unique label in a single pass.
    unique, counts = np.unique(labels, return_counts=True)

    # Convert counts to percentages of the total number of labels.
    percentages = (counts / len(labels)) * 100

    # Build one line per label and print the whole report at once.
    report = "\n".join(
        f"Label {label}: {percentage:.2f}%"
        for label, percentage in zip(unique, percentages)
    )
    print(report)
||||
def inference_model(
    models, raw_file, column_question: int = None
):
    """
    Run a trained model on one questioned column of a raw vibration file.

    The raw file is parsed, the selected column is transformed with an
    STFT into the frequency-domain feature frame the models were trained
    on, and the predicted label distribution is printed.

    Parameters
    ----------
    models : str
        Path to a single exported (joblib) model file.
    raw_file : str
        Path to the whitespace-delimited raw vibration data file; the
        first 10 rows are skipped as metadata.
    column_question : int
        Zero-based index of the questioned column to run inference on.
        Required; a clear error is raised if omitted.

    Returns
    -------
    None
        The label percentage distribution of the predictions is printed
        via ``calculate_label_percentages``.

    Example
    -------
    >>> model = {"SVM": "models/sensor1/SVM.joblib", "SVM with PCA": "models/sensor1/SVM_with_PCA.joblib"}
    >>> inference_model(model["SVM"], "zzzAD1.TXT", column_question=1)
    """
    if column_question is None:
        # Fail fast with a clear message instead of a cryptic iloc error.
        raise ValueError("column_question must be a column index, not None")

    # sep=r"\s+" is the supported replacement for the deprecated
    # delim_whitespace=True.
    df = pd.read_csv(raw_file, sep=r"\s+", skiprows=10, header=0, memory_map=True)
    vibration_data = df.iloc[:, column_question].values

    # Short-time Fourier transform: 1024 Hz sampling rate, 1024-sample
    # Hann window, 50% overlap. window="hann" is equivalent to the
    # removed scipy.signal.hann(1024) with nperseg=1024.
    from scipy.signal import stft

    freq, times, Zxx = stft(
        vibration_data,
        fs=1024,
        window="hann",
        nperseg=1024,
        noverlap=512,
    )

    # After transposing, rows are time frames and columns are frequency
    # bins, so name the columns from the actual frequency axis returned
    # by stft (Zxx.shape[0] bins) — using the time axis here would make
    # the column-name list the wrong length.
    data = pd.DataFrame(
        np.abs(Zxx).T,
        columns=[f"Freq_{f:.2f}" for f in freq],
    )
    data = data.rename(columns={"Freq_0.00": "00"})  # To match the model input format

    model = load(models)  # Load the model from the provided path
    return calculate_label_percentages(model.predict(data))
Reference in New Issue
Block a user