[FEAT] Feat Include Undamaged Node Classification #98

Merged
nuluh merged 15 commits from feat/53-feat-include-undamaged-node-classification into dev 2025-06-18 02:06:04 +00:00
3 changed files with 217 additions and 38 deletions
Showing only changes of commit 4b0819f94e - Show all commits

View File

@@ -1,4 +1,7 @@
{ {
"python.analysis.extraPaths": ["./code/src/features"], "python.analysis.extraPaths": [
"./code/src/features",
"${workspaceFolder}/code/src"
],
"jupyter.notebookFileRoot": "${workspaceFolder}/code" "jupyter.notebookFileRoot": "${workspaceFolder}/code"
} }

View File

@@ -17,8 +17,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"sensor1 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_1_TEST1_01.csv',sep=',')\n", "sensor1 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_0_TEST1_01.csv',sep=',')\n",
"sensor2 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_1_TEST1_02.csv',sep=',')" "sensor2 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_0_TEST1_02.csv',sep=',')"
] ]
}, },
{ {
@@ -101,13 +101,16 @@
"source": [ "source": [
"# Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n", "# Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n",
"\n", "\n",
"plt.plot(df1['s2'], label='sensor 2')\n", "plt.plot(df1['s2'], label='Sensor 1', color='C1', alpha=0.6)\n",
"plt.plot(df1['s1'], label='sensor 1', alpha=0.5)\n", "plt.plot(df1['s1'], label='Sensor 2', color='C0', alpha=0.6)\n",
"plt.xlabel(\"Number of samples\")\n", "plt.xlabel(\"Number of samples\")\n",
"plt.ylabel(\"Amplitude\")\n", "plt.ylabel(\"Amplitude\")\n",
"plt.title(\"Raw vibration signal\")\n", "plt.title(\"Raw vibration signal\")\n",
"plt.ylim(-7.5, 5)\n", "plt.ylim(-7.5, 5)\n",
"plt.legend()\n", "plt.legend()\n",
"plt.locator_params(axis='x', nbins=8)\n",
"plt.ylim(-1, 1) # Adjust range as needed\n",
"plt.grid(True, linestyle='--', alpha=0.5)\n",
"plt.show()" "plt.show()"
] ]
}, },
@@ -334,9 +337,44 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# len(ready_data1a)\n", "import numpy as np\n",
"# plt.pcolormesh(ready_data1[0])\n", "import matplotlib.pyplot as plt\n",
"ready_data1a[0].max().max()" "from mpl_toolkits.mplot3d import Axes3D\n",
"\n",
"# Assuming ready_data1a[0] is a DataFrame or 2D array\n",
"spectrogram_data = ready_data1a[0].values # Convert to NumPy array if it's a DataFrame\n",
"\n",
"# Get the dimensions of the spectrogram\n",
"num_frequencies, num_time_frames = spectrogram_data.shape\n",
"\n",
"# Create frequency and time arrays\n",
"frequencies = np.arange(num_frequencies) # Replace with actual frequency values if available\n",
"time_frames = np.arange(num_time_frames) # Replace with actual time values if available\n",
"\n",
"# Create a meshgrid for plotting\n",
"T, F = np.meshgrid(time_frames, frequencies)\n",
"\n",
"# Create a 3D plot\n",
"fig = plt.figure(figsize=(12, 8))\n",
"ax = fig.add_subplot(111, projection='3d')\n",
"\n",
"# Plot the surface\n",
"surf = ax.plot_surface(T, F, spectrogram_data, cmap='bwr', edgecolor='none')\n",
"\n",
"# Add labels and a color bar\n",
"ax.set_xlabel('Time Frames')\n",
"ax.set_ylabel('Frequency [Hz]')\n",
"ax.set_zlabel('Magnitude')\n",
"ax.set_title('3D Spectrogram')\n",
"# Resize the z-axis (shrink it)\n",
"z_min, z_max = 0, 0.1 # Replace with your desired range\n",
"ax.set_zlim(z_min, z_max)\n",
"ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1, 1, 0.5, 1])) # Shrink z-axis by 50%\n",
"ax.set_facecolor('white')\n",
"fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10)\n",
"\n",
"# Show the plot\n",
"plt.show()"
] ]
}, },
{ {
@@ -345,13 +383,32 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n", "for i in range(6):\n",
" plt.pcolormesh(ready_data1a[i], cmap=\"jet\", vmax=0.03, vmin=0.0)\n", " pcm = axes[i].pcolormesh(ready_data1a[i].transpose(), cmap='bwr', vmax=0.03, vmin=0.0)\n",
" plt.colorbar() \n", " axes[i].set_title(f'Case {i} Sensor A', fontsize=12)\n",
" plt.title(f'STFT Magnitude for case {i} sensor 1')\n", "\n",
" plt.xlabel(f'Frequency [Hz]')\n", "# Add a single color bar for all subplots\n",
" plt.ylabel(f'Time [sec]')\n", "# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
" plt.show()" "cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
] ]
}, },
{ {
@@ -576,6 +633,16 @@
"X2a, y = create_ready_data('D:/thesis/data/converted/raw/sensor2')" "X2a, y = create_ready_data('D:/thesis/data/converted/raw/sensor2')"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X1a.iloc[-1,:]\n",
"# y[2565]"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -621,23 +688,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def train_and_evaluate_model(model, model_name, sensor_label, x_train, y_train, x_test, y_test):\n", "from src.ml.model_selection import train_and_evaluate_model\n",
" model.fit(x_train, y_train)\n", "from sklearn.svm import SVC\n",
" y_pred = model.predict(x_test)\n",
" accuracy = accuracy_score(y_test, y_pred) * 100\n",
" return {\n",
" \"model\": model_name,\n",
" \"sensor\": sensor_label,\n",
" \"accuracy\": accuracy\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define models for sensor1\n", "# Define models for sensor1\n",
"models_sensor1 = {\n", "models_sensor1 = {\n",
" # \"Random Forest\": RandomForestClassifier(),\n", " # \"Random Forest\": RandomForestClassifier(),\n",
@@ -646,12 +698,12 @@
" # \"KNN\": KNeighborsClassifier(),\n", " # \"KNN\": KNeighborsClassifier(),\n",
" # \"LDA\": LinearDiscriminantAnalysis(),\n", " # \"LDA\": LinearDiscriminantAnalysis(),\n",
" \"SVM\": SVC(),\n", " \"SVM\": SVC(),\n",
" \"XGBoost\": XGBClassifier()\n", " # \"XGBoost\": XGBClassifier()\n",
"}\n", "}\n",
"\n", "\n",
"results_sensor1 = []\n", "results_sensor1 = []\n",
"for name, model in models_sensor1.items():\n", "for name, model in models_sensor1.items():\n",
" res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test)\n", " res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/sensor1')\n",
" results_sensor1.append(res)\n", " results_sensor1.append(res)\n",
" print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n" " print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n"
] ]
@@ -669,12 +721,12 @@
" # \"KNN\": KNeighborsClassifier(),\n", " # \"KNN\": KNeighborsClassifier(),\n",
" # \"LDA\": LinearDiscriminantAnalysis(),\n", " # \"LDA\": LinearDiscriminantAnalysis(),\n",
" \"SVM\": SVC(),\n", " \"SVM\": SVC(),\n",
" \"XGBoost\": XGBClassifier()\n", " # \"XGBoost\": XGBClassifier()\n",
"}\n", "}\n",
"\n", "\n",
"results_sensor2 = []\n", "results_sensor2 = []\n",
"for name, model in models_sensor2.items():\n", "for name, model in models_sensor2.items():\n",
" res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train, x_test2, y_test)\n", " res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train, x_test2, y_test, export='D:/thesis/models/sensor2')\n",
" results_sensor2.append(res)\n", " results_sensor2.append(res)\n",
" print(f\"{name} on sensor2: Accuracy = {res['accuracy']:.2f}%\")\n" " print(f\"{name} on sensor2: Accuracy = {res['accuracy']:.2f}%\")\n"
] ]
@@ -787,6 +839,8 @@
"source": [ "source": [
"from sklearn.metrics import accuracy_score, classification_report\n", "from sklearn.metrics import accuracy_score, classification_report\n",
"# 4. Validate on Dataset B\n", "# 4. Validate on Dataset B\n",
"from joblib import load\n",
"svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
"y_pred_svm = svm_model.predict(X1b)\n", "y_pred_svm = svm_model.predict(X1b)\n",
"\n", "\n",
"# 5. Evaluate\n", "# 5. Evaluate\n",
@@ -794,6 +848,30 @@
"print(classification_report(y, y_pred_svm))" "print(classification_report(y, y_pred_svm))"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model sensor 1 to predict sensor 2 data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score, classification_report\n",
"# 4. Validate on Dataset B\n",
"from joblib import load\n",
"svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
"y_pred_svm = svm_model.predict(X2b)\n",
"\n",
"# 5. Evaluate\n",
"print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n",
"print(classification_report(y, y_pred_svm))"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -853,7 +931,7 @@
"# Plot\n", "# Plot\n",
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n", "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
"disp.plot(cmap=plt.cm.Blues) # You can change colormap\n", "disp.plot(cmap=plt.cm.Blues) # You can change colormap\n",
"plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B\")\n", "plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B from Sensor2 readings\")\n",
"plt.show()" "plt.show()"
] ]
}, },
@@ -871,14 +949,14 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# 1. Predict sensor 1 on Dataset A\n", "# 1. Predict sensor 1 on Dataset A\n",
"y_train_pred = svm_model.predict(x_train1)\n", "y_test_pred = svm_model.predict(x_test1)\n",
"\n", "\n",
"# 2. Import confusion matrix tools\n", "# 2. Import confusion matrix tools\n",
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"\n", "\n",
"# 3. Create and plot confusion matrix\n", "# 3. Create and plot confusion matrix\n",
"cm_train = confusion_matrix(y_train, y_train_pred)\n", "cm_train = confusion_matrix(y_test, y_test_pred)\n",
"labels = svm_model.classes_\n", "labels = svm_model.classes_\n",
"\n", "\n",
"disp = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=labels)\n", "disp = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=labels)\n",

View File

@@ -55,3 +55,101 @@ def create_ready_data(
y = np.array([]) y = np.array([])
return X, y return X, y
def train_and_evaluate_model(
    model, model_name, sensor_label, x_train, y_train, x_test, y_test, export=None
):
    """
    Train a model, evaluate it on held-out data, and optionally export it.

    The model is fitted on the training split, scored on the test split with
    sklearn's accuracy metric, and (if ``export`` is given) persisted to disk
    as ``<export>/<model_name>.joblib``. Failures in any stage are captured in
    the returned dict instead of being raised, so a batch of models can be
    evaluated without one failure aborting the loop.

    Parameters
    ----------
    model : estimator object
        Any object exposing ``fit(X, y)`` and ``predict(X)``.
    model_name : str
        Name of the model, used for the export filename and in the returned
        results.
    sensor_label : str
        Label identifying which sensor's data the model is being trained on.
    x_train : array-like or pandas.DataFrame
        The training input samples.
    y_train : array-like
        The target values for training.
    x_test : array-like or pandas.DataFrame
        The test input samples.
    y_test : array-like
        The target values for testing.
    export : str, optional
        Directory path where the trained model should be saved. If None,
        the model won't be saved.

    Returns
    -------
    dict
        Always contains:
        - 'model': model_name (str)
        - 'sensor': sensor_label (str)
        - 'success': True only when training, prediction and scoring all
          completed; an export failure does NOT clear this flag (bool)
        On success additionally contains:
        - 'accuracy': accuracy percentage (float)
        On failure instead contains:
        - 'error': description of the stage that failed (str)
        May additionally contain:
        - 'export_error': message when saving the trained model failed (str)

    Example
    -------
    >>> from sklearn.svm import SVC
    >>> from sklearn.model_selection import train_test_split
    >>> X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)
    >>> result = train_and_evaluate_model(
    ...     SVC(),
    ...     "SVM",
    ...     "sensor1",
    ...     X_train,
    ...     y_train,
    ...     X_test,
    ...     y_test,
    ...     export="models/sensor1"
    ... )
    >>> print(f"Model accuracy: {result['accuracy']:.2f}%")
    """
    result = {"model": model_name, "sensor": sensor_label, "success": False}
    try:
        # Train the model
        model.fit(x_train, y_train)
        try:
            y_pred = model.predict(x_test)
        except Exception as e:
            result["error"] = f"Prediction error: {str(e)}"
            return result
        # Calculate accuracy. sklearn is imported lazily, at its point of
        # use, so earlier stages (and their error reporting) still work in
        # environments where sklearn is unavailable.
        try:
            from sklearn.metrics import accuracy_score

            accuracy = accuracy_score(y_test, y_pred) * 100
            result["accuracy"] = accuracy
        except Exception as e:
            result["error"] = f"Accuracy calculation error: {str(e)}"
            return result
        # Export model if requested. An export failure is reported but does
        # not invalidate the evaluation result.
        if export:
            try:
                import os

                import joblib

                full_path = os.path.join(export, f"{model_name}.joblib")
                # Create the export directory directly (simpler than taking
                # dirname of the joined path, which yields the same folder).
                os.makedirs(export, exist_ok=True)
                joblib.dump(model, full_path)
                print(f"Model saved to {full_path}")
            except Exception as e:
                print(f"Warning: Failed to export model to {export}: {str(e)}")
                result["export_error"] = str(e)
                # Continue despite export error
        result["success"] = True
        return result
    except Exception as e:
        result["error"] = f"Training error: {str(e)}"
        return result