From 5b0b3dd4e59cae392ff8da7440dfe515879de286 Mon Sep 17 00:00:00 2001
From: nuluh <dam.ar@outlook.com>
Date: Thu, 24 Apr 2025 16:13:50 +0700
Subject: [PATCH] feat(notebook): Add evaluation metrics and confusion matrix
 visualizations for model predictions on Dataset B. Remove commented-out code
 and integrate data preparation using create_ready_data function.

---
 code/notebooks/stft.ipynb | 144 ++++++++++++++++++++++----------------
 1 file changed, 85 insertions(+), 59 deletions(-)

diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb
index 41137d9..7a86841 100644
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
@@ -815,58 +815,6 @@
     "import matplotlib.pyplot as plt"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def spectograph(data_dir: str):\n",
-    "    # print(os.listdir(data_dir))\n",
-    "    for damage in os.listdir(data_dir):\n",
-    "        # print(damage)\n",
-    "        d = os.path.join(data_dir, damage)\n",
-    "        # print(d)\n",
-    "        for file in os.listdir(d):\n",
-    "            # print(file)\n",
-    "            f = os.path.join(d, file)\n",
-    "            print(f)\n",
-    "            # sensor1 = pd.read_csv(f, skiprows=1, sep=';')\n",
-    "            # sensor2 = pd.read_csv(f, skiprows=1, sep=';')\n",
-    "\n",
-    "            # df1 = pd.DataFrame()\n",
-    "\n",
-    "            # df1['s1'] = sensor1[sensor1.columns[-1]]\n",
-    "            # df1['s2'] = sensor2[sensor2.columns[-1]]\n",
-    "            # # Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n",
-    "\n",
-    "            # plt.plot(df1['s2'], label='sensor 2')\n",
-    "            # plt.plot(df1['s1'], label='sensor 1')\n",
-    "            # plt.xlabel(\"Number of samples\")\n",
-    "            # plt.ylabel(\"Amplitude\")\n",
-    "            # plt.title(\"Raw vibration signal\")\n",
-    "            # plt.legend()\n",
-    "            # plt.show()\n",
-    "\n",
-    "            # from scipy import signal\n",
-    "            # from scipy.signal.windows import hann\n",
-    "\n",
-    "            # vibration_data = df1['s1']\n",
-    "\n",
-    "            # # Applying STFT\n",
-    "            # window_size = 1024\n",
-    "            # hop_size = 512\n",
-    "            # window = hann(window_size)  # Creating a Hanning window\n",
-    "            # frequencies, times, Zxx = signal.stft(vibration_data, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n",
-    "\n",
-    "            # # Plotting the STFT Data\n",
-    "            # plt.pcolormesh(times, frequencies, np.abs(Zxx), shading='gouraud')\n",
-    "            # plt.title(f'STFT Magnitude for case 1 signal sensor 1 ')\n",
-    "            # plt.ylabel('Frequency [Hz]')\n",
-    "            # plt.xlabel('Time [sec]')\n",
-    "            # plt.show()"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -896,7 +844,22 @@
    "source": [
     "from sklearn.metrics import accuracy_score, classification_report\n",
     "# 4. Validate on Dataset B\n",
-    "y_pred = svm_model.predict(X1b)\n",
+    "y_pred_svm = svm_model.predict(X1b)\n",
+    "\n",
+    "# 5. Evaluate\n",
+    "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n",
+    "print(classification_report(y, y_pred_svm))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import accuracy_score, classification_report\n",
+    "# 4. Validate on Dataset B\n",
+    "y_pred = rf_model2.predict(X2b)\n",
     "\n",
     "# 5. Evaluate\n",
     "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred))\n",
@@ -909,13 +872,76 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.metrics import accuracy_score, classification_report\n",
-    "# 4. Validate on Dataset B\n",
-    "y_pred = svm_model2.predict(X2b)\n",
+    "y_predict = svm_model2.predict(X2b.iloc[[5312],:])\n",
+    "print(y_predict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y[5312]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Confusion Matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
     "\n",
-    "# 5. Evaluate\n",
-    "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred))\n",
-    "print(classification_report(y, y_pred))"
+    "\n",
+    "cm = confusion_matrix(y, y_pred_svm) # -> ndarray\n",
+    "\n",
+    "# get the class labels\n",
+    "labels = svm_model.classes_\n",
+    "\n",
+    "# Plot\n",
+    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
+    "disp.plot(cmap=plt.cm.Blues)  # You can change colormap\n",
+    "plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Self-Test Confusion Matrix (Dataset A)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 1. Predict with the sensor 1 model on its own training data (Dataset A)\n",
+    "y_train_pred = svm_model.predict(x_train1)\n",
+    "\n",
+    "# 2. Import confusion matrix tools\n",
+    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# 3. Create and plot confusion matrix\n",
+    "cm_train = confusion_matrix(y_train, y_train_pred)\n",
+    "labels = svm_model.classes_\n",
+    "\n",
+    "disp = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=labels)\n",
+    "disp.plot(cmap=plt.cm.Blues)\n",
+    "plt.title(\"Confusion Matrix: Train & Test on Dataset A\")\n",
+    "plt.show()\n"
    ]
   }
  ],