From 5b0b3dd4e59cae392ff8da7440dfe515879de286 Mon Sep 17 00:00:00 2001 From: nuluh Date: Thu, 24 Apr 2025 16:13:50 +0700 Subject: [PATCH] feat(notebook): Add evaluation metrics and confusion matrix visualizations for model predictions on Dataset B. Remove commented-out code and integrate data preparation using create_ready_data function. --- code/notebooks/stft.ipynb | 144 ++++++++++++++++++++++---------------- 1 file changed, 85 insertions(+), 59 deletions(-) diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb index 41137d9..7a86841 100644 --- a/code/notebooks/stft.ipynb +++ b/code/notebooks/stft.ipynb @@ -815,58 +815,6 @@ "import matplotlib.pyplot as plt" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def spectograph(data_dir: str):\n", - " # print(os.listdir(data_dir))\n", - " for damage in os.listdir(data_dir):\n", - " # print(damage)\n", - " d = os.path.join(data_dir, damage)\n", - " # print(d)\n", - " for file in os.listdir(d):\n", - " # print(file)\n", - " f = os.path.join(d, file)\n", - " print(f)\n", - " # sensor1 = pd.read_csv(f, skiprows=1, sep=';')\n", - " # sensor2 = pd.read_csv(f, skiprows=1, sep=';')\n", - "\n", - " # df1 = pd.DataFrame()\n", - "\n", - " # df1['s1'] = sensor1[sensor1.columns[-1]]\n", - " # df1['s2'] = sensor2[sensor2.columns[-1]]\n", - " # # Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n", - "\n", - " # plt.plot(df1['s2'], label='sensor 2')\n", - " # plt.plot(df1['s1'], label='sensor 1')\n", - " # plt.xlabel(\"Number of samples\")\n", - " # plt.ylabel(\"Amplitude\")\n", - " # plt.title(\"Raw vibration signal\")\n", - " # plt.legend()\n", - " # plt.show()\n", - "\n", - " # from scipy import signal\n", - " # from scipy.signal.windows import hann\n", - "\n", - " # vibration_data = df1['s1']\n", - "\n", - " # # Applying STFT\n", - " # window_size = 1024\n", - " # hop_size = 512\n", - " # window = hann(window_size) # Creating a Hanning window\n", - " # frequencies, times, Zxx = signal.stft(vibration_data, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n", - "\n", - " # # Plotting the STFT Data\n", - " # plt.pcolormesh(times, frequencies, np.abs(Zxx), shading='gouraud')\n", - " # plt.title(f'STFT Magnitude for case 1 signal sensor 1 ')\n", - " # plt.ylabel('Frequency [Hz]')\n", - " # plt.xlabel('Time [sec]')\n", - " # plt.show()" - ] - }, { "cell_type": "code", "execution_count": null, @@ -896,7 +844,22 @@ "source": [ "from sklearn.metrics import accuracy_score, classification_report\n", "# 4. Validate on Dataset B\n", - "y_pred = svm_model.predict(X1b)\n", + "y_pred_svm = svm_model.predict(X1b)\n", + "\n", + "# 5. Evaluate\n", + "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n", + "print(classification_report(y, y_pred_svm))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score, classification_report\n", + "# 4. Validate on Dataset B\n", + "y_pred = rf_model2.predict(X2b)\n", "\n", "# 5. Evaluate\n", "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred))\n", @@ -909,13 +872,76 @@ "metadata": {}, "outputs": [], "source": [ - "from sklearn.metrics import accuracy_score, classification_report\n", - "# 4. Validate on Dataset B\n", - "y_pred = svm_model2.predict(X2b)\n", + "y_predict = svm_model2.predict(X2b.iloc[[5312],:])\n", + "print(y_predict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y[5312]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Confusion Matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", "\n", - "# 5. Evaluate\n", - "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred))\n", - "print(classification_report(y, y_pred))" + "\n", + "cm = confusion_matrix(y, y_pred_svm) # -> ndarray\n", + "\n", + "# get the class labels\n", + "labels = svm_model.classes_\n", + "\n", + "# Plot\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n", + "disp.plot(cmap=plt.cm.Blues) # You can change colormap\n", + "plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Self-test CM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. Predict sensor 1 on Dataset A\n", + "y_train_pred = svm_model.predict(x_train1)\n", + "\n", + "# 2. Import confusion matrix tools\n", + "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# 3. Create and plot confusion matrix\n", + "cm_train = confusion_matrix(y_train, y_train_pred)\n", + "labels = svm_model.classes_\n", + "\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=labels)\n", + "disp.plot(cmap=plt.cm.Blues)\n", + "plt.title(\"Confusion Matrix: Train & Test on Dataset A\")\n", + "plt.show()\n" ] } ],