From 0e28ed6dd08812fd6ae110a46699bc5e2d23b25a Mon Sep 17 00:00:00 2001
From: nuluh
Date: Mon, 28 Jul 2025 16:41:54 +0700
Subject: [PATCH] feat(notebooks): add cross-dataset validation for Sensor A and Sensor B models

Closes #74
---
 code/notebooks/stft.ipynb | 188 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)

diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb
index ceb9176..4510922 100644
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
@@ -938,6 +938,194 @@
     "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n",
     "print(classification_report(y, y_pred_svm))"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Cross Dataset Validation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# Sensor A: hold out 20% for testing (fixed seed so the split is reproducible)\n",
+    "x_train1, x_test1, y_train1, y_test1 = train_test_split(X1b, y, test_size=0.2, random_state=2)\n",
+    "# Sensor B: same split parameters so both sensors are evaluated alike\n",
+    "x_train2, x_test2, y_train2, y_test2 = train_test_split(X2b, y, test_size=0.2, random_state=2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_sensor1 = []\n",
+    "for name, model in models_sensor1.items():\n",
+    "    res = train_and_evaluate_model(model, name, \"Sensor A\", x_train1, y_train1, x_test1, y_test1, \n",
+    "                                   export='D:/thesis/datasetB/models/sensor1')\n",
+    "    results_sensor1.append(res)\n",
+    "    print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n",
+    "\n",
+    "# Display result\n",
+    "results_sensor1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_sensor2 = []\n",
+    "for name, model in models_sensor2.items():\n",
+    "    res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train2, x_test2, y_test2, \n",
+    "                                   export='D:/thesis/datasetB/models/sensor2')\n",
+    "    results_sensor2.append(res)\n",
+    "    print(f\"{name} on sensor2: Accuracy = {res['accuracy']:.2f}%\")\n",
+    "\n",
+    "# Display result\n",
+    "results_sensor2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Testing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Sensor A"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 4. Validate the Sensor A SVM exported by the training loop on Dataset A (X1a)\n",
+    "from joblib import load\n",
+    "svm_model = load('D:/thesis/datasetB/models/sensor1/SVM.joblib')\n",
+    "y_pred_svm_1 = svm_model.predict(X1a)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import accuracy_score, classification_report\n",
+    "\n",
+    "# 5. Evaluate (assumes y also holds the Dataset A labels -- TODO confirm)\n",
+    "print(\"Accuracy on Dataset A:\", accuracy_score(y, y_pred_svm_1))\n",
+    "print(classification_report(y, y_pred_svm_1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Confusion Matrix Sensor A"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
+    "\n",
+    "\n",
+    "cm = confusion_matrix(y, y_pred_svm_1)  # -> ndarray\n",
+    "\n",
+    "# get the class labels\n",
+    "labels = svm_model.classes_\n",
+    "\n",
+    "# Plot\n",
+    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
+    "disp.plot(cmap=plt.cm.Blues)  # You can change colormap\n",
+    "plt.title(\"Confusion Matrix of Sensor A Test on Dataset A\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Sensor B"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Validate the Sensor B SVM exported by the training loop on Dataset A (X2a)\n",
+    "svm_model = load('D:/thesis/datasetB/models/sensor2/SVM.joblib')\n",
+    "y_pred_svm_2 = svm_model.predict(X2a)\n",
+    "\n",
+    "# 5. Evaluate (assumes y also holds the Dataset A labels -- TODO confirm)\n",
+    "print(\"Accuracy on Dataset A:\", accuracy_score(y, y_pred_svm_2))\n",
+    "print(classification_report(y, y_pred_svm_2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Confusion Matrix Sensor B"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
+    "\n",
+    "\n",
+    "cm = confusion_matrix(y, y_pred_svm_2)  # -> ndarray\n",
+    "\n",
+    "# get the class labels\n",
+    "labels = svm_model.classes_\n",
+    "\n",
+    "# Plot\n",
+    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
+    "disp.plot(cmap=plt.cm.Blues)  # You can change colormap\n",
+    "plt.title(\"Confusion Matrix of Sensor B Test on Dataset A\")\n",
+    "plt.show()"
+   ]
   }
  ],
  "metadata": {