From 7a7b2a41af19583b4a87702ce67ee3f62a16adfc Mon Sep 17 00:00:00 2001 From: nuluh Date: Tue, 26 Aug 2025 20:13:41 +0700 Subject: [PATCH] fix(notebooks): update variable names for clarity and add timing evaluation for model predictions on Dataset B --- code/notebooks/stft.ipynb | 152 +++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 52 deletions(-) diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb index 03c4e5f..6e9e48d 100644 --- a/code/notebooks/stft.ipynb +++ b/code/notebooks/stft.ipynb @@ -287,8 +287,8 @@ "source": [ "# Define output directories for each sensor exported data\n", "output_dirs = {\n", - " 'sensor1': os.path.join(damage_base_path, 'sensor1'),\n", - " 'sensor2': os.path.join(damage_base_path, 'sensor2')\n", + " 'sensorA': os.path.join(damage_base_path, 'sensorA'),\n", + " 'sensorB': os.path.join(damage_base_path, 'sensorB')\n", "}" ] }, @@ -305,7 +305,7 @@ "\n", "with multiprocessing.Pool() as pool:\n", " # Process each DAMAGE_X case in parallel\n", - " pool.map(process_damage_case, range(num_damage_cases), Fs, window_size, hop_size, output_dirs)" + " pool.map(process_damage_case, range(num_damage_cases))" ] }, { @@ -789,8 +789,8 @@ "source": [ "from src.ml.model_selection import create_ready_data\n", "\n", - "X1b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor1') # sensor A\n", - "X2b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor2') # sensor B" + "X1b, y1 = create_ready_data('D:/thesis/data/converted/raw_B/sensor1') # sensor A\n", + "X2b, y2 = create_ready_data('D:/thesis/data/converted/raw_B/sensor2') # sensor B" ] }, { @@ -807,6 +807,17 @@ "#### Sensor A" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Rename first column using proper pandas method\n", + "X1b = X1b.rename(columns={X1b.columns[0]: \"Freq_0.00\"})\n", + "X2b = X2b.rename(columns={X2b.columns[0]: \"Freq_0.00\"})" + ] + }, { "cell_type": "code", 
"execution_count": null, @@ -815,8 +826,27 @@ "source": [ "# 4. Sensor A Validate on Dataset B\n", "from joblib import load\n", - "svm_model = load('D:/thesis/models/Sensor A/SVM with StandardScaler and PCA.joblib')\n", - "y_pred_svm_1 = svm_model.predict_proba(X1b)" + "from sklearn.svm import SVC\n", + "svm_model: SVC = load('D:/thesis/models/Sensor A/SVM with StandardScaler and PCA.joblib')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "time_taken = np.array([])\n", + "for i in range(5): # Run multiple times to get an average time\n", + " start_time = time.time()\n", + " y_pred_svm_1 = svm_model.predict(X1b)\n", + " end_time = time.time()\n", + " time_taken = np.append(time_taken, end_time - start_time)\n", + "\n", + "print(time_taken)\n", + "print(time_taken.mean())\n" ] }, { @@ -828,9 +858,7 @@ "import numpy as np\n", "\n", "# Set NumPy to display full decimal values\n", - "np.set_printoptions(suppress=True, precision=6) # Suppress scientific notation, set precision to 6 decimals\n", - "\n", - "y_pred_svm_1[1]" + "np.set_printoptions(suppress=True, precision=6) # Suppress scientific notation, set precision to 6 decimals" ] }, { @@ -842,39 +870,14 @@ "from sklearn.metrics import accuracy_score, classification_report\n", "\n", "# 5. 
Evaluate\n", - "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm_1))\n", - "print(classification_report(y, y_pred_svm_1))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Confusion Matrix Sensor A" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "print(\"Accuracy on Dataset B:\", accuracy_score(y1, y_pred_svm_1))\n", + "import pandas as pd\n\ndf = pd.DataFrame(classification_report(y1, y_pred_svm_1, output_dict=True)).T\n", + "# Round numbers nicely and move 'accuracy' into a row that fits your desired layout\n", + "df_rounded = df.round(2)\n", "\n", - "# Create a figure with subplots\n", - "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", - "\n", - "# Calculate confusion matrix\n", - "cm_A = confusion_matrix(y, y_pred_svm_1)\n", - "\n", - "# Get class labels\n", - "labels = svm_model.classes_\n", - "\n", - "# Plot confusion matrix in first subplot\n", - "disp_A = ConfusionMatrixDisplay(confusion_matrix=cm_A, display_labels=labels)\n", - "disp_A.plot(ax=axes[0], cmap=plt.cm.Blues)\n", - "axes[0].set_title(\"Sensor A\")" + "# Export to LaTeX\n", + "latex_table = df_rounded.to_latex(index=True, float_format=\"%.2f\", caption=\"Classification report on Dataset B\", label=\"tab:clf_report_auto\")\n", + "print(latex_table)" ] }, { @@ -892,11 +895,19 @@ "source": [ "# svm_model = load('D:/thesis/models/sensor2/SVM.joblib')\n", "svm_model = load('D:/thesis/models/sensor2/SVM with StandardScaler and PCA.joblib')\n", - "y_pred_svm_2 = svm_model.predict(X2b)\n", - "\n", + "y_pred_svm_2 = svm_model.predict(X2b)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# 5. 
Evaluate\n", + "import pandas as pd\n", "\n", - "df = pd.DataFrame(classification_report(y, y_pred_svm_2, output_dict=True)).T\n", + "df = pd.DataFrame(classification_report(y2, y_pred_svm_2, output_dict=True)).T\n", "# Round numbers nicely and move 'accuracy' into a row that fits your desired layout\n", "df_rounded = df.round(2)\n", "\n", @@ -920,17 +931,54 @@ "source": [ "import matplotlib.pyplot as plt\n", "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "import numpy as np\n", "\n", + "# Create a fresh figure with subplots\n", + "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", "\n", - "cm = confusion_matrix(y, y_pred_svm_2) # -> ndarray\n", + "labels = svm_model.classes_\n\n# Plot confusion matrix for Sensor A\n", + "cm_A = confusion_matrix(y1, y_pred_svm_1)\n", + "disp_A = ConfusionMatrixDisplay(confusion_matrix=cm_A, display_labels=labels)\n", + "disp_A.plot(ax=axes[0], cmap=plt.cm.Blues)\n", + "axes[0].set_title(\"Sensor A\")\n", "\n", - "# get the class labels\n", - "labels = svm_model.classes_\n", + "# Plot confusion matrix for Sensor B\n", + "cm_B = confusion_matrix(y2, y_pred_svm_2)\n", + "disp_B = ConfusionMatrixDisplay(confusion_matrix=cm_B, display_labels=labels)\n", + "disp_B.plot(ax=axes[1], cmap=plt.cm.Blues)\n", + "axes[1].set_title(\"Sensor B\")\n", "\n", - "# Plot\n", - "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n", - "disp.plot(cmap=plt.cm.Blues) # You can change colormap\n", - "plt.title(\"Confusion Matrix of Sensor B Test on Dataset B\")\n", + "# Find and modify colorbars to show max values\n", + "# The colorbars are typically the 3rd and 4th axes in the figure\n", + "for i, (cbar_idx, cm) in enumerate(zip([2, 3], [cm_A, cm_B])):\n", + " if cbar_idx < len(fig.axes):\n", + " cbar_ax = fig.axes[cbar_idx]\n", + " \n", + " # Get max value from the confusion matrix\n", + " max_val = cm.max()\n", + " \n", + " # Create a new set of ticks with reasonable spacing and ending with max_val\n", + " # For example, if max is 
around 2560, create ticks: [0, 500, 1000, 1500, 2000, 2560]\n", + " tick_interval = 500\n", + " new_ticks = list(range(0, int(max_val), tick_interval))\n", + " if np.isclose(new_ticks[-1], max_val, rtol=0.05):\n", + " new_ticks[-1] = max_val \n", + " else:\n", + " new_ticks.extend([max_val])\n", + " # Set the new ticks\n", + " cbar_ax.set_yticks(new_ticks)\n", + " \n", + " # Format tick labels as integers\n", + " # cbar_ax.set_yticklabels([f\"{int(t)}\" if t.is_integer() else f\"{t:.1f}\" for t in new_ticks])\n", + "\n", + "# Set SVG font rendering for better PDF output\n", + "plt.rcParams['svg.fonttype'] = 'none'\n", + "\n", + "# Adjust layout\n", + "plt.tight_layout()\n", + "\n", + "# Save and show\n", + "plt.savefig(\"output.svg\")\n", "plt.show()" ] },