From 7a7b2a41af19583b4a87702ce67ee3f62a16adfc Mon Sep 17 00:00:00 2001 From: nuluh Date: Tue, 26 Aug 2025 20:13:41 +0700 Subject: [PATCH] fix(notebooks): update variable names for clarity and add timing evaluation for model predictions on Dataset B --- code/notebooks/stft.ipynb | 152 +++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 52 deletions(-) diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb index 03c4e5f..6e9e48d 100644 --- a/code/notebooks/stft.ipynb +++ b/code/notebooks/stft.ipynb @@ -287,8 +287,8 @@ "source": [ "# Define output directories for each sensor exported data\n", "output_dirs = {\n", - " 'sensor1': os.path.join(damage_base_path, 'sensor1'),\n", - " 'sensor2': os.path.join(damage_base_path, 'sensor2')\n", + " 'sensorA': os.path.join(damage_base_path, 'sensorA'),\n", + " 'sensorB': os.path.join(damage_base_path, 'sensorB')\n", "}" ] }, @@ -305,7 +305,7 @@ "\n", "with multiprocessing.Pool() as pool:\n", " # Process each DAMAGE_X case in parallel\n", - " pool.map(process_damage_case, range(num_damage_cases), Fs, window_size, hop_size, output_dirs)" + " pool.map(process_damage_case, range(num_damage_cases))" ] }, { @@ -789,8 +789,8 @@ "source": [ "from src.ml.model_selection import create_ready_data\n", "\n", - "X1b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor1') # sensor A\n", - "X2b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor2') # sensor B" + "X1b, y1 = create_ready_data('D:/thesis/data/converted/raw_B/sensor1') # sensor A\n", + "X2b, y2 = create_ready_data('D:/thesis/data/converted/raw_B/sensor2') # sensor B" ] }, { @@ -807,6 +807,17 @@ "#### Sensor A" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Rename first column using proper pandas method\n", + "X1b = X1b.rename(columns={X1b.columns[0]: \"Freq_0.00\"})\n", + "X2b = X2b.rename(columns={X2b.columns[0]: \"Freq_0.00\"})" + ] + }, { "cell_type": "code", 
"execution_count": null, @@ -815,8 +826,27 @@ "source": [ "# 4. Sensor A Validate on Dataset B\n", "from joblib import load\n", - "svm_model = load('D:/thesis/models/Sensor A/SVM with StandardScaler and PCA.joblib')\n", - "y_pred_svm_1 = svm_model.predict_proba(X1b)" + "from sklearn.svm import SVC\n", + "svm_model: SVC = load('D:/thesis/models/Sensor A/SVM with StandardScaler and PCA.joblib')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "time_taken = np.array([])\n", + "for i in range(5): # Run multiple times to get an average time\n", + " start_time = time.time()\n", + " y_pred_svm_1 = svm_model.predict(X1b)\n", + " end_time = time.time()\n", + " time_taken = np.append(time_taken, end_time - start_time)\n", + "\n", + "print(time_taken)\n", + "print(time_taken.mean())\n" ] }, { @@ -828,9 +858,7 @@ "import numpy as np\n", "\n", "# Set NumPy to display full decimal values\n", - "np.set_printoptions(suppress=True, precision=6) # Suppress scientific notation, set precision to 6 decimals\n", - "\n", - "y_pred_svm_1[1]" + "np.set_printoptions(suppress=True, precision=6) # Suppress scientific notation, set precision to 6 decimals" ] }, { @@ -842,39 +870,14 @@ "from sklearn.metrics import accuracy_score, classification_report\n", "\n", "# 5. 
Evaluate\n", - "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm_1))\n", - "print(classification_report(y, y_pred_svm_1))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Confusion Matrix Sensor A" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "print(\"Accuracy on Dataset B:\", accuracy_score(y1, y_pred_svm_1))\n", + "import pandas as pd\n\ndf = pd.DataFrame(classification_report(y1, y_pred_svm_1, output_dict=True)).T\n", + "# Round numbers nicely and move 'accuracy' into a row that fits your desired layout\n", + "df_rounded = df.round(2)\n", "\n", - "# Create a figure with subplots\n", - "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", - "\n", - "# Calculate confusion matrix\n", - "cm_A = confusion_matrix(y, y_pred_svm_1)\n", - "\n", - "# Get class labels\n", - "labels = svm_model.classes_\n", - "\n", - "# Plot confusion matrix in first subplot\n", - "disp_A = ConfusionMatrixDisplay(confusion_matrix=cm_A, display_labels=labels)\n", - "disp_A.plot(ax=axes[0], cmap=plt.cm.Blues)\n", - "axes[0].set_title(\"Sensor A\")" + "# Export to LaTeX\n", + "latex_table = df_rounded.to_latex(index=True, float_format=\"%.2f\", caption=\"Classification report on Dataset B\", label=\"tab:clf_report_auto\")\n", + "print(latex_table)" ] }, { @@ -892,11 +895,19 @@ "source": [ "# svm_model = load('D:/thesis/models/sensor2/SVM.joblib')\n", "svm_model = load('D:/thesis/models/sensor2/SVM with StandardScaler and PCA.joblib')\n", - "y_pred_svm_2 = svm_model.predict(X2b)\n", - "\n", + "y_pred_svm_2 = svm_model.predict(X2b)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# 5. 
Evaluate\n", + "import pandas as pd\n", "\n", - "df = pd.DataFrame(classification_report(y, y_pred_svm_2, output_dict=True)).T\n", + "df = pd.DataFrame(classification_report(y2, y_pred_svm_2, output_dict=True)).T\n", "# Round numbers nicely and move 'accuracy' into a row that fits your desired layout\n", "df_rounded = df.round(2)\n", "\n", @@ -920,17 +931,54 @@ "source": [ "import matplotlib.pyplot as plt\n", "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "import numpy as np\n", "\n", + "# Create a fresh figure with subplots\n", + "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", "\n", - "cm = confusion_matrix(y, y_pred_svm_2) # -> ndarray\n", + "labels = svm_model.classes_\n\n# Plot confusion matrix for Sensor A\n", + "cm_A = confusion_matrix(y1, y_pred_svm_1)\n", + "disp_A = ConfusionMatrixDisplay(confusion_matrix=cm_A, display_labels=labels)\n", + "disp_A.plot(ax=axes[0], cmap=plt.cm.Blues)\n", + "axes[0].set_title(\"Sensor A\")\n", "\n", - "# get the class labels\n", - "labels = svm_model.classes_\n", + "# Plot confusion matrix for Sensor B\n", + "cm_B = confusion_matrix(y2, y_pred_svm_2)\n", + "disp_B = ConfusionMatrixDisplay(confusion_matrix=cm_B, display_labels=labels)\n", + "disp_B.plot(ax=axes[1], cmap=plt.cm.Blues)\n", + "axes[1].set_title(\"Sensor B\")\n", "\n", - "# Plot\n", - "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n", - "disp.plot(cmap=plt.cm.Blues) # You can change colormap\n", - "plt.title(\"Confusion Matrix of Sensor B Test on Dataset B\")\n", + "# Find and modify colorbars to show max values\n", + "# The colorbars are typically the 3rd and 4th axes in the figure\n", + "for i, (cbar_idx, cm) in enumerate(zip([2, 3], [cm_A, cm_B])):\n", + " if cbar_idx < len(fig.axes):\n", + " cbar_ax = fig.axes[cbar_idx]\n", + " \n", + " # Get max value from the confusion matrix\n", + " max_val = cm.max()\n", + " \n", + " # Create a new set of ticks with reasonable spacing and ending with max_val\n", + " # For example, if max is 
around 2560, create ticks: [0, 500, 1000, 1500, 2000, 2560]\n", + " tick_interval = 500\n", + " new_ticks = list(range(0, int(max_val), tick_interval))\n", + " if np.isclose(new_ticks[-1], max_val, rtol=0.05):\n", + " new_ticks[-1] = max_val \n", + " else:\n", + " new_ticks.extend([max_val])\n", + " # Set the new ticks\n", + " cbar_ax.set_yticks(new_ticks)\n", + " \n", + " # Format tick labels as integers\n", + " # cbar_ax.set_yticklabels([f\"{int(t)}\" if t.is_integer() else f\"{t:.1f}\" for t in new_ticks])\n", + "\n", + "# Set SVG font rendering for better PDF output\n", + "plt.rcParams['svg.fonttype'] = 'none'\n", + "\n", + "# Adjust layout\n", + "plt.tight_layout()\n", + "\n", + "# Save and show\n", + "plt.savefig(\"output.svg\")\n", "plt.show()" ] },