refactor(notebooks): update visualization for sensor analysis and streamline data processing

2025-06-24 14:08:02 +07:00
parent 5041ee3feb
commit 459fbcc17a
1 changed files with 45 additions and 28 deletions
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
@@ -17,8 +17,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "sensor1 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_0_TEST1_01.csv',sep=',')\n",
+    "sensor1 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_0/DAMAGE_0_TEST1_01.csv',sep=',')\n",
-    "sensor2 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_0_TEST1_02.csv',sep=',')"
+    "sensor2 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_0/DAMAGE_0_TEST1_02.csv',sep=',')"
   ]
  },
  {
@@ -276,7 +276,7 @@
    "from scipy.signal import stft, hann\n",
    "\n",
    "# Applying STFT\n",
-    "vibration_data = signal_sensor1_test1[1]\n",
+    "vibration_data = df1['s1'].values  # Using sensor 1 data for STFT\n",
    "window_size = 1024\n",
    "hop_size = 512\n",
    "window = hann(window_size)  # Creating a Hanning window\n",
@@ -288,10 +288,9 @@
    "                               nperseg=window_size, \n",
    "                               noverlap=window_size - hop_size)\n",
    "# Plotting the STFT Data\n",
-    "plt.pcolormesh(times, frequencies, np.abs(Zxx), shading='gouraud')\n",
+    "plt.pcolormesh(times, frequencies, np.abs(Zxx), cmap='jet', vmax=0.03, vmin=0.0)\n",
-    "plt.title(f'STFT Magnitude for case {1} signal sensor 2')\n",
+    "# plt.ylabel(f'Frequency [Hz]')\n",
-    "plt.ylabel(f'Frequency [Hz]')\n",
+    "# plt.xlabel(f'Time [sec]')\n",
    "plt.xlabel(f'Time [sec]')\n",
    "plt.show()\n",
    "\n",
    "# get current y ticks in list\n",
@@ -326,7 +325,7 @@
    "import matplotlib.pyplot as plt\n",
    "ready_data1a = []\n",
    "for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
-    "    ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file)))\n",
+    "    ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file), skiprows=1))\n",
    "# pcolormesh: give it a title (x is frequency, y is time) and rotate/transpose the data\n",
    "# Plotting the STFT Data"
   ]
@@ -377,6 +376,17 @@
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dpi\n",
    "plt.figure(dpi=300)  # Set figure DPI (figure size is left at the default)\n",
    "plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -392,7 +402,7 @@
    "\n",
    "# Loop through each subplot and plot the data\n",
    "for i in range(6):\n",
-    "    pcm = axes[i].pcolormesh(ready_data1a[i].transpose(), cmap='bwr', vmax=0.03, vmin=0.0)\n",
+    "    pcm = axes[i].pcolormesh(ready_data1a[i].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
    "    axes[i].set_title(f'Case {i} Sensor A', fontsize=12)\n",
    "\n",
    "# Add a single color bar for all subplots\n",
@@ -419,7 +429,18 @@
   "source": [
    "ready_data2a = []\n",
    "for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
-    "    ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file)))"
+    "    ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dpi\n",
    "plt.figure(dpi=300)  # Set figure DPI (figure size is left at the default)\n",
    "plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)"
   ]
  },
  {
@@ -651,7 +672,9 @@
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# sensor A\n",
    "x_train1, x_test1, y_train, y_test = train_test_split(X1a, y, test_size=0.2, random_state=2)\n",
    "# sensor B\n",
    "x_train2, x_test2, y_train, y_test = train_test_split(X2a, y, test_size=0.2, random_state=2)"
   ]
  },
@@ -724,23 +747,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from joblib import load\n",
+    "from src.ml.model_selection import plot_confusion_matrix\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
    "\n",
-    "for i in results_sensor1:\n",
+    "# Plot confusion matrix for sensor1\n",
-    "    model = load(f\"D:/thesis/models/sensor1/{i['model']}.joblib\")\n",
+    "plot_confusion_matrix(results_sensor1, y_test)"
    "    y_pred = model.predict(x_test1)\n",
    "    cm = confusion_matrix(y_test, y_pred) # -> ndarray\n",
    "\n",
    "    # get the class labels\n",
    "    labels = model.classes_\n",
    "\n",
    "    # Plot\n",
    "    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
    "    disp.plot(cmap=plt.cm.Blues)  # You can change colormap\n",
    "    plt.title(f\"{i['model']} Sensor A CM Training\")\n",
    "    plt.show()\n"
   ]
  },
  {
@@ -761,7 +771,7 @@
    "    PCA(n_components=10),\n",
    "    SVC(kernel='rbf')\n",
    "    ),\n",
-    "    # \"XGBoost\": XGBClassifier()\n",
+    "    \"XGBoost\": XGBClassifier()\n",
    "}\n",
    "\n",
    "results_sensor2 = []\n",
@@ -850,6 +860,13 @@
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -972,7 +989,7 @@
    "# Plot\n",
    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
    "disp.plot(cmap=plt.cm.Blues)  # You can change colormap\n",
-    "plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B from Sensor2 readings\")\n",
+    "plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B from Sensor1 readings\")\n",
    "plt.show()"
   ]
  },