feat(notebooks): minimize stft.ipynb notebook and add STFT data preview plots.

- Consolidated import statements for pandas and matplotlib.
- Updated STFT plotting for Sensor 1 and Sensor 2 datasets with improved visualization using pcolormesh.
- Enhanced subplot organization for a clearer visual presentation.
- Added titles and adjusted layout for all plots.
This commit is contained in:
nuluh
2025-06-30 01:36:44 +07:00
parent c2df42cc2b
commit a93adc8af3

View File

@@ -312,6 +312,8 @@
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"os.listdir('D:/thesis/data/working')"
]
},
@@ -321,58 +323,14 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"ready_data1a = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
" ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file), skiprows=1))\n",
"# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
"# Plotting the STFT Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"\n",
"# Assuming ready_data1a[0] is a DataFrame or 2D array\n",
"spectrogram_data = ready_data1a[0].values # Convert to NumPy array if it's a DataFrame\n",
"\n",
"# Get the dimensions of the spectrogram\n",
"num_frequencies, num_time_frames = spectrogram_data.shape\n",
"\n",
"# Create frequency and time arrays\n",
"frequencies = np.arange(num_frequencies) # Replace with actual frequency values if available\n",
"time_frames = np.arange(num_time_frames) # Replace with actual time values if available\n",
"\n",
"# Create a meshgrid for plotting\n",
"T, F = np.meshgrid(time_frames, frequencies)\n",
"\n",
"# Create a 3D plot\n",
"fig = plt.figure(figsize=(12, 8))\n",
"ax = fig.add_subplot(111, projection='3d')\n",
"\n",
"# Plot the surface\n",
"surf = ax.plot_surface(T, F, spectrogram_data, cmap='bwr', edgecolor='none')\n",
"\n",
"# Add labels and a color bar\n",
"ax.set_xlabel('Time Frames')\n",
"ax.set_ylabel('Frequency [Hz]')\n",
"ax.set_zlabel('Magnitude')\n",
"ax.set_title('3D Spectrogram')\n",
"# Resize the z-axis (shrink it)\n",
"z_min, z_max = 0, 0.1 # Replace with your desired range\n",
"ax.set_zlim(z_min, z_max)\n",
"ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1, 1, 0.5, 1])) # Shrink z-axis by 50%\n",
"ax.set_facecolor('white')\n",
"fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10)\n",
"\n",
"# Show the plot\n",
"# Plotting the STFT Data\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 1 Dataset A Label 0 Undamaged')\n",
"plt.show()"
]
},
@@ -382,9 +340,14 @@
"metadata": {},
"outputs": [],
"source": [
"# dpi\n",
"ready_data2a = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
" ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))\n",
"\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)"
"plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 2 Dataset A Label 0 Undamaged')\n",
"plt.show()"
]
},
{
@@ -402,8 +365,8 @@
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data1a[i].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Case {i} Sensor A', fontsize=12)\n",
" pcm = axes[i].pcolormesh(ready_data1a[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
@@ -413,6 +376,7 @@
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 1 Dataset A', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
@@ -427,20 +391,33 @@
"metadata": {},
"outputs": [],
"source": [
"ready_data2a = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
" ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# dpi\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)"
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data2a[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 2 Dataset A', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
]
},
{
@@ -451,18 +428,11 @@
"source": [
"ready_data1b = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw_B/sensor1'):\n",
" ready_data1b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor1', file), skiprows=1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# dpi\n",
" ready_data1b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor1', file), skiprows=1))\n",
"\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data1b[0].iloc[:22,:].transpose(), cmap='jet', vmax=0.03, vmin=0.0)"
"plt.pcolormesh(ready_data1b[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 1 Dataset B Label 0 Undamaged')"
]
},
{
@@ -471,7 +441,83 @@
"metadata": {},
"outputs": [],
"source": [
"len(ready_data1b[0])"
"ready_data2b = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw_B/sensor2'):\n",
" ready_data2b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor2', file), skiprows=1))\n",
"\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data2b[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 2 Dataset B Label 0 Undamaged')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data1b[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 1 Dataset B', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data2b[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 2 Dataset B', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
]
},
{
@@ -484,195 +530,6 @@
"print(len(ready_data2a))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x1a = 0\n",
"print(type(ready_data1a[0]))\n",
"ready_data1a[0].iloc[:,0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Checking length of the total array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x1a = 0\n",
"print(type(x1a))\n",
"for i in range(len(ready_data1a)):\n",
" print(type(ready_data1a[i].shape[0]))\n",
" x1a = x1a + ready_data1a[i].shape[0]\n",
" print(type(x1a))\n",
"\n",
"print(x1a)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x2a = 0\n",
"\n",
"for i in range(len(ready_data2a)):\n",
" print(ready_data2a[i].shape)\n",
" x2a = x2a + ready_data2a[i].shape[0]\n",
"\n",
"print(x2a)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Flatten 6 array into one array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine all dataframes in ready_data1a into a single dataframe\n",
"if ready_data1a: # Check if the list is not empty\n",
" # Use pandas concat function instead of iterative concatenation\n",
" combined_data = pd.concat(ready_data1a, axis=0, ignore_index=True)\n",
" \n",
" print(f\"Type of combined data: {type(combined_data)}\")\n",
" print(f\"Shape of combined data: {combined_data.shape}\")\n",
" \n",
" # Display the combined dataframe\n",
" combined_data\n",
"else:\n",
" print(\"No data available in ready_data1a list\")\n",
" combined_data = pd.DataFrame()\n",
"\n",
"# Store the result in x1a for compatibility with subsequent code\n",
"x1a = combined_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine all dataframes in ready_data1a into a single dataframe\n",
"if ready_data2a: # Check if the list is not empty\n",
" # Use pandas concat function instead of iterative concatenation\n",
" combined_data = pd.concat(ready_data2a, axis=0, ignore_index=True)\n",
" \n",
" print(f\"Type of combined data: {type(combined_data)}\")\n",
" print(f\"Shape of combined data: {combined_data.shape}\")\n",
" \n",
" # Display the combined dataframe\n",
" combined_data\n",
"else:\n",
" print(\"No data available in ready_data1a list\")\n",
" combined_data = pd.DataFrame()\n",
"\n",
"# Store the result in x1a for compatibility with subsequent code\n",
"x2a = combined_data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating the label"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_1 = 0\n",
"y_2 = 1\n",
"y_3 = 2\n",
"y_4 = 3\n",
"y_5 = 4\n",
"y_6 = 5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_data = [y_1, y_2, y_3, y_4, y_5, y_6]\n",
"y_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(len(y_data)):\n",
" print(ready_data1a[i].shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"for i in range(len(y_data)):\n",
" y_data[i] = [y_data[i]]*ready_data1a[i].shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(y_data[0])\n",
"# y_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y = y_data[0]\n",
"\n",
"for i in range(len(y_data) - 1):\n",
" #print(i)\n",
" y = np.concatenate((y, y_data[i+1]), axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(y.shape)\n",
"print(np.unique(y))"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -709,21 +566,6 @@
"x_train2, x_test2, y_train, y_test = train_test_split(X2a, y, test_size=0.2, random_state=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n",
"from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.svm import SVC\n",
"from xgboost import XGBClassifier"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -749,6 +591,11 @@
"from sklearn.svm import SVC\n",
"from sklearn.decomposition import PCA\n",
"from xgboost import XGBClassifier\n",
"from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"\n",
"# Define models for sensor1\n",
"models_sensor1 = {\n",
" # \"Random Forest\": RandomForestClassifier(),\n",
@@ -756,19 +603,19 @@
" # \"Decision Tree\": DecisionTreeClassifier(),\n",
" # \"KNN\": KNeighborsClassifier(),\n",
" # \"LDA\": LinearDiscriminantAnalysis(),\n",
" # \"SVM\": SVC(),\n",
" # \"SVM with StandardScaler and PCA\": make_pipeline(\n",
" # StandardScaler(),\n",
" # PCA(n_components=10),\n",
" # SVC(kernel='rbf')\n",
" # ),\n",
" \"SVM\": SVC(),\n",
" \"SVM with StandardScaler and PCA\": make_pipeline(\n",
" StandardScaler(),\n",
" PCA(n_components=10),\n",
" SVC(kernel='rbf')\n",
" ),\n",
"\n",
" \"XGBoost\": XGBClassifier()\n",
" # \"XGBoost\": XGBClassifier()\n",
"}\n",
"\n",
"results_sensor1 = []\n",
"for name, model in models_sensor1.items():\n",
" res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/sensor1')\n",
" res = train_and_evaluate_model(model, name, \"Sensor A\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/Sensor A')\n",
" results_sensor1.append(res)\n",
" print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n"
]
@@ -782,7 +629,7 @@
"from src.ml.model_selection import plot_confusion_matrix\n",
"\n",
"# Plot confusion matrix for sensor1\n",
"plot_confusion_matrix(results_sensor1, y_test)"
"plot_confusion_matrix(results_sensor1, y_test, \"Confusion Matrix of Sensor A Validation Dataset A\")"
]
},
{
@@ -929,8 +776,8 @@
"from sklearn.metrics import accuracy_score, classification_report\n",
"# 4. Validate on Dataset B\n",
"from joblib import load\n",
"# svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
"svm_model = load('D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib')\n",
"svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
"# svm_model = load('D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib')\n",
"y_pred_svm = svm_model.predict(X1b)\n",
"\n",
"# 5. Evaluate\n",