From a93adc8af3674fd2cd0ae10b1882c58a52f13703 Mon Sep 17 00:00:00 2001 From: nuluh Date: Mon, 30 Jun 2025 01:36:44 +0700 Subject: [PATCH] feat(notebooks): minimize stft.ipynb notebooks and add STFT data preview plot. - Consolidated import statements for pandas and matplotlib. - Updated STFT plotting for Sensor 1 and Sensor 2 datasets with improved visualization using pcolormesh. - Enhanced subplot organization for better clarity in visual representation. - Added titles and adjusted layout for all plots. --- code/notebooks/stft.ipynb | 433 ++++++++++++-------------------------- 1 file changed, 140 insertions(+), 293 deletions(-) diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb index c1873f1..42a0495 100644 --- a/code/notebooks/stft.ipynb +++ b/code/notebooks/stft.ipynb @@ -312,6 +312,8 @@ "outputs": [], "source": [ "import os\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", "os.listdir('D:/thesis/data/working')" ] }, @@ -321,58 +323,14 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", "ready_data1a = []\n", "for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n", " ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file), skiprows=1))\n", "# colormesh give title x is frequency and y is time and rotate/transpose the data\n", - "# Plotting the STFT Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from mpl_toolkits.mplot3d import Axes3D\n", - "\n", - "# Assuming ready_data1a[0] is a DataFrame or 2D array\n", - "spectrogram_data = ready_data1a[0].values # Convert to NumPy array if it's a DataFrame\n", - "\n", - "# Get the dimensions of the spectrogram\n", - "num_frequencies, num_time_frames = spectrogram_data.shape\n", - "\n", - "# Create frequency and time arrays\n", - "frequencies = np.arange(num_frequencies) # Replace with actual frequency values if available\n", - "time_frames = np.arange(num_time_frames) # Replace with actual time values if available\n", - "\n", - "# Create a meshgrid for plotting\n", - "T, F = np.meshgrid(time_frames, frequencies)\n", - "\n", - "# Create a 3D plot\n", - "fig = plt.figure(figsize=(12, 8))\n", - "ax = fig.add_subplot(111, projection='3d')\n", - "\n", - "# Plot the surface\n", - "surf = ax.plot_surface(T, F, spectrogram_data, cmap='bwr', edgecolor='none')\n", - "\n", - "# Add labels and a color bar\n", - "ax.set_xlabel('Time Frames')\n", - "ax.set_ylabel('Frequency [Hz]')\n", - "ax.set_zlabel('Magnitude')\n", - "ax.set_title('3D Spectrogram')\n", - "# Resize the z-axis (shrink it)\n", - "z_min, z_max = 0, 0.1 # Replace with your desired range\n", - "ax.set_zlim(z_min, z_max)\n", - "ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1, 1, 0.5, 1])) # Shrink z-axis by 50%\n", - "ax.set_facecolor('white')\n", - "fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10)\n", - "\n", - "# Show the plot\n", + "# Plotting the STFT Data\n", + "plt.figure(dpi=300) # Set figure size and DPI\n", + "plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", + "plt.title('STFT of Sensor 1 Dataset A Label 0 Undamaged')\n", "plt.show()" ] }, @@ -382,9 +340,14 @@ "metadata": {}, "outputs": [], "source": [ - "# dpi\n", + "ready_data2a = []\n", + "for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n", + " ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))\n", + "\n", "plt.figure(dpi=300) # Set figure size and DPI\n", - "plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)" + "plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", + "plt.title('STFT of Sensor 2 Dataset A Label 0 Undamaged')\n", + "plt.show()" ] }, { @@ -402,8 +365,8 @@ "\n", "# Loop through each subplot and plot the data\n", "for i in range(6):\n", - " pcm = axes[i].pcolormesh(ready_data1a[i].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", - " axes[i].set_title(f'Case {i} Sensor A', fontsize=12)\n", + " pcm = axes[i].pcolormesh(ready_data1a[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", + " axes[i].set_title(f'Label {i+1}', fontsize=12)\n", "\n", "# Add a single color bar for all subplots\n", "# Use the first `pcolormesh` object (or any valid one) for the color bar\n", @@ -413,6 +376,7 @@ "# Set shared labels\n", "fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n", "fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n", + "fig.suptitle('STFT of Sensor 1 Dataset A', fontsize=16)\n", "\n", "# Adjust layout\n", "# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n", @@ -427,20 +391,33 @@ "metadata": {}, "outputs": [], "source": [ - "ready_data2a = []\n", - "for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n", - " ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# dpi\n", - "plt.figure(dpi=300) # Set figure size and DPI\n", - "plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)" + "from cmcrameri import cm\n", + "# Create a figure and subplots\n", + "fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n", + "\n", + "# Flatten the axes array for easier iteration\n", + "axes = axes.flatten()\n", + "\n", + "# Loop through each subplot and plot the data\n", + "for i in range(6):\n", + " pcm = axes[i].pcolormesh(ready_data2a[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", + " axes[i].set_title(f'Label {i+1}', fontsize=12)\n", + "\n", + "# Add a single color bar for all subplots\n", + "# Use the first `pcolormesh` object (or any valid one) for the color bar\n", + "cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n", + "# cbar.set_label('Magnitude')\n", + "\n", + "# Set shared labels\n", + "fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n", + "fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n", + "fig.suptitle('STFT of Sensor 2 Dataset A', fontsize=16)\n", + "\n", + "# Adjust layout\n", + "# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n", + "plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n", + "\n", + "plt.show()" ] }, { @@ -451,18 +428,11 @@ "source": [ "ready_data1b = []\n", "for file in os.listdir('D:/thesis/data/converted/raw_B/sensor1'):\n", - " ready_data1b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor1', file), skiprows=1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# dpi\n", + " ready_data1b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor1', file), skiprows=1))\n", + "\n", "plt.figure(dpi=300) # Set figure size and DPI\n", - "plt.pcolormesh(ready_data1b[0].iloc[:22,:].transpose(), cmap='jet', vmax=0.03, vmin=0.0)" + "plt.pcolormesh(ready_data1b[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", + "plt.title('STFT of Sensor 1 Dataset B Label 0 Undamaged')" ] }, { @@ -471,7 +441,83 @@ "metadata": {}, "outputs": [], "source": [ - "len(ready_data1b[0])" + "ready_data2b = []\n", + "for file in os.listdir('D:/thesis/data/converted/raw_B/sensor2'):\n", + " ready_data2b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor2', file), skiprows=1))\n", + "\n", + "plt.figure(dpi=300) # Set figure size and DPI\n", + "plt.pcolormesh(ready_data2b[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", + "plt.title('STFT of Sensor 2 Dataset B Label 0 Undamaged')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from cmcrameri import cm\n", + "# Create a figure and subplots\n", + "fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n", + "\n", + "# Flatten the axes array for easier iteration\n", + "axes = axes.flatten()\n", + "\n", + "# Loop through each subplot and plot the data\n", + "for i in range(6):\n", + " pcm = axes[i].pcolormesh(ready_data1b[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", + " axes[i].set_title(f'Label {i+1}', fontsize=12)\n", + "\n", + "# Add a single color bar for all subplots\n", + "# Use the first `pcolormesh` object (or any valid one) for the color bar\n", + "cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n", + "# cbar.set_label('Magnitude')\n", + "\n", + "# Set shared labels\n", + "fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n", + "fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n", + "fig.suptitle('STFT of Sensor 1 Dataset B', fontsize=16)\n", + "\n", + "# Adjust layout\n", + "# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n", + "plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from cmcrameri import cm\n", + "# Create a figure and subplots\n", + "fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n", + "\n", + "# Flatten the axes array for easier iteration\n", + "axes = axes.flatten()\n", + "\n", + "# Loop through each subplot and plot the data\n", + "for i in range(6):\n", + " pcm = axes[i].pcolormesh(ready_data2b[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", + " axes[i].set_title(f'Label {i+1}', fontsize=12)\n", + "\n", + "# Add a single color bar for all subplots\n", + "# Use the first `pcolormesh` object (or any valid one) for the color bar\n", + "cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n", + "# cbar.set_label('Magnitude')\n", + "\n", + "# Set shared labels\n", + "fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n", + "fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n", + "fig.suptitle('STFT of Sensor 2 Dataset B', fontsize=16)\n", + "\n", + "# Adjust layout\n", + "# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n", + "plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n", + "\n", + "plt.show()" ] }, { @@ -484,195 +530,6 @@ "print(len(ready_data2a))" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x1a = 0\n", - "print(type(ready_data1a[0]))\n", - "ready_data1a[0].iloc[:,0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Checking length of the total array" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x1a = 0\n", - "print(type(x1a))\n", - "for i in range(len(ready_data1a)):\n", - " print(type(ready_data1a[i].shape[0]))\n", - " x1a = x1a + ready_data1a[i].shape[0]\n", - " print(type(x1a))\n", - "\n", - "print(x1a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x2a = 0\n", - "\n", - "for i in range(len(ready_data2a)):\n", - " print(ready_data2a[i].shape)\n", - " x2a = x2a + ready_data2a[i].shape[0]\n", - "\n", - "print(x2a)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Flatten 6 array into one array" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Combine all dataframes in ready_data1a into a single dataframe\n", - "if ready_data1a: # Check if the list is not empty\n", - " # Use pandas concat function instead of iterative concatenation\n", - " combined_data = pd.concat(ready_data1a, axis=0, ignore_index=True)\n", - " \n", - " print(f\"Type of combined data: {type(combined_data)}\")\n", - " print(f\"Shape of combined data: {combined_data.shape}\")\n", - " \n", - " # Display the combined dataframe\n", - " combined_data\n", - "else:\n", - " print(\"No data available in ready_data1a list\")\n", - " combined_data = pd.DataFrame()\n", - "\n", - "# Store the result in x1a for compatibility with subsequent code\n", - "x1a = combined_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Combine all dataframes in ready_data1a into a single dataframe\n", - "if ready_data2a: # Check if the list is not empty\n", - " # Use pandas concat function instead of iterative concatenation\n", - " combined_data = pd.concat(ready_data2a, axis=0, ignore_index=True)\n", - " \n", - " print(f\"Type of combined data: {type(combined_data)}\")\n", - " print(f\"Shape of combined data: {combined_data.shape}\")\n", - " \n", - " # Display the combined dataframe\n", - " combined_data\n", - "else:\n", - " print(\"No data available in ready_data1a list\")\n", - " combined_data = pd.DataFrame()\n", - "\n", - "# Store the result in x1a for compatibility with subsequent code\n", - "x2a = combined_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Creating the label" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "y_1 = 0\n", - "y_2 = 1\n", - "y_3 = 2\n", - "y_4 = 3\n", - "y_5 = 4\n", - "y_6 = 5" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "y_data = [y_1, y_2, y_3, y_4, y_5, y_6]\n", - "y_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for i in range(len(y_data)):\n", - " print(ready_data1a[i].shape[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "for i in range(len(y_data)):\n", - " y_data[i] = [y_data[i]]*ready_data1a[i].shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(y_data[0])\n", - "# y_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "y = y_data[0]\n", - "\n", - "for i in range(len(y_data) - 1):\n", - " #print(i)\n", - " y = np.concatenate((y, y_data[i+1]), axis=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(y.shape)\n", - "print(np.unique(y))" - ] - }, { "cell_type": "code", "execution_count": null, @@ -709,21 +566,6 @@ "x_train2, x_test2, y_train, y_test = train_test_split(X2a, y, test_size=0.2, random_state=2)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.metrics import accuracy_score\n", - "from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n", - "from sklearn.tree import DecisionTreeClassifier\n", - "from sklearn.neighbors import KNeighborsClassifier\n", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", - "from sklearn.svm import SVC\n", - "from xgboost import XGBClassifier" - ] - }, { "cell_type": "code", "execution_count": null, @@ -749,6 +591,11 @@ "from sklearn.svm import SVC\n", "from sklearn.decomposition import PCA\n", "from xgboost import XGBClassifier\n", + "from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "\n", "# Define models for sensor1\n", "models_sensor1 = {\n", " # \"Random Forest\": RandomForestClassifier(),\n", @@ -756,19 +603,19 @@ " # \"Decision Tree\": DecisionTreeClassifier(),\n", " # \"KNN\": KNeighborsClassifier(),\n", " # \"LDA\": LinearDiscriminantAnalysis(),\n", - " # \"SVM\": SVC(),\n", - " # \"SVM with StandardScaler and PCA\": make_pipeline(\n", - " # StandardScaler(),\n", - " # PCA(n_components=10),\n", - " # SVC(kernel='rbf')\n", - " # ),\n", + " \"SVM\": SVC(),\n", + " \"SVM with StandardScaler and PCA\": make_pipeline(\n", + " StandardScaler(),\n", + " PCA(n_components=10),\n", + " SVC(kernel='rbf')\n", + " ),\n", "\n", - " \"XGBoost\": XGBClassifier()\n", + " # \"XGBoost\": XGBClassifier()\n", "}\n", "\n", "results_sensor1 = []\n", "for name, model in models_sensor1.items():\n", - " res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/sensor1')\n", + " res = train_and_evaluate_model(model, name, \"Sensor A\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/Sensor A')\n", " results_sensor1.append(res)\n", " print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n" ] @@ -782,7 +629,7 @@ "from src.ml.model_selection import plot_confusion_matrix\n", "\n", "# Plot confusion matrix for sensor1\n", - "plot_confusion_matrix(results_sensor1, y_test)" + "plot_confusion_matrix(results_sensor1, y_test, \"Confusion Matrix of Sensor A Validation Dataset A\")" ] }, { @@ -929,8 +776,8 @@ "from sklearn.metrics import accuracy_score, classification_report\n", "# 4. Validate on Dataset B\n", "from joblib import load\n", - "# svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n", - "svm_model = load('D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib')\n", + "svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n", + "# svm_model = load('D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib')\n", "y_pred_svm = svm_model.predict(X1b)\n", "\n", "# 5. Evaluate\n",