feat(notebooks): minimize stft.ipynb notebook and add STFT data preview plots.

- Consolidated import statements for pandas and matplotlib.
- Updated STFT plotting for Sensor 1 and Sensor 2 datasets with improved visualization using pcolormesh.
- Enhanced subplot organization for a clearer visual presentation.
- Added titles and adjusted layout for all plots.
This commit is contained in:
nuluh
2025-06-30 01:36:44 +07:00
parent c2df42cc2b
commit a93adc8af3

View File

@@ -312,6 +312,8 @@
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"os.listdir('D:/thesis/data/working')"
]
},
@@ -321,58 +323,14 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"ready_data1a = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
" ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file), skiprows=1))\n",
"# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
"# Plotting the STFT Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"\n",
"# Assuming ready_data1a[0] is a DataFrame or 2D array\n",
"spectrogram_data = ready_data1a[0].values # Convert to NumPy array if it's a DataFrame\n",
"\n",
"# Get the dimensions of the spectrogram\n",
"num_frequencies, num_time_frames = spectrogram_data.shape\n",
"\n",
"# Create frequency and time arrays\n",
"frequencies = np.arange(num_frequencies) # Replace with actual frequency values if available\n",
"time_frames = np.arange(num_time_frames) # Replace with actual time values if available\n",
"\n",
"# Create a meshgrid for plotting\n",
"T, F = np.meshgrid(time_frames, frequencies)\n",
"\n",
"# Create a 3D plot\n",
"fig = plt.figure(figsize=(12, 8))\n",
"ax = fig.add_subplot(111, projection='3d')\n",
"\n",
"# Plot the surface\n",
"surf = ax.plot_surface(T, F, spectrogram_data, cmap='bwr', edgecolor='none')\n",
"\n",
"# Add labels and a color bar\n",
"ax.set_xlabel('Time Frames')\n",
"ax.set_ylabel('Frequency [Hz]')\n",
"ax.set_zlabel('Magnitude')\n",
"ax.set_title('3D Spectrogram')\n",
"# Resize the z-axis (shrink it)\n",
"z_min, z_max = 0, 0.1 # Replace with your desired range\n",
"ax.set_zlim(z_min, z_max)\n",
"ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1, 1, 0.5, 1])) # Shrink z-axis by 50%\n",
"ax.set_facecolor('white')\n",
"fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10)\n",
"\n",
"# Show the plot\n",
"# Plotting the STFT Data\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 1 Dataset A Label 0 Undamaged')\n",
"plt.show()"
]
},
@@ -382,9 +340,14 @@
"metadata": {},
"outputs": [],
"source": [
"# dpi\n",
"ready_data2a = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
" ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))\n",
"\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)"
"plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 2 Dataset A Label 0 Undamaged')\n",
"plt.show()"
]
},
{
@@ -402,8 +365,8 @@
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data1a[i].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Case {i} Sensor A', fontsize=12)\n",
" pcm = axes[i].pcolormesh(ready_data1a[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
@@ -413,6 +376,7 @@
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 1 Dataset A', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
@@ -427,20 +391,33 @@
"metadata": {},
"outputs": [],
"source": [
"ready_data2a = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
" ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# dpi\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)"
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data2a[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 2 Dataset A', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
]
},
{
@@ -451,18 +428,11 @@
"source": [
"ready_data1b = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw_B/sensor1'):\n",
" ready_data1b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor1', file), skiprows=1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# dpi\n",
" ready_data1b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor1', file), skiprows=1))\n",
"\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data1b[0].iloc[:22,:].transpose(), cmap='jet', vmax=0.03, vmin=0.0)"
"plt.pcolormesh(ready_data1b[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 1 Dataset B Label 0 Undamaged')"
]
},
{
@@ -471,7 +441,83 @@
"metadata": {},
"outputs": [],
"source": [
"len(ready_data1b[0])"
"ready_data2b = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw_B/sensor2'):\n",
" ready_data2b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor2', file), skiprows=1))\n",
"\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data2b[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 2 Dataset B Label 0 Undamaged')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data1b[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 1 Dataset B', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data2b[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 2 Dataset B', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
]
},
{
@@ -484,195 +530,6 @@
"print(len(ready_data2a))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x1a = 0\n",
"print(type(ready_data1a[0]))\n",
"ready_data1a[0].iloc[:,0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Checking length of the total array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x1a = 0\n",
"print(type(x1a))\n",
"for i in range(len(ready_data1a)):\n",
" print(type(ready_data1a[i].shape[0]))\n",
" x1a = x1a + ready_data1a[i].shape[0]\n",
" print(type(x1a))\n",
"\n",
"print(x1a)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x2a = 0\n",
"\n",
"for i in range(len(ready_data2a)):\n",
" print(ready_data2a[i].shape)\n",
" x2a = x2a + ready_data2a[i].shape[0]\n",
"\n",
"print(x2a)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Flatten 6 array into one array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine all dataframes in ready_data1a into a single dataframe\n",
"if ready_data1a: # Check if the list is not empty\n",
" # Use pandas concat function instead of iterative concatenation\n",
" combined_data = pd.concat(ready_data1a, axis=0, ignore_index=True)\n",
" \n",
" print(f\"Type of combined data: {type(combined_data)}\")\n",
" print(f\"Shape of combined data: {combined_data.shape}\")\n",
" \n",
" # Display the combined dataframe\n",
" combined_data\n",
"else:\n",
" print(\"No data available in ready_data1a list\")\n",
" combined_data = pd.DataFrame()\n",
"\n",
"# Store the result in x1a for compatibility with subsequent code\n",
"x1a = combined_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine all dataframes in ready_data1a into a single dataframe\n",
"if ready_data2a: # Check if the list is not empty\n",
" # Use pandas concat function instead of iterative concatenation\n",
" combined_data = pd.concat(ready_data2a, axis=0, ignore_index=True)\n",
" \n",
" print(f\"Type of combined data: {type(combined_data)}\")\n",
" print(f\"Shape of combined data: {combined_data.shape}\")\n",
" \n",
" # Display the combined dataframe\n",
" combined_data\n",
"else:\n",
" print(\"No data available in ready_data1a list\")\n",
" combined_data = pd.DataFrame()\n",
"\n",
"# Store the result in x1a for compatibility with subsequent code\n",
"x2a = combined_data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating the label"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_1 = 0\n",
"y_2 = 1\n",
"y_3 = 2\n",
"y_4 = 3\n",
"y_5 = 4\n",
"y_6 = 5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_data = [y_1, y_2, y_3, y_4, y_5, y_6]\n",
"y_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(len(y_data)):\n",
" print(ready_data1a[i].shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"for i in range(len(y_data)):\n",
" y_data[i] = [y_data[i]]*ready_data1a[i].shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(y_data[0])\n",
"# y_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y = y_data[0]\n",
"\n",
"for i in range(len(y_data) - 1):\n",
" #print(i)\n",
" y = np.concatenate((y, y_data[i+1]), axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(y.shape)\n",
"print(np.unique(y))"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -709,21 +566,6 @@
"x_train2, x_test2, y_train, y_test = train_test_split(X2a, y, test_size=0.2, random_state=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n",
"from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.svm import SVC\n",
"from xgboost import XGBClassifier"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -749,6 +591,11 @@
"from sklearn.svm import SVC\n",
"from sklearn.decomposition import PCA\n",
"from xgboost import XGBClassifier\n",
"from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"\n",
"# Define models for sensor1\n",
"models_sensor1 = {\n",
" # \"Random Forest\": RandomForestClassifier(),\n",
@@ -756,19 +603,19 @@
" # \"Decision Tree\": DecisionTreeClassifier(),\n",
" # \"KNN\": KNeighborsClassifier(),\n",
" # \"LDA\": LinearDiscriminantAnalysis(),\n",
" # \"SVM\": SVC(),\n",
" # \"SVM with StandardScaler and PCA\": make_pipeline(\n",
" # StandardScaler(),\n",
" # PCA(n_components=10),\n",
" # SVC(kernel='rbf')\n",
" # ),\n",
" \"SVM\": SVC(),\n",
" \"SVM with StandardScaler and PCA\": make_pipeline(\n",
" StandardScaler(),\n",
" PCA(n_components=10),\n",
" SVC(kernel='rbf')\n",
" ),\n",
"\n",
" \"XGBoost\": XGBClassifier()\n",
" # \"XGBoost\": XGBClassifier()\n",
"}\n",
"\n",
"results_sensor1 = []\n",
"for name, model in models_sensor1.items():\n",
" res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/sensor1')\n",
" res = train_and_evaluate_model(model, name, \"Sensor A\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/Sensor A')\n",
" results_sensor1.append(res)\n",
" print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n"
]
@@ -782,7 +629,7 @@
"from src.ml.model_selection import plot_confusion_matrix\n",
"\n",
"# Plot confusion matrix for sensor1\n",
"plot_confusion_matrix(results_sensor1, y_test)"
"plot_confusion_matrix(results_sensor1, y_test, \"Confusion Matrix of Sensor A Validation Dataset A\")"
]
},
{
@@ -929,8 +776,8 @@
"from sklearn.metrics import accuracy_score, classification_report\n",
"# 4. Validate on Dataset B\n",
"from joblib import load\n",
"# svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
"svm_model = load('D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib')\n",
"svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
"# svm_model = load('D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib')\n",
"y_pred_svm = svm_model.predict(X1b)\n",
"\n",
"# 5. Evaluate\n",