feat(notebooks): minimize stft.ipynb notebook and add STFT data preview plots.

- Consolidated import statements for pandas and matplotlib.
- Updated STFT plotting for Sensor 1 and Sensor 2 datasets with improved visualization using pcolormesh.
- Enhanced subplot organization for better clarity in visual representation.
- Added titles and adjusted layout for all plots.
This commit is contained in:
nuluh
2025-06-30 01:36:44 +07:00
parent c2df42cc2b
commit a93adc8af3

View File

@@ -312,6 +312,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"os.listdir('D:/thesis/data/working')" "os.listdir('D:/thesis/data/working')"
] ]
}, },
@@ -321,58 +323,14 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"ready_data1a = []\n", "ready_data1a = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n", "for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
" ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file), skiprows=1))\n", " ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file), skiprows=1))\n",
"# colormesh give title x is frequency and y is time and rotate/transpose the data\n", "# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
"# Plotting the STFT Data" "# Plotting the STFT Data\n",
] "plt.figure(dpi=300) # Set figure size and DPI\n",
}, "plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
{ "plt.title('STFT of Sensor 1 Dataset A Label 0 Undamaged')\n",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"\n",
"# Assuming ready_data1a[0] is a DataFrame or 2D array\n",
"spectrogram_data = ready_data1a[0].values # Convert to NumPy array if it's a DataFrame\n",
"\n",
"# Get the dimensions of the spectrogram\n",
"num_frequencies, num_time_frames = spectrogram_data.shape\n",
"\n",
"# Create frequency and time arrays\n",
"frequencies = np.arange(num_frequencies) # Replace with actual frequency values if available\n",
"time_frames = np.arange(num_time_frames) # Replace with actual time values if available\n",
"\n",
"# Create a meshgrid for plotting\n",
"T, F = np.meshgrid(time_frames, frequencies)\n",
"\n",
"# Create a 3D plot\n",
"fig = plt.figure(figsize=(12, 8))\n",
"ax = fig.add_subplot(111, projection='3d')\n",
"\n",
"# Plot the surface\n",
"surf = ax.plot_surface(T, F, spectrogram_data, cmap='bwr', edgecolor='none')\n",
"\n",
"# Add labels and a color bar\n",
"ax.set_xlabel('Time Frames')\n",
"ax.set_ylabel('Frequency [Hz]')\n",
"ax.set_zlabel('Magnitude')\n",
"ax.set_title('3D Spectrogram')\n",
"# Resize the z-axis (shrink it)\n",
"z_min, z_max = 0, 0.1 # Replace with your desired range\n",
"ax.set_zlim(z_min, z_max)\n",
"ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1, 1, 0.5, 1])) # Shrink z-axis by 50%\n",
"ax.set_facecolor('white')\n",
"fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10)\n",
"\n",
"# Show the plot\n",
"plt.show()" "plt.show()"
] ]
}, },
@@ -382,9 +340,14 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# dpi\n", "ready_data2a = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
" ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))\n",
"\n",
"plt.figure(dpi=300) # Set figure size and DPI\n", "plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)" "plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 2 Dataset A Label 0 Undamaged')\n",
"plt.show()"
] ]
}, },
{ {
@@ -402,8 +365,8 @@
"\n", "\n",
"# Loop through each subplot and plot the data\n", "# Loop through each subplot and plot the data\n",
"for i in range(6):\n", "for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data1a[i].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n", " pcm = axes[i].pcolormesh(ready_data1a[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Case {i} Sensor A', fontsize=12)\n", " axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n", "\n",
"# Add a single color bar for all subplots\n", "# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n", "# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
@@ -413,6 +376,7 @@
"# Set shared labels\n", "# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n", "fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n", "fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 1 Dataset A', fontsize=16)\n",
"\n", "\n",
"# Adjust layout\n", "# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n", "# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
@@ -427,20 +391,33 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"ready_data2a = []\n", "from cmcrameri import cm\n",
"for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n", "# Create a figure and subplots\n",
" ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))" "fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
] "\n",
}, "# Flatten the axes array for easier iteration\n",
{ "axes = axes.flatten()\n",
"cell_type": "code", "\n",
"execution_count": null, "# Loop through each subplot and plot the data\n",
"metadata": {}, "for i in range(6):\n",
"outputs": [], " pcm = axes[i].pcolormesh(ready_data2a[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"source": [ " axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"# dpi\n", "\n",
"plt.figure(dpi=300) # Set figure size and DPI\n", "# Add a single color bar for all subplots\n",
"plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)" "# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 2 Dataset A', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
] ]
}, },
{ {
@@ -451,18 +428,11 @@
"source": [ "source": [
"ready_data1b = []\n", "ready_data1b = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw_B/sensor1'):\n", "for file in os.listdir('D:/thesis/data/converted/raw_B/sensor1'):\n",
" ready_data1b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor1', file), skiprows=1))" " ready_data1b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor1', file), skiprows=1))\n",
] "\n",
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# dpi\n",
"plt.figure(dpi=300) # Set figure size and DPI\n", "plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data1b[0].iloc[:22,:].transpose(), cmap='jet', vmax=0.03, vmin=0.0)" "plt.pcolormesh(ready_data1b[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 1 Dataset B Label 0 Undamaged')"
] ]
}, },
{ {
@@ -471,7 +441,83 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"len(ready_data1b[0])" "ready_data2b = []\n",
"for file in os.listdir('D:/thesis/data/converted/raw_B/sensor2'):\n",
" ready_data2b.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw_B/sensor2', file), skiprows=1))\n",
"\n",
"plt.figure(dpi=300) # Set figure size and DPI\n",
"plt.pcolormesh(ready_data2b[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
"plt.title('STFT of Sensor 2 Dataset B Label 0 Undamaged')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data1b[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 1 Dataset B', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cmcrameri import cm\n",
"# Create a figure and subplots\n",
"fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n",
"\n",
"# Flatten the axes array for easier iteration\n",
"axes = axes.flatten()\n",
"\n",
"# Loop through each subplot and plot the data\n",
"for i in range(6):\n",
" pcm = axes[i].pcolormesh(ready_data2b[i+1].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
" axes[i].set_title(f'Label {i+1}', fontsize=12)\n",
"\n",
"# Add a single color bar for all subplots\n",
"# Use the first `pcolormesh` object (or any valid one) for the color bar\n",
"cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n",
"# cbar.set_label('Magnitude')\n",
"\n",
"# Set shared labels\n",
"fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n",
"fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n",
"fig.suptitle('STFT of Sensor 2 Dataset B', fontsize=16)\n",
"\n",
"# Adjust layout\n",
"# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n",
"plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n",
"\n",
"plt.show()"
] ]
}, },
{ {
@@ -484,195 +530,6 @@
"print(len(ready_data2a))" "print(len(ready_data2a))"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x1a = 0\n",
"print(type(ready_data1a[0]))\n",
"ready_data1a[0].iloc[:,0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Checking length of the total array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x1a = 0\n",
"print(type(x1a))\n",
"for i in range(len(ready_data1a)):\n",
" print(type(ready_data1a[i].shape[0]))\n",
" x1a = x1a + ready_data1a[i].shape[0]\n",
" print(type(x1a))\n",
"\n",
"print(x1a)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x2a = 0\n",
"\n",
"for i in range(len(ready_data2a)):\n",
" print(ready_data2a[i].shape)\n",
" x2a = x2a + ready_data2a[i].shape[0]\n",
"\n",
"print(x2a)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Flatten 6 array into one array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine all dataframes in ready_data1a into a single dataframe\n",
"if ready_data1a: # Check if the list is not empty\n",
" # Use pandas concat function instead of iterative concatenation\n",
" combined_data = pd.concat(ready_data1a, axis=0, ignore_index=True)\n",
" \n",
" print(f\"Type of combined data: {type(combined_data)}\")\n",
" print(f\"Shape of combined data: {combined_data.shape}\")\n",
" \n",
" # Display the combined dataframe\n",
" combined_data\n",
"else:\n",
" print(\"No data available in ready_data1a list\")\n",
" combined_data = pd.DataFrame()\n",
"\n",
"# Store the result in x1a for compatibility with subsequent code\n",
"x1a = combined_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine all dataframes in ready_data1a into a single dataframe\n",
"if ready_data2a: # Check if the list is not empty\n",
" # Use pandas concat function instead of iterative concatenation\n",
" combined_data = pd.concat(ready_data2a, axis=0, ignore_index=True)\n",
" \n",
" print(f\"Type of combined data: {type(combined_data)}\")\n",
" print(f\"Shape of combined data: {combined_data.shape}\")\n",
" \n",
" # Display the combined dataframe\n",
" combined_data\n",
"else:\n",
" print(\"No data available in ready_data1a list\")\n",
" combined_data = pd.DataFrame()\n",
"\n",
"# Store the result in x1a for compatibility with subsequent code\n",
"x2a = combined_data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating the label"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_1 = 0\n",
"y_2 = 1\n",
"y_3 = 2\n",
"y_4 = 3\n",
"y_5 = 4\n",
"y_6 = 5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_data = [y_1, y_2, y_3, y_4, y_5, y_6]\n",
"y_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(len(y_data)):\n",
" print(ready_data1a[i].shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"for i in range(len(y_data)):\n",
" y_data[i] = [y_data[i]]*ready_data1a[i].shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(y_data[0])\n",
"# y_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y = y_data[0]\n",
"\n",
"for i in range(len(y_data) - 1):\n",
" #print(i)\n",
" y = np.concatenate((y, y_data[i+1]), axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(y.shape)\n",
"print(np.unique(y))"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -709,21 +566,6 @@
"x_train2, x_test2, y_train, y_test = train_test_split(X2a, y, test_size=0.2, random_state=2)" "x_train2, x_test2, y_train, y_test = train_test_split(X2a, y, test_size=0.2, random_state=2)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n",
"from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.svm import SVC\n",
"from xgboost import XGBClassifier"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -749,6 +591,11 @@
"from sklearn.svm import SVC\n", "from sklearn.svm import SVC\n",
"from sklearn.decomposition import PCA\n", "from sklearn.decomposition import PCA\n",
"from xgboost import XGBClassifier\n", "from xgboost import XGBClassifier\n",
"from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"\n",
"# Define models for sensor1\n", "# Define models for sensor1\n",
"models_sensor1 = {\n", "models_sensor1 = {\n",
" # \"Random Forest\": RandomForestClassifier(),\n", " # \"Random Forest\": RandomForestClassifier(),\n",
@@ -756,19 +603,19 @@
" # \"Decision Tree\": DecisionTreeClassifier(),\n", " # \"Decision Tree\": DecisionTreeClassifier(),\n",
" # \"KNN\": KNeighborsClassifier(),\n", " # \"KNN\": KNeighborsClassifier(),\n",
" # \"LDA\": LinearDiscriminantAnalysis(),\n", " # \"LDA\": LinearDiscriminantAnalysis(),\n",
" # \"SVM\": SVC(),\n", " \"SVM\": SVC(),\n",
" # \"SVM with StandardScaler and PCA\": make_pipeline(\n", " \"SVM with StandardScaler and PCA\": make_pipeline(\n",
" # StandardScaler(),\n", " StandardScaler(),\n",
" # PCA(n_components=10),\n", " PCA(n_components=10),\n",
" # SVC(kernel='rbf')\n", " SVC(kernel='rbf')\n",
" # ),\n", " ),\n",
"\n", "\n",
" \"XGBoost\": XGBClassifier()\n", " # \"XGBoost\": XGBClassifier()\n",
"}\n", "}\n",
"\n", "\n",
"results_sensor1 = []\n", "results_sensor1 = []\n",
"for name, model in models_sensor1.items():\n", "for name, model in models_sensor1.items():\n",
" res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/sensor1')\n", " res = train_and_evaluate_model(model, name, \"Sensor A\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/Sensor A')\n",
" results_sensor1.append(res)\n", " results_sensor1.append(res)\n",
" print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n" " print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n"
] ]
@@ -782,7 +629,7 @@
"from src.ml.model_selection import plot_confusion_matrix\n", "from src.ml.model_selection import plot_confusion_matrix\n",
"\n", "\n",
"# Plot confusion matrix for sensor1\n", "# Plot confusion matrix for sensor1\n",
"plot_confusion_matrix(results_sensor1, y_test)" "plot_confusion_matrix(results_sensor1, y_test, \"Confusion Matrix of Sensor A Validation Dataset A\")"
] ]
}, },
{ {
@@ -929,8 +776,8 @@
"from sklearn.metrics import accuracy_score, classification_report\n", "from sklearn.metrics import accuracy_score, classification_report\n",
"# 4. Validate on Dataset B\n", "# 4. Validate on Dataset B\n",
"from joblib import load\n", "from joblib import load\n",
"# svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n", "svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
"svm_model = load('D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib')\n", "# svm_model = load('D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib')\n",
"y_pred_svm = svm_model.predict(X1b)\n", "y_pred_svm = svm_model.predict(X1b)\n",
"\n", "\n",
"# 5. Evaluate\n", "# 5. Evaluate\n",