Merge pull request #107 from nuluh/exp/74-exp-cross-dataset-validation
Exp/74 exp cross dataset validation
This commit was merged in pull request #107.
This commit is contained in:
@@ -287,8 +287,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Define output directories for each sensor exported data\n",
|
"# Define output directories for each sensor exported data\n",
|
||||||
"output_dirs = {\n",
|
"output_dirs = {\n",
|
||||||
" 'sensor1': os.path.join(damage_base_path, 'sensor1'),\n",
|
" 'sensorA': os.path.join(damage_base_path, 'sensorA'),\n",
|
||||||
" 'sensor2': os.path.join(damage_base_path, 'sensor2')\n",
|
" 'sensorB': os.path.join(damage_base_path, 'sensorB')\n",
|
||||||
"}"
|
"}"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -305,7 +305,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"with multiprocessing.Pool() as pool:\n",
|
"with multiprocessing.Pool() as pool:\n",
|
||||||
" # Process each DAMAGE_X case in parallel\n",
|
" # Process each DAMAGE_X case in parallel\n",
|
||||||
" pool.map(process_damage_case, range(num_damage_cases), Fs, window_size, hop_size, output_dirs)"
|
" pool.map(process_damage_case, range(num_damage_cases))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -342,7 +342,7 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ready_data1a = []\n",
|
"ready_data1a: list[pd.DataFrame] = []\n",
|
||||||
"for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
|
"for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
|
||||||
" ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file), skiprows=1))"
|
" ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file), skiprows=1))"
|
||||||
]
|
]
|
||||||
@@ -354,7 +354,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Load the processed data for Sensor 2\n",
|
"# Load the processed data for Sensor 2\n",
|
||||||
"ready_data2a = []\n",
|
"ready_data2a: list[pd.DataFrame] = []\n",
|
||||||
"for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
|
"for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
|
||||||
" ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))"
|
" ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file), skiprows=1))"
|
||||||
]
|
]
|
||||||
@@ -414,6 +414,33 @@
|
|||||||
"import matplotlib.pyplot as plt"
|
"import matplotlib.pyplot as plt"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def preview_stft(data: pd.DataFrame, x_num_ticks: int = 6, y_num_ticks: int = 5):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Preview the Short-Time Fourier Transform (STFT) of the given data.\n",
|
||||||
|
"\n",
|
||||||
|
" Parameters:\n",
|
||||||
|
" -------\n",
|
||||||
|
" data (pd.DataFrame): The STFT data to be visualized.\n",
|
||||||
|
" x_num_ticks (int): Number of ticks on the x-axis (time frames). Defaults to 6.\n",
|
||||||
|
" y_num_ticks (int): Number of ticks on the y-axis (frequency bins). Defaults to 5.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" plt.figure(dpi=300) # Set figure size and DPI\n",
|
||||||
|
" plt.pcolormesh(data.transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
|
||||||
|
" # plt.title('STFT Preview')\n",
|
||||||
|
" plt.colorbar(label='Magnitude')\n",
|
||||||
|
" plt.xlabel('Segmen Waktu')\n",
|
||||||
|
" plt.ylabel('Sampel Frekuensi (Hz)')\n",
|
||||||
|
" plt.xticks(np.linspace(0, len(data)-1, x_num_ticks)) # Set x-ticks at regular intervals\n",
|
||||||
|
" plt.yticks(np.linspace(0, len(data.columns)-1, y_num_ticks)) # Set y-ticks at regular intervals\n",
|
||||||
|
" plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -422,10 +449,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
|
"# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
|
||||||
"# Plotting the STFT Data\n",
|
"# Plotting the STFT Data\n",
|
||||||
"plt.figure(dpi=300) # Set figure size and DPI\n",
|
"preview_stft(ready_data1a[0]) # Preview for Sensor 1"
|
||||||
"plt.pcolormesh(ready_data1a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
|
|
||||||
"plt.title('STFT of Sensor A Dataset A Label 0 Undamaged')\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -437,6 +461,7 @@
|
|||||||
"plt.figure(dpi=300) # Set figure size and DPI\n",
|
"plt.figure(dpi=300) # Set figure size and DPI\n",
|
||||||
"plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
|
"plt.pcolormesh(ready_data2a[0].transpose(), cmap='jet', vmax=0.03, vmin=0.0)\n",
|
||||||
"plt.title('STFT of Sensor B Dataset A Label 0 Undamaged')\n",
|
"plt.title('STFT of Sensor B Dataset A Label 0 Undamaged')\n",
|
||||||
|
"plt.savefig(\"stft-sensor-b-dataset-a-undamaged.png\", dpi=300)\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -789,8 +814,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from src.ml.model_selection import create_ready_data\n",
|
"from src.ml.model_selection import create_ready_data\n",
|
||||||
"\n",
|
"\n",
|
||||||
"X1b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor1') # sensor A\n",
|
"X1b, y1 = create_ready_data('D:/thesis/data/converted/raw_B/sensor1') # sensor A\n",
|
||||||
"X2b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor2') # sensor B"
|
"X2b, y2 = create_ready_data('D:/thesis/data/converted/raw_B/sensor2') # sensor B"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -807,6 +832,17 @@
|
|||||||
"#### Sensor A"
|
"#### Sensor A"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Rename first column using proper pandas method\n",
|
||||||
|
"X1b = X1b.rename(columns={X1b.columns[0]: \"Freq_0.00\"})\n",
|
||||||
|
"X2b = X2b.rename(columns={X2b.columns[0]: \"Freq_0.00\"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -815,8 +851,27 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# 4. Sensor A Validate on Dataset B\n",
|
"# 4. Sensor A Validate on Dataset B\n",
|
||||||
"from joblib import load\n",
|
"from joblib import load\n",
|
||||||
"svm_model = load('D:/thesis/models/Sensor A/SVM with StandardScaler and PCA.joblib')\n",
|
"from sklearn.svm import SVC\n",
|
||||||
"y_pred_svm_1 = svm_model.predict_proba(X1b)"
|
"svm_model: SVC = load('D:/thesis/models/Sensor A/SVM with StandardScaler and PCA.joblib')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"time_taken = np.array([])\n",
|
||||||
|
"for i in range(5): # Run multiple times to get an average time\n",
|
||||||
|
" start_time = time.time()\n",
|
||||||
|
" y_pred_svm_1 = svm_model.predict(X1b)\n",
|
||||||
|
" end_time = time.time()\n",
|
||||||
|
" time_taken = np.append(time_taken, end_time - start_time)\n",
|
||||||
|
"\n",
|
||||||
|
"print(time_taken)\n",
|
||||||
|
"print(time_taken.mean())\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -828,9 +883,7 @@
|
|||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Set NumPy to display full decimal values\n",
|
"# Set NumPy to display full decimal values\n",
|
||||||
"np.set_printoptions(suppress=True, precision=6) # Suppress scientific notation, set precision to 6 decimals\n",
|
"np.set_printoptions(suppress=True, precision=6) # Suppress scientific notation, set precision to 6 decimals"
|
||||||
"\n",
|
|
||||||
"y_pred_svm_1[1]"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -842,37 +895,14 @@
|
|||||||
"from sklearn.metrics import accuracy_score, classification_report\n",
|
"from sklearn.metrics import accuracy_score, classification_report\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# 5. Evaluate\n",
|
"# 5. Evaluate\n",
|
||||||
"print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm_1))\n",
|
"print(\"Accuracy on Dataset B:\", accuracy_score(y1, y_pred_svm_1))\n",
|
||||||
"print(classification_report(y, y_pred_svm_1))"
|
"df = pd.DataFrame(classification_report(y1, y_pred_svm_1, output_dict=True)).T\n",
|
||||||
]
|
"# Round numbers nicely and move 'accuracy' into a row that fits your desired layout\n",
|
||||||
},
|
"df_rounded = df.round(2)\n",
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Confusion Matrix Sensor A"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"# Export to LaTeX\n",
|
||||||
"cm = confusion_matrix(y, y_pred_svm_1) # -> ndarray\n",
|
"latex_table = df_rounded.to_latex(index=True, float_format=\"%.2f\", caption=\"Classification report on Dataset B\", label=\"tab:clf_report_auto\")\n",
|
||||||
"\n",
|
"print(latex_table)"
|
||||||
"# get the class labels\n",
|
|
||||||
"labels = svm_model.classes_\n",
|
|
||||||
"\n",
|
|
||||||
"# Plot\n",
|
|
||||||
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
|
|
||||||
"disp.plot(cmap=plt.cm.Blues) # You can change colormap\n",
|
|
||||||
"plt.title(\"Confusion Matrix of Sensor A Test on Dataset B\")\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -888,20 +918,34 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"svm_model = load('D:/thesis/models/sensor2/SVM.joblib')\n",
|
"# svm_model = load('D:/thesis/models/sensor2/SVM.joblib')\n",
|
||||||
"# svm_model = load('D:/thesis/models/sensor2/SVM with StandardScaler and PCA.joblib')\n",
|
"svm_model = load('D:/thesis/models/sensor2/SVM with StandardScaler and PCA.joblib')\n",
|
||||||
"y_pred_svm_2 = svm_model.predict(X2b)\n",
|
"y_pred_svm_2 = svm_model.predict(X2b)"
|
||||||
"\n",
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# 5. Evaluate\n",
|
"# 5. Evaluate\n",
|
||||||
"print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm_2))\n",
|
"import pandas as pd\n",
|
||||||
"print(classification_report(y, y_pred_svm_2))"
|
"\n",
|
||||||
|
"df = pd.DataFrame(classification_report(y2, y_pred_svm_2, output_dict=True)).T\n",
|
||||||
|
"# Round numbers nicely and move 'accuracy' into a row that fits your desired layout\n",
|
||||||
|
"df_rounded = df.round(2)\n",
|
||||||
|
"\n",
|
||||||
|
"# Export to LaTeX\n",
|
||||||
|
"latex_table = df_rounded.to_latex(index=True, float_format=\"%.2f\", caption=\"Classification report on Dataset B\", label=\"tab:clf_report_auto\")\n",
|
||||||
|
"print(latex_table)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Confusion Matrix Sensor B"
|
"#### Confusion Matrix Sensor A and B"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -912,17 +956,54 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
|
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
|
||||||
|
"import numpy as np\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Create a fresh figure with subplots\n",
|
||||||
|
"fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cm = confusion_matrix(y, y_pred_svm_2) # -> ndarray\n",
|
"# Plot confusion matrix for Sensor A\n",
|
||||||
|
"cm_A = confusion_matrix(y, y_pred_svm_1)\n",
|
||||||
|
"disp_A = ConfusionMatrixDisplay(confusion_matrix=cm_A, display_labels=labels)\n",
|
||||||
|
"disp_A.plot(ax=axes[0], cmap=plt.cm.Blues)\n",
|
||||||
|
"axes[0].set_title(\"Sensor A\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# get the class labels\n",
|
"# Plot confusion matrix for Sensor B\n",
|
||||||
"labels = svm_model.classes_\n",
|
"cm_B = confusion_matrix(y, y_pred_svm_2)\n",
|
||||||
|
"disp_B = ConfusionMatrixDisplay(confusion_matrix=cm_B, display_labels=labels)\n",
|
||||||
|
"disp_B.plot(ax=axes[1], cmap=plt.cm.Blues)\n",
|
||||||
|
"axes[1].set_title(\"Sensor B\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Plot\n",
|
"# Find and modify colorbars to show max values\n",
|
||||||
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
|
"# The colorbars are typically the 3rd and 4th axes in the figure\n",
|
||||||
"disp.plot(cmap=plt.cm.Blues) # You can change colormap\n",
|
"for i, (cbar_idx, cm) in enumerate(zip([2, 3], [cm_A, cm_B])):\n",
|
||||||
"plt.title(\"Confusion Matrix of Sensor B Test on Dataset B\")\n",
|
" if cbar_idx < len(fig.axes):\n",
|
||||||
|
" cbar_ax = fig.axes[cbar_idx]\n",
|
||||||
|
" \n",
|
||||||
|
" # Get max value from the confusion matrix\n",
|
||||||
|
" max_val = cm.max()\n",
|
||||||
|
" \n",
|
||||||
|
" # Create a new set of ticks with reasonable spacing and ending with max_val\n",
|
||||||
|
" # For example, if max is around 2560, create ticks: [0, 500, 1000, 1500, 2000, 2560]\n",
|
||||||
|
" tick_interval = 500\n",
|
||||||
|
" new_ticks = list(range(0, int(max_val), tick_interval))\n",
|
||||||
|
" if np.isclose(new_ticks[-1], max_val, rtol=0.05):\n",
|
||||||
|
" new_ticks[-1] = max_val \n",
|
||||||
|
" else:\n",
|
||||||
|
" new_ticks.extend([max_val])\n",
|
||||||
|
" # Set the new ticks\n",
|
||||||
|
" cbar_ax.set_yticks(new_ticks)\n",
|
||||||
|
" \n",
|
||||||
|
" # Format tick labels as integers\n",
|
||||||
|
" # cbar_ax.set_yticklabels([f\"{int(t)}\" if t.is_integer() else f\"{t:.1f}\" for t in new_ticks])\n",
|
||||||
|
"\n",
|
||||||
|
"# Set SVG font rendering for better PDF output\n",
|
||||||
|
"plt.rcParams['svg.fonttype'] = 'none'\n",
|
||||||
|
"\n",
|
||||||
|
"# Adjust layout\n",
|
||||||
|
"plt.tight_layout()\n",
|
||||||
|
"\n",
|
||||||
|
"# Save and show\n",
|
||||||
|
"plt.savefig(\"output.svg\")\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -949,6 +1030,51 @@
|
|||||||
"print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n",
|
"print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n",
|
||||||
"print(classification_report(y, y_pred_svm))"
|
"print(classification_report(y, y_pred_svm))"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test with AU"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"file_path = 'D:/thesis/data/dataset_B/zzzBU.TXT'\n",
|
||||||
|
"df = pd.read_csv(file_path, delim_whitespace=True, skiprows=10, header=0, memory_map=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df1= df.iloc[:, [1]]\n",
|
||||||
|
"df2 = df.iloc[:, [26]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from scipy.signal.windows import hann\n",
|
||||||
|
"from scipy.signal import stft\n",
|
||||||
|
"\n",
|
||||||
|
"window = 1024\n",
|
||||||
|
"hop = 512\n",
|
||||||
|
"\n",
|
||||||
|
"stft1 = stft(df1.values.flatten(), window=hann(window), nperseg=window, noverlap=hop, fs=window)\n",
|
||||||
|
"stft2 = stft(df2.values.flatten(), window=hann(window), nperseg=window, noverlap=hop, fs=window)\n"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
Reference in New Issue
Block a user