From 0e28ed6dd08812fd6ae110a46699bc5e2d23b25a Mon Sep 17 00:00:00 2001 From: nuluh Date: Mon, 28 Jul 2025 16:41:54 +0700 Subject: [PATCH 1/7] feat(notebooks): add cross-dataset validation for Sensor A and Sensor B models Closes #74 --- code/notebooks/stft.ipynb | 188 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb index ceb9176..4510922 100644 --- a/code/notebooks/stft.ipynb +++ b/code/notebooks/stft.ipynb @@ -938,6 +938,194 @@ "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n", "print(classification_report(y, y_pred_svm))" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cross Dataset Validation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "# sensor A\n", + "x_train1, x_test1, y_train1, y_test1 = train_test_split(X1b, y, test_size=0.2, random_state=2)\n", + "# sensor B\n", + "x_train2, x_test2, y_train2, y_test2 = train_test_split(X2b, y, test_size=0.2, random_state=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results_sensor1 = []\n", + "for name, model in models_sensor1.items():\n", + " res = train_and_evaluate_model(model, name, \"Sensor A\", x_train1, y_train1, x_test1, y_test1, \n", + " export='D:/thesis/datasetB/models/Sensor A')\n", + " results_sensor1.append(res)\n", + " print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n", + "\n", + "# Display result\n", + "results_sensor1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results_sensor2 = []\n", + "for name, model in models_sensor2.items():\n", + " res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train2, x_test2, y_test2, \n", + " export='D:/thesis/datasetB/models/sensor2')\n", + " results_sensor2.append(res)\n", + " print(f\"{name} on sensor2: Accuracy = {res['accuracy']:.2f}%\")\n", + "\n", + "# Display result\n", + "results_sensor2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Sensor A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 4. Sensor A Validate on Dataset A\n", + "from joblib import load\n", + "svm_model = load('D:/thesis/datasetB/models/sensor1/SVM.joblib')\n", + "y_pred_svm_1 = svm_model.predict(X1a)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score, classification_report\n", + "\n", + "# 5. Evaluate\n", + "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm_1))\n", + "print(classification_report(y, y_pred_svm_1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Confusion Matrix Sensor A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "\n", + "\n", + "cm = confusion_matrix(y, y_pred_svm_1) # -> ndarray\n", + "\n", + "# get the class labels\n", + "labels = svm_model.classes_\n", + "\n", + "# Plot\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n", + "disp.plot(cmap=plt.cm.Blues) # You can change colormap\n", + "plt.title(\"Confusion Matrix of Sensor A Test on Dataset B\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Sensor B" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "svm_model = load('D:/thesis/datasetB/models/sensor2/SVM.joblib')\n", + "# svm_model = load('D:/thesis/models/sensor2/SVM with StandardScaler and PCA.joblib')\n", + "y_pred_svm_2 = svm_model.predict(X2a)\n", + "\n", + "# 5. Evaluate\n", + "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm_2))\n", + "print(classification_report(y, y_pred_svm_2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Confusion Matrix Sensor B" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "\n", + "\n", + "cm = confusion_matrix(y, y_pred_svm_2) # -> ndarray\n", + "\n", + "# get the class labels\n", + "labels = svm_model.classes_\n", + "\n", + "# Plot\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n", + "disp.plot(cmap=plt.cm.Blues) # You can change colormap\n", + "plt.title(\"Confusion Matrix of Sensor B Test on Dataset B\")\n", + "plt.show()" + ] } ], "metadata": { From 860542f3f9d49d38a8bb325ac43db50fab914b5e Mon Sep 17 00:00:00 2001 From: nuluh Date: Sun, 10 Aug 2025 20:02:45 +0700 Subject: [PATCH 2/7] refactor(src): restructure compute_stft function to be pure function and include return parameters and improve clarity --- code/src/process_stft.py | 42 +++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/code/src/process_stft.py b/code/src/process_stft.py index 79ab276..e100a8d 100644 --- a/code/src/process_stft.py +++ b/code/src/process_stft.py @@ -1,7 +1,8 @@ import os import pandas as pd import numpy as np -from scipy.signal import stft, hann +from scipy.signal import stft +from scipy.signal.windows import hann import glob import multiprocessing # Added import for multiprocessing @@ -19,27 +20,32 @@ for dir_path in output_dirs.values(): os.makedirs(dir_path, exist_ok=True) # Define STFT parameters -window_size = 1024 -hop_size = 512 -window = hann(window_size) -Fs = 1024 # Number of damage cases (adjust as needed) num_damage_cases = 0 # Change to 30 if you have 30 damage cases # Function to perform STFT and return magnitude -def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size): - frequencies, times, Zxx = stft( - vibration_data, - fs=Fs, - window=window, - nperseg=window_size, - noverlap=window_size - hop_size - ) - stft_magnitude = np.abs(Zxx) - return stft_magnitude.T # Transpose to have frequencies as columns +def compute_stft(vibration_data, return_param=False): + window_size = 1024 + hop_size = 512 + window = hann(window_size) + Fs = 1024 -def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop_size, output_dirs=output_dirs): + frequencies, times, Zxx = stft( + vibration_data, + fs=Fs, + window=window, + nperseg=window_size, + noverlap=window_size - hop_size + ) + stft_magnitude = np.abs(Zxx) + + if return_param: + return stft_magnitude.T, [window_size, hop_size, Fs] # Transpose to have frequencies as columns + else: + return stft_magnitude.T + +def process_damage_case(damage_num, Fs=Fs,): damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}') if damage_num == 0: # Number of test runs per damage case @@ -83,8 +89,8 @@ def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop vibration_data = df.iloc[:, 1].values # Perform STFT - stft_magnitude = compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size) - + stft_magnitude, (window_size, hop_size, Fs) = compute_stft(vibration_data, return_param=True) + # Convert STFT result to DataFrame df_stft = pd.DataFrame( stft_magnitude, From a8288b1426fe3a27fa947dc48a51b1e0070eebf2 Mon Sep 17 00:00:00 2001 From: nuluh Date: Mon, 11 Aug 2025 13:15:48 +0700 Subject: [PATCH 3/7] refactor(src): enhance compute_stft function with type hints, improved documentation by moving column renaming process from `process_damage_case` to `compute_stft` --- code/src/process_stft.py | 47 ++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/code/src/process_stft.py b/code/src/process_stft.py index e100a8d..113c977 100644 --- a/code/src/process_stft.py +++ b/code/src/process_stft.py @@ -5,6 +5,7 @@ from scipy.signal import stft from scipy.signal.windows import hann import glob import multiprocessing # Added import for multiprocessing +from typing import Union, Tuple # Define the base directory where DAMAGE_X folders are located damage_base_path = 'D:/thesis/data/converted/raw' @@ -22,10 +23,31 @@ for dir_path in output_dirs.values(): # Define STFT parameters # Number of damage cases (adjust as needed) -num_damage_cases = 0 # Change to 30 if you have 30 damage cases +num_damage_cases = 6 # Change to 30 if you have 30 damage cases # Function to perform STFT and return magnitude -def compute_stft(vibration_data, return_param=False): +def compute_stft(vibration_data: np.ndarray, return_param: bool = False) -> Union[pd.DataFrame, Tuple[pd.DataFrame, list[int, int, int]]]: + """ + Computes the Short-Time Fourier Transform (STFT) magnitude of the input vibration data. + + Parameters + ---------- + vibration_data : numpy.ndarray + The input vibration data as a 1D NumPy array. + return_param : bool, optional + If True, the function returns additional STFT parameters (window size, hop size, and sampling frequency). + Defaults to False. + + Returns + ------- + pd.DataFrame + The transposed STFT magnitude, with frequencies as columns, if `return_param` is False. + tuple + If `return_param` is True, returns a tuple containing: + - pd.DataFrame: The transposed STFT magnitude, with frequencies as columns. + - list[int, int, int]: A list of STFT parameters [window_size, hop_size, Fs]. + """ + window_size = 1024 hop_size = 512 window = hann(window_size) @@ -40,12 +62,18 @@ def compute_stft(vibration_data, return_param=False): ) stft_magnitude = np.abs(Zxx) + # Convert STFT result to DataFrame + df_stft = pd.DataFrame( + stft_magnitude.T, + columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])] + ) + # breakpoint() if return_param: - return stft_magnitude.T, [window_size, hop_size, Fs] # Transpose to have frequencies as columns + return df_stft, [window_size, hop_size, Fs] else: - return stft_magnitude.T + return df_stft -def process_damage_case(damage_num, Fs=Fs,): +def process_damage_case(damage_num): damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}') if damage_num == 0: # Number of test runs per damage case @@ -89,13 +117,8 @@ def process_damage_case(damage_num, Fs=Fs,): vibration_data = df.iloc[:, 1].values # Perform STFT - stft_magnitude, (window_size, hop_size, Fs) = compute_stft(vibration_data, return_param=True) + df_stft = compute_stft(vibration_data) - # Convert STFT result to DataFrame - df_stft = pd.DataFrame( - stft_magnitude, - columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])] - ) # only inlcude 21 samples vector features for first 45 num_test_runs else include 22 samples vector features if damage_num == 0: print(f"Processing damage_num = 0, test_num = {test_num}") @@ -130,4 +153,4 @@ def process_damage_case(damage_num, Fs=Fs,): if __name__ == "__main__": # Added main guard for multiprocessing with multiprocessing.Pool() as pool: - pool.map(process_damage_case, range(0, num_damage_cases + 1)) + pool.map(process_damage_case, range(num_damage_cases + 1)) From 9f23d82fabee8d9b1694e8aeccb1893a8040f7da Mon Sep 17 00:00:00 2001 From: nuluh Date: Mon, 11 Aug 2025 13:17:46 +0700 Subject: [PATCH 4/7] fix(src): correct file writing method in process.stft.process_damage_case function to fix incorrect first column name Closes #104 --- code/src/process_stft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/src/process_stft.py b/code/src/process_stft.py index 113c977..a0cdaf2 100644 --- a/code/src/process_stft.py +++ b/code/src/process_stft.py @@ -146,7 +146,7 @@ def process_damage_case(damage_num): # Save the aggregated STFT to CSV with open(output_file, 'w') as file: file.write('sep=,\n') - df_aggregated.to_csv(output_file, index=False) + df_aggregated.to_csv(file, index=False) print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}") else: print(f"No STFT data aggregated for Sensor {sensor_num}, Damage {damage_num}.") From 274cd60d27478070a3c2064ca7f980f912ad3620 Mon Sep 17 00:00:00 2001 From: nuluh Date: Mon, 11 Aug 2025 18:49:41 +0700 Subject: [PATCH 5/7] refactor(src): update generate_df_tuples function signature to include type hints for better clarity --- code/src/data_preprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/src/data_preprocessing.py b/code/src/data_preprocessing.py index 59db93e..340c706 100644 --- a/code/src/data_preprocessing.py +++ b/code/src/data_preprocessing.py @@ -35,8 +35,8 @@ def complement_pairs(n, prefix, extension): if a != orig_a: # skip original a yield (filename, [a, a + 25]) # use yield instead of return to return a generator of tuples -def generate_df_tuples(total_dfs, prefix, extension, first_col_start, last_col_offset, - group_size=5, special_groups=None, group=True): +def generate_df_tuples(prefix: str, total_dfs: int=30, extension: str="TXT", first_col_start: int=1, last_col_offset: int=25, + group_size: int=5, special_groups: list=None, group: bool=True): """ Generate a structured list of tuples containing DataFrame references and column indices. From 4a1c0ed83e348e26d530c1c252d21c329fd4e18d Mon Sep 17 00:00:00 2001 From: nuluh Date: Sun, 17 Aug 2025 22:21:17 +0700 Subject: [PATCH 6/7] feat(src): implement inference function with damage probability calculations and visualization Closes #103 --- code/src/ml/inference.py | 200 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 187 insertions(+), 13 deletions(-) diff --git a/code/src/ml/inference.py b/code/src/ml/inference.py index 88a7b96..4ed103a 100644 --- a/code/src/ml/inference.py +++ b/code/src/ml/inference.py @@ -1,16 +1,190 @@ -from src.ml.model_selection import inference_model from joblib import load +import pandas as pd +from src.data_preprocessing import * +from src.process_stft import compute_stft +from typing import List, Tuple +from sklearn.base import BaseEstimator +import json -x = 30 -file = f"D:/thesis/data/dataset_B/zzzBD{x}.TXT" -sensor = 1 -model = {"SVM": f"D:/thesis/models/sensor{sensor}/SVM.joblib", - "SVM with PCA": f"D:/thesis/models/sensor{sensor}/SVM with StandardScaler and PCA.joblib", - "XGBoost": f"D:/thesis/models/sensor{sensor}/XGBoost.joblib"} +def probability_damage(pred: Tuple[np.ndarray, np.ndarray], model_classes: BaseEstimator, percentage=False) -> Dict[str, int]: + """ + Process the prediction output to return unique labels and their counts. + """ + labels, counts = np.unique(pred, return_counts=True) + label_counts = dict(zip(labels, counts)) -index = ((x-1) % 5) + 1 -inference_model(model["SVM"], file, column_question=index) -print("---") -inference_model(model["SVM with PCA"], file, column_question=index) -print("---") -inference_model(model["XGBoost"], file, column_question=index) \ No newline at end of file + # init all models classes probability of damage with 0 in dictionary + pod: Dict[np.ndarray, int] = dict.fromkeys(model_classes.classes_, 0) + + # update corresponding data + pod.update(label_counts) + + # turn the value into ratio instead of prediction counts + for label, count in pod.items(): + + ratio: float = count/np.sum(counts) + + if percentage: + pod[label] = ratio * 100 + else: + pod[label] = ratio + return pod + +def convert_keys_to_strings(obj): + """ + Recursively convert all dictionary keys to strings. + """ + if isinstance(obj, dict): + return {str(key): convert_keys_to_strings(value) for key, value in obj["data"].items()} + elif isinstance(obj, list): + return [convert_keys_to_strings(item) for item in obj["data"]] + else: + return obj + +def inference(model_sensor_A_path: str, model_sensor_B_path: str, file_path: str): + + # Generate column indices + column_index: List[Tuple[int, int]] = [ + (i + 1, i + 26) + for i in range(5) + ] + # Load a single case data + df: pd.DataFrame = pd.read_csv(file_path, delim_whitespace=True, skiprows=10, header=0, memory_map=True) + # Take case name + case_name: str = file_path.split("/")[-1].split(".")[0] + # Extract relevant columns for each sensor + column_data: List[Tuple[pd.Series[float], pd.Series[float]]] = [ + (df.iloc[:, i[0]], df.iloc[:, i[1]]) + for i in column_index + ] + + column_data_stft: List[Tuple[pd.DataFrame, pd.DataFrame]] = [ + (compute_stft(sensor_A), compute_stft(sensor_B)) + for (sensor_A, sensor_B) in column_data + ] + + # Load the model + model_sensor_A = load(model_sensor_A_path) + model_sensor_B = load(model_sensor_B_path) + + res = {} + + for i, (stft_A, stft_B) in enumerate(column_data_stft): + # Make predictions using the model + pred_A: list[int] = model_sensor_A.predict(stft_A) + pred_B: list[int] = model_sensor_B.predict(stft_B) + + + percentage_A = probability_damage(pred_A, model_sensor_A) + percentage_B = probability_damage(pred_B, model_sensor_B) + + + res[f"Column_{i+1}"] = { + "Sensor_A": { + # "Predictions": pred_A, + "PoD": percentage_A + }, + "Sensor_B": { + # "Predictions": pred_B, + "PoD": percentage_B + } + } + final_res = {"data": res, "case": case_name} + return final_res + +def heatmap(result, damage_classes: list[int] = [1, 2, 3, 4, 5, 6]): + from scipy.interpolate import RectBivariateSpline + resolution = 300 + y = list(range(1, len(damage_classes)+1)) + + # length of column + x = list(range(len(result["data"]))) + + # X, Y = np.meshgrid(x, y) + Z = [] + for _, column_data in result["data"].items(): + sensor_a_pod = column_data['Sensor_A']['PoD'] + Z.append([sensor_a_pod.get(cls, 0) for cls in damage_classes]) + Z = np.array(Z).T + + y2 = np.linspace(1, len(damage_classes), resolution) + x2 = np.linspace(0,4,resolution) + f = RectBivariateSpline(x, y, Z.T, kx=2, ky=2) # 2nd degree quadratic spline interpolation + + Z2 = f(x2, y2).T.clip(0, 1) # clip to ignores negative values from cubic interpolation + + X2, Y2 = np.meshgrid(x2, y2) + # breakpoint() + c = plt.pcolormesh(X2, Y2, Z2, cmap='jet', shading='auto') + + # Add a colorbar + plt.colorbar(c, label='Probability of Damage (PoD)') + plt.gca().invert_xaxis() + plt.grid(True, linestyle='-', alpha=0.7) + plt.xticks(np.arange(int(X2.min()), int(X2.max())+1, 1)) + plt.xlabel("Column Index") + plt.ylabel("Damage Index") + plt.title(result["case"]) + # plt.xticks(ticks=x2, labels=[f'Col_{i+1}' for i in range(len(result))]) + # plt.gca().xaxis.set_major_locator(MultipleLocator(65/4)) + plt.show() + +if __name__ == "__main__": + import matplotlib.pyplot as plt + import json + from scipy.interpolate import UnivariateSpline + + + result = inference( + "D:/thesis/models/Sensor A/SVM with StandardScaler and PCA.joblib", + "D:/thesis/models/Sensor B/SVM with StandardScaler and PCA.joblib", + "D:/thesis/data/dataset_B/zzzBD19.TXT" + ) + + # heatmap(result) + # Convert all keys to strings before dumping to JSON + # result_with_string_keys = convert_keys_to_strings(result) + # print(json.dumps(result_with_string_keys, indent=4)) + + # Create a 5x2 subplot grid (5 rows for each column, 2 columns for sensors) + fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(5, 50)) + + # # Define damage class labels for x-axis + damage_classes = [1, 2, 3, 4, 5, 6] + + # # Loop through each column in the data + for row_idx, (column_name, column_data) in enumerate(result['data'].items()): + # Plot Sensor A in the first column of subplots + sensor_a_pod = column_data['Sensor_A']['PoD'] + x_values = list(range(len(damage_classes))) + y_values = [sensor_a_pod.get(cls, 0) for cls in damage_classes] + + # x2 = np.linspace(1, 6, 100) + # interp = UnivariateSpline(x_values, y_values, s=0) + axes[row_idx, 0].plot(x_values, y_values, '-', linewidth=2, markersize=8) + axes[row_idx, 0].set_title(f"{column_name} - Sensor A", fontsize=10) + axes[row_idx, 0].set_xticks(x_values) + axes[row_idx, 0].set_xticklabels(damage_classes) + axes[row_idx, 0].set_ylim(0, 1.05) + axes[row_idx, 0].set_ylabel('Probability') + axes[row_idx, 0].set_xlabel('Damage Class') + axes[row_idx, 0].grid(True, linestyle='-', alpha=0.5) + + # Plot Sensor B in the second column of subplots + sensor_b_pod = column_data['Sensor_B']['PoD'] + y_values = [sensor_b_pod.get(cls, 0) for cls in damage_classes] + axes[row_idx, 1].plot(x_values, y_values, '-', linewidth=2, markersize=8) + axes[row_idx, 1].set_title(f"{column_name} - Sensor B", fontsize=10) + axes[row_idx, 1].set_xticks(x_values) + axes[row_idx, 1].set_xticklabels(damage_classes) + axes[row_idx, 1].set_ylim(0, 1.05) + axes[row_idx, 1].set_ylabel('Probability') + axes[row_idx, 1].set_xlabel('Damage Class') + axes[row_idx, 1].grid(True, linestyle='-', alpha=0.5) + + # Adjust layout to prevent overlap + fig.tight_layout(rect=[0, 0, 1, 0.96]) # Leave space for suptitle + plt.subplots_adjust(hspace=1, wspace=0.3) # Adjust spacing between subplots + plt.suptitle(f"Case {result['case']}", fontsize=16, y=0.98) # Adjust suptitle position + plt.show() + From 855114d6334d883e341211ece2f44ba6deeb282f Mon Sep 17 00:00:00 2001 From: nuluh Date: Sun, 17 Aug 2025 23:39:57 +0700 Subject: [PATCH 7/7] refactor(notebooks): clean up imports, adjust damage case processing, and improve model training structure - Removed unnecessary imports (os, pandas, numpy) from the STFT notebook. - Adjusted the number of damage cases in the multiprocessing pool to correctly reflect the range. - Updated model training code for Sensor B to ensure consistent naming and structure. - Cleaned up commented-out code for clarity and maintainability. --- code/notebooks/stft.ipynb | 68 ++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb index 4510922..9ea2f18 100644 --- a/code/notebooks/stft.ipynb +++ b/code/notebooks/stft.ipynb @@ -217,9 +217,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import pandas as pd\n", - "import numpy as np\n", "from scipy.signal import hann\n", "import multiprocessing" ] @@ -244,16 +241,6 @@ "Fs = 1024" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define the base directory where DAMAGE_X folders are located\n", - "damage_base_path = 'D:/thesis/data/converted/raw'" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -304,11 +291,11 @@ "# Import custom in-house functions\n", "from src.process_stft import process_damage_case\n", "\n", - "num_damage_cases = 7 # DAMAGE_0 to DAMAGE_6\n", + "num_damage_cases = 6 # DAMAGE_0 to DAMAGE_6\n", "\n", "with multiprocessing.Pool() as pool:\n", " # Process each DAMAGE_X case in parallel\n", - " pool.map(process_damage_case, range(num_damage_cases), Fs, window_size, hop_size, output_dirs)" + " pool.map(process_damage_case, range(num_damage_cases + 1))" ] }, { @@ -665,24 +652,33 @@ " # \"Decision Tree\": DecisionTreeClassifier(),\n", " # \"KNN\": KNeighborsClassifier(),\n", " # \"LDA\": LinearDiscriminantAnalysis(),\n", - " # \"SVM\": make_pipeline(\n", - " # StandardScaler(),\n", - " # SVC(kernel='rbf', probability=True)\n", - " # ),\n", - " # \"SVM with StandardScaler and PCA\": make_pipeline(\n", - " # StandardScaler(),\n", - " # PCA(n_components=10),\n", - " # SVC(kernel='rbf')\n", - " # ),\n", + " \"SVM\": make_pipeline(\n", + " StandardScaler(),\n", + " SVC(kernel='rbf')\n", + " ),\n", + " \"SVM with StandardScaler and PCA\": make_pipeline(\n", + " StandardScaler(),\n", + " PCA(n_components=10),\n", + " SVC(kernel='rbf')\n", + " ),\n", "\n", " # \"XGBoost\": XGBClassifier()\n", - " \"MLPClassifier\": make_pipeline(\n", - " StandardScaler(),\n", - " MLPClassifier(hidden_layer_sizes=(1, 10), max_iter=500, random_state=42)\n", - " )\n", + " # \"MLPClassifier\": make_pipeline(\n", + " # StandardScaler(),\n", + " # MLPClassifier(hidden_layer_sizes=(1, 10), max_iter=500, random_state=42)\n", + " # )\n", "}" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_train1" + ] + }, { "cell_type": "code", "execution_count": null, @@ -712,9 +708,15 @@ " # \"Decision Tree\": DecisionTreeClassifier(),\n", " # \"KNN\": KNeighborsClassifier(),\n", " # \"LDA\": LinearDiscriminantAnalysis(),\n", - " \"SVM\": SVC(),\n", - " # \"SVM with StandardScaler and PCA\": make_pipeline(\n", - " # StandardScaler(),\n", + " \"SVM\": make_pipeline(\n", + " StandardScaler(),\n", + " SVC(kernel='rbf')\n", + " ),\n", + " \"SVM with StandardScaler and PCA\": make_pipeline(\n", + " StandardScaler(),\n", + " PCA(n_components=10),\n", + " SVC(kernel='rbf')\n", + " ),\n", " # PCA(n_components=10),\n", " # SVC(kernel='rbf')\n", " # ),\n", @@ -730,8 +732,8 @@ "source": [ "results_sensor2 = []\n", "for name, model in models_sensor2.items():\n", - " res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train2, x_test2, y_test2, \n", - " export='D:/thesis/models/sensor2')\n", + " res = train_and_evaluate_model(model, name, \"Sensor B\", x_train2, y_train2, x_test2, y_test2, \n", + " export='D:/thesis/models/Sensor B')\n", " results_sensor2.append(res)\n", " print(f\"{name} on sensor2: Accuracy = {res['accuracy']:.2f}%\")\n", "\n",