diff --git a/code/src/data_preprocessing.py b/code/src/data_preprocessing.py index 340c706..23b2e54 100644 --- a/code/src/data_preprocessing.py +++ b/code/src/data_preprocessing.py @@ -4,7 +4,7 @@ import re import sys import numpy as np from colorama import Fore, Style, init -from typing import TypedDict, Dict, List +from typing import TypedDict, Tuple, List from joblib import load from pprint import pprint @@ -35,8 +35,8 @@ def complement_pairs(n, prefix, extension): if a != orig_a: # skip original a yield (filename, [a, a + 25]) # use yield instead of return to return a generator of tuples -def generate_df_tuples(prefix: str, total_dfs: int=30, extension: str="TXT", first_col_start: int=1, last_col_offset: int=25, - group_size: int=5, special_groups: list=None, group: bool=True): +def generate_df_tuples(prefix: str, extension: str="TXT", first_col_start: int=1, last_col_offset: int=25, + group_size: int=5, special_groups: list=None, group: bool=True, undamage_file: str=None) -> List[Tuple[str, List[int]]]: """ Generate a structured list of tuples containing DataFrame references and column indices. @@ -78,6 +78,12 @@ def generate_df_tuples(prefix: str, total_dfs: int=30, extension: str="TXT", fir # Add special groups at specified positions (other than beginning) if special_groups: result.insert(0, special_groups) + if undamage_file: + for i in range (1, 6): + n = 5 + i + bottom_end = i + top_end = bottom_end + 25 + result[0].append((undamage_file, [bottom_end, top_end])) return result @@ -354,4 +360,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/code/src/process_stft.py b/code/src/process_stft.py index a0cdaf2..196194a 100644 --- a/code/src/process_stft.py +++ b/code/src/process_stft.py @@ -8,7 +8,7 @@ import multiprocessing # Added import for multiprocessing from typing import Union, Tuple # Define the base directory where DAMAGE_X folders are located -damage_base_path = 'D:/thesis/data/converted/raw' +damage_base_path = 'D:/thesis/data/converted/raw_B' # Define output directories for each sensor output_dirs = { @@ -52,7 +52,7 @@ def compute_stft(vibration_data: np.ndarray, return_param: bool = False) -> Unio hop_size = 512 window = hann(window_size) Fs = 1024 - + frequencies, times, Zxx = stft( vibration_data, fs=Fs, @@ -77,7 +77,7 @@ def process_damage_case(damage_num): damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}') if damage_num == 0: # Number of test runs per damage case - num_test_runs = 120 + num_test_runs = 125 else: num_test_runs = 5 # Check if the damage folder exists @@ -122,12 +122,12 @@ def process_damage_case(damage_num): # only inlcude 21 samples vector features for first 45 num_test_runs else include 22 samples vector features if damage_num == 0: print(f"Processing damage_num = 0, test_num = {test_num}") - if test_num <= 45: - df_stft = df_stft.iloc[:22, :] - print(f"Reduced df_stft shape (21 samples): {df_stft.shape}") + if test_num <= 60: + df_stft = df_stft.iloc[:20, :] + print(f"Reduced df_stft shape (20 samples): {df_stft.shape}") else: df_stft = df_stft.iloc[:21, :] - print(f"Reduced df_stft shape (22 samples): {df_stft.shape}") + print(f"Reduced df_stft shape (21 samples): {df_stft.shape}") # Append to the aggregated list aggregated_stft.append(df_stft)