Revert "Add Zero-Padding to CSV Filenames"

2024-08-27 09:18:44 +07:00
12 changed files with 62 additions and 5237 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +0,0 @@
 *.ipynb filter=nbstripout
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 # Ignore CSV files in the data directory and all its subdirectories
 data/**/*.csv
-.venv/
+
 *.pyc
--- a/.gitmessage
+++ b/.gitmessage
@@ -1,30 +0,0 @@
 # .gitmessage
 # <type>(<scope>): <subject>
 # |<----  Using a Maximum Of 50 Characters  ---->|
 # 
 # Explain the problem that this commit is solving. Focus on why you
 # are making this change as opposed to how. Use clear, concise language.
 # |<----   Try To Limit Each Line to a Maximum Of 72 Characters   ---->|
 #
 # -- COMMIT END --
 # Types:
 #   feat     (new feature)
 #   fix      (bug fix)
 #   refactor (refactoring code)
 #   style    (formatting, no code change)
 #   doc      (changes to documentation)
 #   test     (adding or refactoring tests)
 #   perf     (performance improvements)
 #   chore    (routine tasks, dependencies)
 #   exp      (experimental work/exploration)
 # 
 # Scope:
 #   latex    (changes to thesis LaTeX)
 #   src      (changes to Python source code)
 #   nb       (changes to notebooks)
 #   ml       (ML model specific changes)
 #   data     (data processing/preparation)
 #   viz      (visualization related)
 #   all      (changes spanning entire repository)
 # --------------------
--- a/code/notebooks/03_feature_extraction.ipynb
+++ b/code/notebooks/03_feature_extraction.ipynb
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
--- a/code/src/features/frequency_domain_features.py
+++ b/code/src/features/frequency_domain_features.py
@@ -1,192 +0,0 @@
 import numpy as np
 import pandas as pd
 from scipy.fft import fft, fftfreq
 def get_mean_freq(signal, frame_size, hop_length):
    """Return, per frame, the summed spectral magnitude divided by ``frame_size``.

    Frames start every ``hop_length`` samples and span up to ``frame_size``
    samples, so frames near the end of ``signal`` may be shorter. For a frame
    of length ``L`` the spectrum is the first ``L // 2`` bins of
    ``|FFT(frame / L)|``.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        magnitude = np.abs(np.fft.fft(frame / n))[:n // 2]
        # The divisor is frame_size, not the number of retained bins
        return np.sum(magnitude) / frame_size
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_variance_freq(signal, frame_size, hop_length):
    """Return, per frame, the spread of the spectral magnitudes.

    Frames start every ``hop_length`` samples and span up to ``frame_size``
    samples. For each frame the first ``L // 2`` bins of ``|FFT(frame / L)|``
    are taken, their sum divided by ``frame_size`` is used as the centre, and
    the summed squared deviations are divided by ``frame_size - 1``.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        magnitude = np.abs(np.fft.fft(frame / n))[:n // 2]
        centre = np.sum(magnitude) / frame_size
        return np.sum((magnitude - centre) ** 2) / (frame_size - 1)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_third_freq(signal, frame_size, hop_length):
    """Return, per frame, a third-moment (skewness-like) spectral statistic.

    For each frame (start every ``hop_length`` samples, up to ``frame_size``
    samples long) the first ``L // 2`` bins of ``|FFT(frame / L)|`` are
    centred on ``sum(spectrum) / frame_size``. The summed cubed deviations
    are divided by ``frame_size * sigma**3`` where ``sigma`` is the square
    root of the summed squared deviations over ``frame_size - 1``.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        magnitude = np.abs(np.fft.fft(frame / n))[:n // 2]
        deviation = magnitude - np.sum(magnitude) / frame_size
        sigma = np.sqrt(np.sum(deviation ** 2) / (frame_size - 1))
        return np.sum(deviation ** 3) / (frame_size * sigma ** 3)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_forth_freq(signal, frame_size, hop_length):
    """Return, per frame, a fourth-moment (kurtosis-like) spectral statistic.

    For each frame (start every ``hop_length`` samples, up to ``frame_size``
    samples long) the first ``L // 2`` bins of ``|FFT(frame / L)|`` are
    centred on ``sum(spectrum) / frame_size``. The summed fourth-power
    deviations are divided by ``frame_size * variance**2`` where ``variance``
    is the summed squared deviations over ``frame_size - 1``.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        magnitude = np.abs(np.fft.fft(frame / n))[:n // 2]
        deviation = magnitude - np.sum(magnitude) / frame_size
        variance = np.sum(deviation ** 2) / (frame_size - 1)
        return np.sum(deviation ** 4) / (frame_size * variance ** 2)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_grand_freq(signal, frame_size, hop_length):
    """Return, per frame, the magnitude-weighted mean frequency.

    For each frame (start every ``hop_length`` samples, up to ``frame_size``
    samples long) the first ``L // 2`` bins of ``|FFT(frame / L)|`` are paired
    with the matching bins of ``np.fft.fftfreq(L, .1/25600)`` and the value
    ``sum(f * y) / sum(y)`` is computed.

    Returns a 1-D numpy array holding one value per frame.
    """
    # NOTE(review): the sample spacing handed to fftfreq is 0.1/25600 - confirm
    # that this matches the acquisition rate of the data.
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        return np.sum(freqs * magnitude) / np.sum(magnitude)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_std_freq(signal, frame_size, hop_length):
    """Return, per frame, the magnitude-weighted frequency spread.

    For each frame (start every ``hop_length`` samples, up to ``frame_size``
    samples long) the first ``L // 2`` bins of ``|FFT(frame / L)|`` (``y``)
    and of ``np.fft.fftfreq(L, .1/25600)`` (``f``) are used to compute
    ``sqrt(sum((f - c)**2 * y) / frame_size)`` with
    ``c = sum(f * y) / sum(y)``.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        centroid = np.sum(freqs * magnitude) / np.sum(magnitude)
        return np.sqrt(np.sum((freqs - centroid) ** 2 * magnitude) / frame_size)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_Cfactor_freq(signal, frame_size, hop_length):
    """Return, per frame, ``sqrt(sum(f**2 * y) / sum(y))``.

    ``y`` is the first ``L // 2`` bins of ``|FFT(frame / L)|`` and ``f`` the
    matching bins of ``np.fft.fftfreq(L, .1/25600)``, where ``L`` is the
    length of the frame. Frames start every ``hop_length`` samples and span
    up to ``frame_size`` samples.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        second_moment = np.sum(freqs ** 2 * magnitude)
        return np.sqrt(second_moment / np.sum(magnitude))
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_Dfactor_freq(signal, frame_size, hop_length):
    """Return, per frame, ``sqrt(sum(f**4 * y) / sum(f**2 * y))``.

    ``y`` is the first ``L // 2`` bins of ``|FFT(frame / L)|`` and ``f`` the
    matching bins of ``np.fft.fftfreq(L, .1/25600)``, where ``L`` is the
    length of the frame. Frames start every ``hop_length`` samples and span
    up to ``frame_size`` samples.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        fourth_moment = np.sum(freqs ** 4 * magnitude)
        second_moment = np.sum(freqs ** 2 * magnitude)
        return np.sqrt(fourth_moment / second_moment)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_Efactor_freq(signal, frame_size, hop_length):
    """Return, per frame, ``sqrt(sum(f**2 * y) / sqrt(sum(y) * sum(f**4 * y)))``.

    ``y`` is the first ``L // 2`` bins of ``|FFT(frame / L)|`` and ``f`` the
    matching bins of ``np.fft.fftfreq(L, .1/25600)``, where ``L`` is the
    length of the frame. Frames start every ``hop_length`` samples and span
    up to ``frame_size`` samples.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        second_moment = np.sum(freqs ** 2 * magnitude)
        scale = np.sqrt(np.sum(magnitude) * np.sum(freqs ** 4 * magnitude))
        return np.sqrt(second_moment / scale)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_Gfactor_freq(signal, frame_size, hop_length):
    """Return, per frame, the frequency spread divided by the mean frequency.

    With ``y`` the first ``L // 2`` bins of ``|FFT(frame / L)|`` and ``f`` the
    matching bins of ``np.fft.fftfreq(L, .1/25600)``, the mean frequency is
    ``c = sum(f * y) / sum(y)`` and the spread is
    ``sqrt(sum((f - c)**2 * y) / frame_size)``. Frames start every
    ``hop_length`` samples and span up to ``frame_size`` samples.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        centroid = np.sum(freqs * magnitude) / np.sum(magnitude)
        spread = np.sqrt(np.sum((freqs - centroid) ** 2 * magnitude) / frame_size)
        return spread / centroid
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_third1_freq(signal, frame_size, hop_length):
    """Return, per frame, a magnitude-weighted third moment of the frequencies.

    With ``y`` the first ``L // 2`` bins of ``|FFT(frame / L)|`` and ``f`` the
    matching bins of ``np.fft.fftfreq(L, .1/25600)``, the value is
    ``sum((f - c)**3 * y) / (frame_size * s**3)`` where
    ``c = sum(f * y) / sum(y)`` and
    ``s = sqrt(sum((f - c)**2 * y) / frame_size)``. Frames start every
    ``hop_length`` samples and span up to ``frame_size`` samples.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        offset = freqs - np.sum(freqs * magnitude) / np.sum(magnitude)
        spread = np.sqrt(np.sum(offset ** 2 * magnitude) / frame_size)
        return np.sum(offset ** 3 * magnitude) / (frame_size * spread ** 3)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_forth1_freq(signal, frame_size, hop_length):
    """Return, per frame, a magnitude-weighted fourth moment of the frequencies.

    With ``y`` the first ``L // 2`` bins of ``|FFT(frame / L)|`` and ``f`` the
    matching bins of ``np.fft.fftfreq(L, .1/25600)``, the value is
    ``sum((f - c)**4 * y) / (frame_size * s**4)`` where
    ``c = sum(f * y) / sum(y)`` and
    ``s = sqrt(sum((f - c)**2 * y) / frame_size)``. Frames start every
    ``hop_length`` samples and span up to ``frame_size`` samples.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        offset = freqs - np.sum(freqs * magnitude) / np.sum(magnitude)
        spread = np.sqrt(np.sum(offset ** 2 * magnitude) / frame_size)
        return np.sum(offset ** 4 * magnitude) / (frame_size * spread ** 4)
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_Hfactor_freq(signal, frame_size, hop_length):
    """Return, per frame, a magnitude-weighted half-order frequency moment.

    With ``y`` the first ``L // 2`` bins of ``|FFT(frame / L)|`` and ``f`` the
    matching bins of ``np.fft.fftfreq(L, .1/25600)``, the value is
    ``sum(sqrt(|f - c|) * y) / (frame_size * sqrt(s))`` where
    ``c = sum(f * y) / sum(y)`` and
    ``s = sqrt(sum((f - c)**2 * y) / frame_size)``. Frames start every
    ``hop_length`` samples and span up to ``frame_size`` samples.

    Returns a 1-D numpy array holding one value per frame.
    """
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        offset = freqs - np.sum(freqs * magnitude) / np.sum(magnitude)
        spread = np.sqrt(np.sum(offset ** 2 * magnitude) / frame_size)
        return np.sum(np.sqrt(np.abs(offset)) * magnitude) / (frame_size * np.sqrt(spread))
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 def get_Jfactor_freq(signal, frame_size, hop_length):
    """Return, per frame, a magnitude-weighted half-order frequency moment.

    With ``y`` the first ``L // 2`` bins of ``|FFT(frame / L)|`` and ``f`` the
    matching bins of ``np.fft.fftfreq(L, .1/25600)``, the value is
    ``sum(sqrt(|f - c|) * y) / (frame_size * sqrt(s))`` where
    ``c = sum(f * y) / sum(y)`` and
    ``s = sqrt(sum((f - c)**2 * y) / frame_size)``. Frames start every
    ``hop_length`` samples and span up to ``frame_size`` samples.

    Returns a 1-D numpy array holding one value per frame.
    """
    # NOTE(review): this evaluates exactly the same expression as
    # get_Hfactor_freq - confirm whether a different formula was intended.
    def frame_value(start):
        frame = signal[start:start + frame_size]
        n = len(frame)
        half = n // 2
        magnitude = np.abs(np.fft.fft(frame / n))[:half]
        freqs = np.fft.fftfreq(n, .1/25600)[:half]
        offset = freqs - np.sum(freqs * magnitude) / np.sum(magnitude)
        spread = np.sqrt(np.sum(offset ** 2 * magnitude) / frame_size)
        return np.sum(np.sqrt(np.abs(offset)) * magnitude) / (frame_size * np.sqrt(spread))
    return np.array([frame_value(start) for start in range(0, len(signal), hop_length)])
 class FrequencyFeatureExtractor:
    """Summary statistics of the FFT magnitude spectrum of a signal.

    On construction the magnitude of ``fft(data)`` is stored in
    ``frequency_spectrum`` together with its length (``n``), mean, variance
    and standard deviation; the derived feature dictionary is stored in
    ``features``.
    """
    def __init__(self, data):
        """Compute the spectrum of ``data`` and the features derived from it.

        ``data`` is assumed to be a numpy array (it is passed straight to
        ``fft``).
        """
        self.x = data
        spectrum = np.abs(fft(self.x))
        self.frequency_spectrum = spectrum
        self.n = len(spectrum)
        self.mean_freq = np.mean(spectrum)
        self.variance_freq = np.var(spectrum)
        self.std_freq = np.std(spectrum)
        self.features = self.calculate_features()
    def calculate_features(self):
        """Return a dict mapping feature labels to values for the stored spectrum.

        The variance uses an ``n - 1`` divisor, while skewness and kurtosis
        divide by ``n`` times the matching power of the standard deviation.
        """
        spectrum = self.frequency_spectrum
        count = self.n
        S_mu = self.mean_freq
        centered = spectrum - S_mu
        # S_MAX and S_Peak are both the largest spectrum value
        S_MAX = np.max(spectrum)
        S_SBP = np.sum(spectrum)
        S_Peak = np.max(spectrum)
        S_V = np.sum(centered ** 2) / (count - 1)
        S_Sigma = np.sqrt(S_V)
        S_Skewness = np.sum(centered ** 3) / (count * S_Sigma ** 3)
        S_Kurtosis = np.sum(centered ** 4) / (count * S_Sigma ** 4)
        S_RSPPB = S_Peak / S_mu
        labelled = [
            ('Mean of band Power Spectrum (S_mu)', S_mu),
            ('Max of band power spectrum (S_MAX)', S_MAX),
            ('Sum of total band power (S_SBP)', S_SBP),
            ('Peak of band power (S_Peak)', S_Peak),
            ('Variance of band power (S_V)', S_V),
            ('Standard Deviation of band power (S_Sigma)', S_Sigma),
            ('Skewness of band power (S_Skewness)', S_Skewness),
            ('Kurtosis of band power (S_Kurtosis)', S_Kurtosis),
            ('Relative Spectral Peak per Band Power (S_RSPPB)', S_RSPPB),
        ]
        return dict(labelled)
    def __repr__(self):
        """Return a multi-line report with one ``label: value`` line per feature."""
        lines = ["Frequency Domain Feature Extraction Results:\n"]
        lines.extend(f"{feature}: {value:.4f}\n" for feature, value in self.features.items())
        return "".join(lines)
 def ExtractFrequencyFeatures(object):
    """Read a CSV and return the frequency features of its second column.

    ``object`` is handed to ``pd.read_csv``; the first line of the file is
    skipped before parsing. The values of the second column are passed to
    ``FrequencyFeatureExtractor`` and its ``features`` dict is returned.
    """
    # skiprows=1 drops the leading separator-info line so the next line is the header
    table = pd.read_csv(object, skiprows=1)
    # Assumes the signal samples live in the second column
    samples = table.iloc[:, 1].values
    return FrequencyFeatureExtractor(samples).features
 # Usage Example
 # features = ExtractFrequencyFeatures('path_to_your_data.csv')
 # print(features)
--- a/code/src/features/time_domain_features.py
+++ b/code/src/features/time_domain_features.py
@@ -36,12 +36,9 @@ class FeatureExtractor:
            result += f"{feature}: {value:.4f}\n"
        return result
-def ExtractTimeFeatures(object, absolute):
+def ExtractTimeFeatures(object):
    data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
-    if absolute:
+    extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
        extractor = FeatureExtractor(np.abs(data.iloc[:, 1].values)) # Assuming the data is in the second column
    else:
        extractor = FeatureExtractor(data.iloc[:, 1].values)
    features = extractor.features
    return features
    # Save features to a file
--- a/code/src/process_stft.py
+++ b/code/src/process_stft.py
@@ -1,115 +0,0 @@
 import os
 import pandas as pd
 import numpy as np
 from scipy.signal import stft, hann
 import glob
 import multiprocessing  # Added import for multiprocessing
 # Root directory that contains the DAMAGE_X folders
 damage_base_path = 'D:/thesis/data/converted/raw'
 # Output directory of each sensor, keyed 'sensor1' / 'sensor2'
 output_dirs = {
    f'sensor{sensor_id}': os.path.join(damage_base_path, f'sensor{sensor_id}')
    for sensor_id in (1, 2)
 }
 # Create output directories if they don't exist
 for dir_path in output_dirs.values():
    os.makedirs(dir_path, exist_ok=True)
 # STFT parameters
 window_size, hop_size = 1024, 512
 # NOTE(review): `hann` is imported from `scipy.signal` in this file; recent SciPy
 # releases only provide it as `scipy.signal.windows.hann` - confirm the import
 # against the installed SciPy version.
 window = hann(window_size)
 Fs = 1024
 # Number of damage cases (adjust as needed)
 num_damage_cases = 6  # Change to 30 if you have 30 damage cases
 # Number of test runs per damage case
 num_test_runs = 5
 # Function to perform STFT and return magnitude
 def compute_stft(vibration_data):
    """Return the STFT magnitude of ``vibration_data``, transposed.

    The transform uses the module-level ``Fs``, ``window``, ``window_size``
    and ``hop_size`` settings; consecutive segments overlap by
    ``window_size - hop_size`` samples. The magnitude array returned by
    ``stft`` is transposed so that frequencies run along the columns.
    """
    overlap = window_size - hop_size
    _, _, spectrum = stft(
        vibration_data,
        fs=Fs,
        window=window,
        nperseg=window_size,
        noverlap=overlap
    )
    # Transpose to have frequencies as columns
    return np.abs(spectrum).T
 def process_damage_case(damage_num):
    """Aggregate the STFT magnitudes of all test runs of one damage case.

    For each sensor (1 and 2) the per-run CSV files inside
    ``DAMAGE_<damage_num>`` are read, transformed with ``compute_stft`` and
    stacked vertically into ``stft_data<sensor>_<damage_num>.csv`` in that
    sensor's output directory. A missing damage folder ends the call; missing,
    unreadable or unexpectedly shaped run files are reported and skipped.
    """
    damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
    if not os.path.isdir(damage_folder):
        print(f"Folder {damage_folder} does not exist. Skipping...")
        return
    # Sensor 1 and Sensor 2 are processed separately
    for sensor_num in [1, 2]:
        # Sensor 1 corresponds to '_01', Sensor 2 corresponds to '_02'
        sensor_suffix = f'_0{sensor_num}'
        # STFT frames collected from every usable test run
        run_frames = []
        for test_num in range(1, num_test_runs + 1):
            file_path = os.path.join(
                damage_folder,
                f'DAMAGE_{damage_num}_TEST{test_num}{sensor_suffix}.csv'
            )
            if not os.path.isfile(file_path):
                print(f"File {file_path} does not exist. Skipping...")
                continue
            try:
                df = pd.read_csv(file_path)
            except Exception as e:
                print(f"Error reading {file_path}: {e}. Skipping...")
                continue
            # The CSV must hold exactly two columns: 'Timestamp (s)' and 'Sensor X'
            column_count = df.shape[1]
            if column_count != 2:
                print(f"Unexpected number of columns in {file_path}. Expected 2, got {column_count}. Skipping...")
                continue
            # The second column is assumed to be the sensor data
            magnitude = compute_stft(df.iloc[:, 1].values)
            freq_axis = np.linspace(0, Fs/2, magnitude.shape[1])
            labels = [f"Freq_{freq:.2f}" for freq in freq_axis]
            run_frames.append(pd.DataFrame(magnitude, columns=labels))
        if not run_frames:
            print(f"No STFT data aggregated for Sensor {sensor_num}, Damage {damage_num}.")
            continue
        # Stack the per-run frames vertically and write them out as one CSV
        df_aggregated = pd.concat(run_frames, ignore_index=True)
        output_file = os.path.join(
            output_dirs[f'sensor{sensor_num}'],
            f'stft_data{sensor_num}_{damage_num}.csv'
        )
        df_aggregated.to_csv(output_file, index=False)
        print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}")
 if __name__ == "__main__":  # Main guard needed for multiprocessing
    # One task per damage case, distributed over a default-sized process pool
    damage_numbers = range(1, num_damage_cases + 1)
    with multiprocessing.Pool() as pool:
        pool.map(process_damage_case, damage_numbers)
--- a/code/src/verify_stft.py
+++ b/code/src/verify_stft.py
@@ -1,133 +0,0 @@
 import os
 import pandas as pd
 import numpy as np
 from scipy.signal import stft, hann
 import glob
 # Root directory that contains the DAMAGE_X folders
 damage_base_path = 'D:/thesis/data/converted/raw/'
 # Directory of each sensor's aggregated files, keyed 'sensor1' / 'sensor2'
 sensor_dirs = {
    f'sensor{sensor_id}': os.path.join(damage_base_path, f'sensor{sensor_id}')
    for sensor_id in (1, 2)
 }
 # STFT parameters
 window_size, hop_size = 1024, 512
 window = hann(window_size)
 Fs = 1024
 def verify_stft(damage_num, test_num, sensor_num):
    """
    Verifies the STFT of an individual test run against the aggregated STFT data.

    The run's CSV is read and transformed with the module-level STFT settings,
    then up to three pseudo-randomly chosen rows are compared with the
    matching rows of the sensor's aggregated CSV. Progress and the outcome
    are printed; nothing is returned. Missing or malformed files are reported
    and the verification is skipped.

    The block of the aggregated file belonging to this run is located as
    ``(test_num - 1) * rows`` where ``rows`` is the number of STFT rows of
    this run, i.e. it assumes every earlier run contributed the same number
    of rows and none was skipped during aggregation.

    Parameters:
    - damage_num (int): Damage case number.
    - test_num (int): Test run number.
    - sensor_num (int): Sensor number (1 or 2).
    """
    # Mapping sensor number to suffix
    sensor_suffix = f'_0{sensor_num}'
    # Construct the file name for the individual test run
    individual_file_name = f'DAMAGE_{damage_num}_TEST{test_num}{sensor_suffix}.csv'
    individual_file_path = os.path.join(damage_base_path, f'DAMAGE_{damage_num}', individual_file_name)
    # Check if the individual file exists
    if not os.path.isfile(individual_file_path):
        print(f"File {individual_file_path} does not exist. Skipping verification for this test run.")
        return
    # Read the individual test run CSV
    try:
        df_individual = pd.read_csv(individual_file_path)
    except Exception as e:
        print(f"Error reading {individual_file_path}: {e}. Skipping verification for this test run.")
        return
    # Ensure the CSV has exactly two columns: 'Timestamp (s)' and 'Sensor X'
    if df_individual.shape[1] != 2:
        print(f"Unexpected number of columns in {individual_file_path}. Expected 2, got {df_individual.shape[1]}. Skipping.")
        return
    # Extract vibration data
    vibration_data = df_individual.iloc[:, 1].values
    # Perform STFT (only the complex spectrum is needed here)
    _, _, Zxx = stft(
        vibration_data,
        fs=Fs,
        window=window,
        nperseg=window_size,
        noverlap=window_size - hop_size
    )
    # Compute magnitude and transpose so that each row is one time segment
    stft_magnitude = np.abs(Zxx).T
    # Take the block height from the data instead of assuming 513 rows
    rows_per_run = stft_magnitude.shape[0]
    # Pick up to 3 rows with a private, fixed-seed generator: the selection is
    # reproducible and the global numpy random state is left untouched
    rng = np.random.RandomState(42)
    sample_row_indices = rng.choice(rows_per_run, size=min(3, rows_per_run), replace=False)
    # Read the aggregated STFT CSV
    aggregated_file_name = f'stft_data{sensor_num}_{damage_num}.csv'
    aggregated_file_path = os.path.join(sensor_dirs[f'sensor{sensor_num}'], aggregated_file_name)
    if not os.path.isfile(aggregated_file_path):
        print(f"Aggregated file {aggregated_file_path} does not exist. Skipping verification for this test run.")
        return
    try:
        df_aggregated = pd.read_csv(aggregated_file_path)
    except Exception as e:
        print(f"Error reading {aggregated_file_path}: {e}. Skipping verification for this test run.")
        return
    # Row range of this test run inside the aggregated CSV (end is exclusive)
    start_row = (test_num - 1) * rows_per_run
    end_row = start_row + rows_per_run
    # Ensure the aggregated CSV has enough rows
    if df_aggregated.shape[0] < end_row:
        print(f"Aggregated file {aggregated_file_path} does not have enough rows for Test {test_num}. Skipping.")
        return
    # Extract the corresponding STFT block from the aggregated CSV
    df_aggregated_block = df_aggregated.iloc[start_row:end_row].values
    # Compare selected rows
    all_match = True
    for row_idx in sample_row_indices:
        individual_row = stft_magnitude[row_idx]
        aggregated_row = df_aggregated_block[row_idx]
        # Check if the rows are almost equal within a tolerance
        if np.allclose(individual_row, aggregated_row, atol=1e-6):
            verification_status = "MATCH"
        else:
            verification_status = "MISMATCH"
            all_match = False
        # Print the comparison details
        print(f"Comparing Damage {damage_num}, Test {test_num}, Sensor {sensor_num}, Row {row_idx}: {verification_status}")
        print(f"Individual STFT Row {row_idx}: {individual_row[:5]} ... {individual_row[-5:]}")
        print(f"Aggregated STFT Row {row_idx + start_row}: {aggregated_row[:5]} ... {aggregated_row[-5:]}\n")
    # Report the overall outcome, naming the files that were actually compared
    if all_match:
        print(f"STFT of {individual_file_name} is verified. On `{aggregated_file_name}` start at rows {start_row} to {end_row} with {rows_per_run} rows.\n")
    else:
        print(f"STFT of {individual_file_name} has discrepancies in `{aggregated_file_name}` start at rows {start_row} to {end_row} with {rows_per_run} rows.\n")
 # Number of damage cases and of test runs per damage case
 num_damage_cases = 6  # Adjust to 30 as per your dataset
 num_test_runs = 5
 # Verify every (damage case, test run, sensor) combination, sensors varying fastest
 for damage_num, test_num, sensor_num in (
    (damage, test, sensor)
    for damage in range(1, num_damage_cases + 1)
    for test in range(1, num_test_runs + 1)
    for sensor in [1, 2]
 ):
    verify_stft(damage_num, test_num, sensor_num)
--- a/data/QUGS/convert.py
+++ b/data/QUGS/convert.py
@@ -1,68 +0,0 @@
 import pandas as pd
 import os
 import sys
 from colorama import Fore, Style, init
 def create_damage_files(base_path, output_base, prefix):
    """Split the raw measurement files into per-damage, per-sensor CSV files.

    For every damage scenario (1-6) the five source files
    ``zzz<prefix>D<index>.TXT`` in ``base_path`` are read as tab-separated
    text, skipping the first 10 lines. From the i-th file of a scenario two
    CSV files are written to ``<output_base>/DAMAGE_<damage>/``:
    ``DAMAGE<damage>_TEST<i>_01.csv`` with the ``Time`` column and the i-th
    leading ``Real*`` column, and ``DAMAGE<damage>_TEST<i>_02.csv`` with the
    ``Time`` column and the matching column from ``Real.25`` .. ``Real.29``.

    The scenario folder is created when missing, so the function does not
    depend on the caller having prepared all six folders. Progress is printed
    to stdout; nothing is returned.
    """
    # Initialize colorama
    init(autoreset=True)
    # Column labels as pandas names the duplicated 'Real' headers of the input
    columns = ['Real'] + [f'Real.{i}' for i in range(1, 30)]
    sensor_end_map = {1: 'Real.25', 2: 'Real.26', 3: 'Real.27', 4: 'Real.28', 5: 'Real.29'}
    # Damage scenarios and the indices of the source files that belong to them
    damage_scenarios = {
        1: range(1, 6),    # Damage 1: zzz<prefix>D1.TXT  to zzz<prefix>D5.TXT
        2: range(6, 11),   # Damage 2: zzz<prefix>D6.TXT  to zzz<prefix>D10.TXT
        3: range(11, 16),  # Damage 3: zzz<prefix>D11.TXT to zzz<prefix>D15.TXT
        4: range(16, 21),  # Damage 4: zzz<prefix>D16.TXT to zzz<prefix>D20.TXT
        5: range(21, 26),  # Damage 5: zzz<prefix>D21.TXT to zzz<prefix>D25.TXT
        6: range(26, 31),  # Damage 6: zzz<prefix>D26.TXT to zzz<prefix>D30.TXT
    }
    for damage, files in damage_scenarios.items():
        damage_dir = os.path.join(output_base, f'DAMAGE_{damage}')
        # to_csv does not create missing folders, so make sure the target exists
        os.makedirs(damage_dir, exist_ok=True)
        for i, file_index in enumerate(files, start=1):
            # Load original data file (column names come from the file's own header)
            source_name = f'zzz{prefix}D{file_index}.TXT'
            df = pd.read_csv(os.path.join(base_path, source_name), sep='\t', skiprows=10)
            top_sensor = columns[i-1]
            print(top_sensor, type(top_sensor))
            # Suffix '01' holds the top sensor, '02' the bottom sensor of this test
            for suffix, sensor in (('01', top_sensor), ('02', sensor_end_map[i])):
                output_file = os.path.join(damage_dir, f'DAMAGE{damage}_TEST{i}_{suffix}.csv')
                print(f"Creating {output_file} from taking {source_name}")
                print("Taking datetime column on index 0...")
                print(f"Taking `{sensor}`...")
                df[['Time', sensor]].to_csv(output_file, index=False)
                print(Fore.GREEN + "Done")
            print("---")
 def main():
    """Command-line entry point: ``convert.py <input_dir> <output_base> <prefix>``.

    Reads the three positional arguments from ``sys.argv``, creates the
    ``DAMAGE_1`` .. ``DAMAGE_6`` folders below the output directory and runs
    ``create_damage_files``. Prints a usage message and exits with status 1
    when fewer than three arguments are given.
    """
    # All three arguments are required; checking only for the first one let
    # the script crash with an IndexError further down
    if len(sys.argv) < 4:
        print("Usage: python convert.py <path_to_csv_files> <output_base> <prefix>")
        sys.exit(1)
    # Input directory, output directory and the file-name prefix (zzz<prefix>D<n>.TXT)
    base_path, output_base, prefix = sys.argv[1:4]
    # Create one output folder per damage scenario; create_damage_files writes
    # to DAMAGE_1 through DAMAGE_6
    for i in range(1, 7):
        os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
    create_damage_files(base_path, output_base, prefix)
    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
 if __name__ == "__main__":
    main()
--- a/data/processed/README.md
+++ b/data/processed/README.md
@@ -1,8 +1,8 @@
-# Raw Data Directory
+# Processed Data Directory
 ## Overview
-This `data/raw` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `raw` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
+This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
 ## Directory Structure
--- a/generate_dummy_data.py
+++ b/generate_dummy_data.py
@@ -13,23 +13,14 @@ processed_path = os.path.join(base_path, "processed")
 os.makedirs(raw_path, exist_ok=True)
 os.makedirs(processed_path, exist_ok=True)
-# Define the number of zeros to pad
+for damage in range(1, 6):  # 5 Damage levels
-num_damages = 5
+    damage_folder = f"DAMAGE_{damage}"
-num_tests = 10
+    damage_path = os.path.join(processed_path, damage_folder)
 num_sensors = 2
 damage_pad = len(str(num_damages))
 test_pad = len(str(num_tests))
 sensor_pad = len(str(num_sensors))
 for damage in range(1, num_damages + 1):  # 5 Damage levels starts from 1
    damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
    damage_path = os.path.join(raw_path, damage_folder)
    os.makedirs(damage_path, exist_ok=True)
    for test in range(1, 11):  # 10 Tests per damage level
        for sensor in range(1, 3):  # 2 Sensors per test
        # Filename for the CSV
-            csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}_{sensor:0{sensor_pad}}.csv"
+        csv_filename = f"D{damage}_TEST{test}.csv"
        csv_path = os.path.join(damage_path, csv_filename)
        # Generate dummy data