# thesis/code/src/process_stft.py

import os
import pandas as pd
import numpy as np
from scipy.signal import stft
from scipy.signal.windows import hann
import glob
import multiprocessing  # Added import for multiprocessing
from typing import Union, Tuple

# Root folder containing the DAMAGE_X input folders. The per-sensor output
# folders are created directly underneath it.
damage_base_path = 'D:/thesis/data/converted/raw_B'

# Output folder for each sensor's aggregated STFT CSV files, keyed by sensor.
output_dirs = {
    sensor_key: os.path.join(damage_base_path, sensor_key)
    for sensor_key in ('sensor1', 'sensor2')
}

# Make sure every output folder exists before anything is written to it.
for dir_path in output_dirs.values():
    os.makedirs(dir_path, exist_ok=True)

# NOTE: the STFT parameters (window size, hop size, sampling frequency) are
# defined inside compute_stft rather than at module level.

# Highest damage-case index to process; cases 0..num_damage_cases are handled.
num_damage_cases = 6  # Change to 30 if you have 30 damage cases

# Function to perform STFT and return magnitude
def compute_stft(
    vibration_data: np.ndarray,
    return_param: bool = False,
    *,
    window_size: int = 1024,
    hop_size: int = 512,
    fs: int = 1024,
) -> Union[pd.DataFrame, Tuple[pd.DataFrame, list[int]]]:
    """
    Compute the Short-Time Fourier Transform (STFT) magnitude of a 1-D signal.

    Parameters
    ----------
    vibration_data : numpy.ndarray
        The input vibration data as a 1D NumPy array.
    return_param : bool, optional
        If True, the STFT parameters are returned alongside the magnitude.
        Defaults to False.
    window_size : int, optional
        Length of the Hann window / STFT segment in samples. Defaults to 1024.
    hop_size : int, optional
        Step between consecutive segments in samples; the overlap passed to
        SciPy is ``window_size - hop_size``. Defaults to 512.
    fs : int, optional
        Sampling frequency of ``vibration_data`` in Hz. Defaults to 1024.

    Returns
    -------
    pd.DataFrame
        If `return_param` is False: the STFT magnitude with one row per time
        frame and one column per frequency bin, labelled ``Freq_<Hz>`` with
        two decimals.
    tuple
        If `return_param` is True, a tuple containing:
        - pd.DataFrame: the magnitude frame described above.
        - list[int]: the STFT parameters ``[window_size, hop_size, fs]``.

    Raises
    ------
    ValueError
        If ``hop_size`` is not in the range ``1..window_size``.
    """
    if not 0 < hop_size <= window_size:
        raise ValueError(
            f"hop_size must be between 1 and window_size ({window_size}), got {hop_size}"
        )

    # The window is built explicitly with scipy.signal.windows.hann (its
    # default form) rather than passed by name, so results stay identical to
    # the existing analysis.
    window = hann(window_size)

    frequencies, _times, Zxx = stft(
        vibration_data,
        fs=fs,
        window=window,
        nperseg=window_size,
        noverlap=window_size - hop_size,
    )
    # Zxx is (n_frequencies, n_frames); only the magnitude is kept.
    stft_magnitude = np.abs(Zxx)

    # After transposing, columns correspond to frequency bins, so the labels
    # must come from the frequency axis. Using the `frequencies` array returned
    # by stft guarantees one label per column (labelling by the number of time
    # frames makes the DataFrame constructor fail with a length mismatch).
    df_stft = pd.DataFrame(
        stft_magnitude.T,
        columns=[f"Freq_{freq:.2f}" for freq in frequencies],
    )

    if return_param:
        return df_stft, [window_size, hop_size, fs]
    return df_stft

def _read_vibration_column(file_path):
    """Return the second column of a two-column sensor CSV, or None if unusable.

    A message is printed and None is returned when the file is missing, cannot
    be parsed, or does not have exactly two columns.
    """
    # Check if the file exists
    if not os.path.isfile(file_path):
        print(f"File {file_path} does not exist. Skipping...")
        return None

    # Read the CSV file; any read/parse failure skips this file only.
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error reading {file_path}: {e}. Skipping...")
        return None

    # Ensure the CSV has exactly two columns: 'Timestamp (s)' and 'Sensor X'
    if df.shape[1] != 2:
        print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
        return None

    return df.iloc[:, 1].values


def process_damage_case(damage_num):
    """
    Aggregate the STFT magnitudes of every test run of one damage case.

    For each sensor (1 and 2), the files
    ``DAMAGE_<damage_num>_TEST<k>_0<sensor>.csv`` inside
    ``<damage_base_path>/DAMAGE_<damage_num>`` are read, transformed with
    ``compute_stft`` and stacked vertically. The result is written to
    ``stft_data<sensor>_<damage_num>.csv`` in that sensor's output folder,
    preceded by a ``sep=,`` header line.

    Parameters
    ----------
    damage_num : int
        Index X of the ``DAMAGE_X`` folder to process. Case 0 is expected to
        hold 125 test runs, every other case 5.

    Returns
    -------
    None
        Missing folders/files and unreadable CSVs are reported via ``print``
        and skipped.
    """
    damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')

    # Check if the damage folder exists
    if not os.path.isdir(damage_folder):
        print(f"Folder {damage_folder} does not exist. Skipping...")
        return

    # Number of test runs per damage case
    num_test_runs = 125 if damage_num == 0 else 5

    # Process Sensor 1 and Sensor 2 separately
    for sensor_num in [1, 2]:
        # Sensor 1 corresponds to '_01', Sensor 2 corresponds to '_02'
        sensor_suffix = f'_0{sensor_num}'
        aggregated_stft = []  # List to hold STFTs from all test runs
        total_rows = 0        # Running number of aggregated STFT rows

        # Iterate over all test runs
        for test_num in range(1, num_test_runs + 1):
            file_name = f'DAMAGE_{damage_num}_TEST{test_num}{sensor_suffix}.csv'
            file_path = os.path.join(damage_folder, file_name)

            vibration_data = _read_vibration_column(file_path)
            if vibration_data is None:
                continue

            # Perform STFT
            df_stft = compute_stft(vibration_data)

            # For damage case 0 only, keep at most the first 20 STFT frames
            # for test runs 1-60 and at most the first 21 frames for later
            # runs. NOTE(review): presumably this balances the number of
            # feature vectors across runs - confirm the intended counts.
            if damage_num == 0:
                print(f"Processing damage_num = 0, test_num = {test_num}")
                if test_num <= 60:
                    df_stft = df_stft.iloc[:20, :]
                    print(f"Reduced df_stft shape (20 samples): {df_stft.shape}")
                else:
                    df_stft = df_stft.iloc[:21, :]
                    print(f"Reduced df_stft shape (21 samples): {df_stft.shape}")

            # Append to the aggregated list and report the running row count
            aggregated_stft.append(df_stft)
            total_rows += df_stft.shape[0]
            print(total_rows)

        if not aggregated_stft:
            print(f"No STFT data aggregated for Sensor {sensor_num}, Damage {damage_num}.")
            continue

        # Concatenate all STFT DataFrames vertically
        df_aggregated = pd.concat(aggregated_stft, ignore_index=True)

        # Define output filename
        output_file = os.path.join(
            output_dirs[f'sensor{sensor_num}'],
            f'stft_data{sensor_num}_{damage_num}.csv'
        )

        # Save the aggregated STFT to CSV. The handle is opened with
        # newline='' because pandas writes its own line terminators; without
        # it, text-mode newline translation on Windows doubles the carriage
        # return and produces blank rows. The 'sep=,' header therefore uses
        # os.linesep explicitly so it matches the rows pandas writes.
        with open(output_file, 'w', newline='') as file:
            file.write(f'sep=,{os.linesep}')
            df_aggregated.to_csv(file, index=False)
        print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}")

if __name__ == "__main__":
    # Only start the worker pool when run as a script, so that importing this
    # module (as multiprocessing workers may do) does not launch another pool.
    damage_case_ids = range(num_damage_cases + 1)  # cases 0..num_damage_cases
    with multiprocessing.Pool() as worker_pool:
        # One task per damage case; map blocks until all cases are processed.
        worker_pool.map(process_damage_case, damage_case_ids)