# thesis/code/src/features/build_features.py

# src/features/build_features.py
import pandas as pd
from time_domain_features import ExtractTimeFeatures
import os
import re

def extract_numbers(filename):
    """Return every run of digits found in *filename* as a tuple of ints.

    The numbers are returned in the order they appear in the string, e.g.
    ``"damage3_test12.csv"`` -> ``(3, 12)``. A name without digits yields an
    empty tuple.

    Args:
        filename: String (typically a file name) to scan for digit runs.

    Returns:
        Tuple of integers, one per digit run, in left-to-right order.
    """
    # The original wrapped the result in print(), which made the function
    # always return None; return the tuple itself so callers can use it.
    return tuple(int(num) for num in re.findall(r'\d+', filename))

def build_features(input_dir, output_dir):
    """Extract features from every test file and save them as a single CSV.

    Each sub-directory of ``input_dir`` is scanned, every file inside it is
    passed to ``ExtractTimeFeatures``, and the collected results are stacked
    into one table written to ``<output_dir>/combined_features.csv``.

    Args:
        input_dir: Directory whose sub-directories hold the per-test files
            (presumably one sub-directory per damage level).
        output_dir: Directory that receives ``combined_features.csv``. It is
            created if it does not exist yet.

    Returns:
        The combined ``pandas.DataFrame``, one row per processed file.
    """
    all_features = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the resulting CSV reproducible between runs and machines.
    for nth_damage in sorted(os.listdir(input_dir)):
        nth_damage_path = os.path.join(input_dir, nth_damage)
        if not os.path.isdir(nth_damage_path):
            continue
        print(nth_damage)
        for nth_test in sorted(os.listdir(nth_damage_path)):
            nth_test_path = os.path.join(nth_damage_path, nth_test)
            # Only regular files are handed to the extractor; nested
            # directories are skipped.
            if not os.path.isfile(nth_test_path):
                continue
            # Per the original note, ExtractTimeFeatures returns the features
            # of one CSV file as a dictionary.
            all_features.append(ExtractTimeFeatures(nth_test_path))

    # Create a DataFrame from the list of per-file feature records
    df = pd.DataFrame(all_features)
    print(df)

    # Make sure the destination exists before writing; to_csv does not
    # create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    output_file_path = os.path.join(output_dir, 'combined_features.csv')
    df.to_csv(output_file_path, index=False)
    print(f"Features saved to {output_file_path}")
    return df

if __name__ == "__main__":
    import sys

    # Two positional arguments are required: the root directory holding the
    # per-damage sub-directories and the directory that receives the CSV.
    if len(sys.argv) < 3:
        sys.exit(f"Usage: python {sys.argv[0]} <input_dir> <output_dir>")

    input_path = sys.argv[1]  # e.g. 'data/processed/'
    output_path = sys.argv[2]  # directory for combined_features.csv, e.g. 'data/features/'

    build_features(input_path, output_path)