# src/features/build_features.py
"""Combine per-test time-domain features into a single CSV file.

Usage:
    python build_features.py <input_dir> <output_dir>

``input_dir`` is walked two levels deep: every sub-directory of it is
listed, and every entry inside those sub-directories is passed to
``ExtractTimeFeatures``. The collected results are written to
``<output_dir>/combined_features.csv``.
"""
import os
import re
import sys

import pandas as pd

from time_domain_features import ExtractTimeFeatures


def extract_numbers(filename):
    """Return every run of digits in *filename* as a tuple of ints.

    Digit runs are returned in the order they appear, e.g.
    ``"damage3_test12.csv"`` -> ``(3, 12)``. A name with no digits yields
    an empty tuple.

    NOTE(review): the original comment says this is meant to recover the
    damage level and test number from a file name; nothing in this file
    calls it yet.
    """
    # The original returned ``print(...)``, which is always None; return the
    # tuple itself so callers can actually use the parsed numbers.
    return tuple(int(digits) for digits in re.findall(r'\d+', filename))


def build_features(input_dir, output_dir):
    """Extract features for every test file and save them as one CSV.

    Args:
        input_dir: Directory whose sub-directories each hold test files.
            Non-directory entries directly under ``input_dir`` are ignored.
        output_dir: Directory that receives ``combined_features.csv``. It is
            created if it does not exist yet.

    Each entry inside a sub-directory is handed to ``ExtractTimeFeatures``;
    per the original author's note that call returns one dictionary of
    features per CSV file (defined in ``time_domain_features`` - confirm
    there). One DataFrame row is produced per returned object.
    """
    all_features = []

    # os.listdir() returns entries in arbitrary order; sorting both levels
    # keeps the row order of the output CSV reproducible between runs.
    for nth_damage in sorted(os.listdir(input_dir)):
        nth_damage_path = os.path.join(input_dir, nth_damage)
        if not os.path.isdir(nth_damage_path):
            continue

        print(nth_damage)
        for nth_test in sorted(os.listdir(nth_damage_path)):
            nth_test_path = os.path.join(nth_damage_path, nth_test)
            all_features.append(ExtractTimeFeatures(nth_test_path))

    # Create a DataFrame from the list of per-file feature records.
    df = pd.DataFrame(all_features)
    print(df)

    # to_csv() does not create missing parent directories, so make sure the
    # target exists first. An empty output_dir means "current directory",
    # which os.makedirs('') would reject.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    output_file_path = os.path.join(output_dir, 'combined_features.csv')
    df.to_csv(output_file_path, index=False)
    print(f"Features saved to {output_file_path}")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit(f"usage: {sys.argv[0]} <input_dir> <output_dir>")

    input_path = sys.argv[1]   # e.g. 'data/processed/'
    output_path = sys.argv[2]  # output DIRECTORY, e.g. 'data/features/'
    build_features(input_path, output_path)