Compare commits

..

2 Commits

6 changed files with 30 additions and 1275 deletions

16
.vscode/launch.json vendored
View File

@@ -1,16 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File with Arguments",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"args": ["data/raw", "data/raw"]
}
]
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,39 +1,16 @@
# src/features/build_features.py
import pandas as pd
from time_domain_features import ExtractTimeFeatures
import os
import re
from time_domain_features import FeatureExtractor
import numpy as np
# Helper: use a regex to pull the damage level and test number (the digit runs) out of a filename.
def extract_numbers(filename):
    """Return every run of digits found in *filename* as a tuple of ints.

    The numbers are returned in the order they appear in the string; an
    input containing no digits yields an empty tuple.

    Previously the result was wrapped in ``print(...)``, so the function
    printed the tuple but always returned ``None``. It now returns the
    tuple itself and no longer prints.
    """
    # r'\d+' matches each maximal run of consecutive digits.
    return tuple(int(num) for num in re.findall(r'\d+', filename))
# Single-file variant of build_features: loads one CSV and computes its features.
# NOTE(review): `output_file` is not used by the lines directly below; the
# `np.savez(output_file, **features)` call that would consume it appears only
# after the directory-based definition further down in this diff view.
def build_features(input_file, output_file):
# Parse the whole CSV at `input_file` into a DataFrame.
data = pd.read_csv(input_file)
# Assuming the relevant data is in the first column
# The first column's values are handed to FeatureExtractor as a plain array.
extractor = FeatureExtractor(data.iloc[:, 0].values)
# `features` is whatever FeatureExtractor exposes as `.features`
# (presumably a name -> value mapping, since it is later unpacked with ** — confirm).
features = extractor.features
def build_features(input_dir, output_dir):
    """Extract features for every test file and save them as one CSV.

    ``input_dir`` is scanned one level deep: each sub-directory is treated
    as a damage level, and each file inside it is passed to
    ``ExtractTimeFeatures``. One row per file is collected into a DataFrame,
    which is written to ``<output_dir>/combined_features.csv``.

    Args:
        input_dir: Directory containing one sub-directory per damage level.
        output_dir: Directory that receives ``combined_features.csv``; it is
            created if it does not exist yet.

    Returns:
        None. The result is written to disk and progress is printed.
    """
    all_features = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the output CSV reproducible between runs and machines.
    for nth_damage in sorted(os.listdir(input_dir)):
        nth_damage_path = os.path.join(input_dir, nth_damage)
        # Plain files at the top level (e.g. a README or a previous
        # combined_features.csv) are not damage folders.
        if not os.path.isdir(nth_damage_path):
            continue
        print(nth_damage)
        for nth_test in sorted(os.listdir(nth_damage_path)):
            nth_test_path = os.path.join(nth_damage_path, nth_test)
            # A nested directory cannot be read as a CSV; skip it instead of
            # crashing inside the reader.
            if not os.path.isfile(nth_test_path):
                continue
            # One feature record per test file.
            all_features.append(ExtractTimeFeatures(nth_test_path))

    # Create a DataFrame from the list of per-file feature records.
    df = pd.DataFrame(all_features)
    print(df)

    # Make sure the target directory exists before writing; an empty string
    # means "current directory" and needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_file_path = os.path.join(output_dir, 'combined_features.csv')
    df.to_csv(output_file_path, index=False)
    print(f"Features saved to {output_file_path}")
# Save features to a file
# np.savez(output_file, **features)
np.savez(output_file, **features)
if __name__ == "__main__":
import sys
@@ -41,4 +18,4 @@ if __name__ == "__main__":
output_path = sys.argv[2] # 'data/features/feature_matrix.npz'
# Assuming only one file for simplicity; adapt as needed
build_features(input_path, output_path)
build_features(f"{input_path}processed_data.csv", output_path)

View File

@@ -36,13 +36,6 @@ class FeatureExtractor:
result += f"{feature}: {value:.4f}\n"
return result
def ExtractTimeFeatures(object):
    """Load one CSV file and return the features computed from it.

    ``object`` is passed straight to ``pandas.read_csv`` (so it is expected
    to be a path or file-like object). The values of the second column are
    handed to ``FeatureExtractor`` and its ``features`` attribute is returned.
    """
    # skiprows=1 drops the first line of the file before parsing
    # (per the original note: a header row carrying separator-char info).
    frame = pd.read_csv(object, skiprows=1)
    # Assumes the signal lives in the second column (index 1).
    signal = frame.iloc[:, 1].values
    return FeatureExtractor(signal).features
# Save features to a file
# np.savez(output_file, **features)
# Usage
# Assume you have a CSV file with numerical data in the first column
# Create an instance of the class and pass the path to your CSV file

View File

@@ -1,8 +1,8 @@
# Processed Data Directory
# Raw Data Directory
## Overview
This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
This `data/raw` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `raw` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
## Directory Structure
@@ -12,12 +12,12 @@ The directory is organized as follows:
data
└── processed
├── DAMAGE_1
│ ├── D1_TEST1.csv
│ ├── D1_TEST2.csv
├── D1_TEST1.csv
├── D1_TEST2.csv
│ ...
│ └── D1_TEST10.csv
└── D1_TEST10.csv
├── DAMAGE_2
│ ├── D2_TEST1.csv
├── D2_TEST1.csv
│ ...
├── DAMAGE_3
│ ...

View File

@@ -13,14 +13,20 @@ processed_path = os.path.join(base_path, "processed")
os.makedirs(raw_path, exist_ok=True)
os.makedirs(processed_path, exist_ok=True)
for damage in range(1, 6): # 5 Damage levels
damage_folder = f"DAMAGE_{damage}"
damage_path = os.path.join(processed_path, damage_folder)
# Define the number of zeros to pad
num_damages = 5
num_tests = 10
damage_pad = len(str(num_damages))
test_pad = len(str(num_tests))
for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
damage_path = os.path.join(raw_path, damage_folder)
os.makedirs(damage_path, exist_ok=True)
for test in range(1, 11): # 10 Tests per damage level
# Filename for the CSV
csv_filename = f"D{damage}_TEST{test}.csv"
csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}.csv"
csv_path = os.path.join(damage_path, csv_filename)
# Generate dummy data