diff --git a/code/src/features/build_features.py b/code/src/features/build_features.py new file mode 100644 index 0000000..017f26c --- /dev/null +++ b/code/src/features/build_features.py @@ -0,0 +1,21 @@ +# src/features/build_features.py +import pandas as pd +from time_domain_features import FeatureExtractor +import numpy as np + +def build_features(input_file, output_file): + data = pd.read_csv(input_file) + # Assuming the relevant data is in the first column + extractor = FeatureExtractor(data.iloc[:, 0].values) + features = extractor.features + + # Save features to a file + np.savez(output_file, **features) + +if __name__ == "__main__": + import sys + input_path = sys.argv[1] # 'data/processed/' + output_path = sys.argv[2] # 'data/features/feature_matrix.npz' + + # Assuming only one file for simplicity; adapt as needed + build_features(f"{input_path}processed_data.csv", output_path) diff --git a/code/src/features/time_domain_features.py b/code/src/features/time_domain_features.py index bc61c75..f0710e1 100644 --- a/code/src/features/time_domain_features.py +++ b/code/src/features/time_domain_features.py @@ -3,14 +3,16 @@ import pandas as pd from scipy.stats import kurtosis, skew class FeatureExtractor: - def __init__(self, file_path): - # Read data from CSV file - self.data = pd.read_csv(file_path) - # Assuming the data to analyze is in the first column - self.x = self.data.iloc[:, 0].values + # integrates the feature extraction into your project's existing data processing pipeline + def __init__(self, data): + # Assuming data is a numpy array + self.x = data + # Calculate features + self.features = self.calculate_features() # Calculate all features - self.features = { + def calculate_features(self): + features = { 'Mean': np.mean(self.x), 'Max': np.max(self.x), 'Peak (Pm)': np.max(np.abs(self.x)), @@ -26,6 +28,7 @@ class FeatureExtractor: 'Kurtosis': kurtosis(self.x, fisher=False), 'Skewness': skew(self.x, bias=False) } + return features def __repr__(self): result = "Feature Extraction Results:\n"