# Files
# thesis/code/src/features/time_domain_features.py
#
# 56 lines
# 2.2 KiB
# Python

import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew
class FeatureExtractor:
    """Compute time-domain statistical features of a numeric signal.

    The features are calculated once, at construction time, and stored in
    ``self.features`` as a dict keyed by feature name. The input is kept
    unmodified in ``self.x``.
    """

    def __init__(self, data):
        """Store ``data`` and immediately compute its features.

        Args:
            data: Numeric samples; a numpy array or anything
                ``np.asarray`` can convert (e.g. a list of numbers).

        Raises:
            ValueError: If ``data`` contains no samples.
        """
        self.x = data
        self.features = self.calculate_features()

    def calculate_features(self):
        """Return a dict mapping feature name to its value for ``self.x``.

        Raises:
            ValueError: If ``self.x`` contains no samples.
        """
        x = np.asarray(self.x)
        # Integer (and bool) inputs are promoted to float so that x**2 cannot
        # wrap around (e.g. int16 samples above 181 overflow when squared).
        # Floating and complex inputs are left in their own dtype.
        if not np.issubdtype(x.dtype, np.inexact):
            x = x.astype(float)
        if x.size == 0:
            # np.max / np.min would raise a less descriptive ValueError.
            raise ValueError("cannot extract features from empty data")

        # Shared intermediate quantities, computed once.
        abs_x = np.abs(x)
        mean = np.mean(x)
        peak = np.max(abs_x)
        power = np.mean(x**2)
        rms = np.sqrt(power)

        # NOTE: 'Variance' is the population variance (ddof=0) while
        # 'Standard Deviation' is the sample estimate (ddof=1); likewise
        # 'Kurtosis' uses scipy's default bias setting (Pearson definition,
        # fisher=False) while 'Skewness' is bias-corrected. These choices are
        # kept as they were so existing results do not change.
        # 'Form Factor' and 'Pulse Indicator' divide by the signed mean, so a
        # zero-mean signal yields inf/nan for those two entries.
        return {
            'Mean': mean,
            'Max': np.max(x),
            'Peak (Pm)': peak,
            'Peak-to-Peak (Pk)': np.max(x) - np.min(x),
            'RMS': rms,
            'Variance': np.var(x, ddof=0),
            'Standard Deviation': np.std(x, ddof=1),
            'Power': power,
            'Crest Factor': peak / rms,
            'Form Factor': rms / mean,
            'Pulse Indicator': peak / mean,
            'Margin': peak / np.mean(np.sqrt(abs_x)),
            'Kurtosis': kurtosis(x, fisher=False),
            'Skewness': skew(x, bias=False),
        }

    def __repr__(self):
        """Return a multi-line report with one ``name: value`` line per feature."""
        lines = [
            f"{feature}: {value:.4f}\n"
            for feature, value in self.features.items()
        ]
        return "Feature Extraction Results:\n" + "".join(lines)
def ExtractTimeFeatures(object, absolute):
    """Read a CSV file and return the time-domain features of its second column.

    Args:
        object: Path or file-like object handed to ``pd.read_csv``.
        absolute: When truthy, the samples are rectified with ``np.abs``
            before the features are computed.

    Returns:
        The ``features`` dict of a ``FeatureExtractor`` built from the samples.
    """
    # The first line of the file is skipped before parsing (presumably a
    # separator-info line preceding the real header - confirm against the data).
    frame = pd.read_csv(object, skiprows=1)
    # The signal is taken from the second column (positional index 1).
    samples = frame.iloc[:, 1].values
    if absolute:
        samples = np.abs(samples)
    return FeatureExtractor(samples).features
# Save features to a file
# np.savez(output_file, **features)
# Usage
# Assume you have a CSV file whose first line is skipped and whose numerical data is in the second column
# Pass the path to your CSV file to ExtractTimeFeatures to get the feature dictionary
# For example:
# features = ExtractTimeFeatures('path_to_your_data.csv', absolute=False)
# To see a formatted report in a notebook or in the interpreter, build a FeatureExtractor from a numeric array and print it
# print(FeatureExtractor(signal_array))