Compare commits
11 Commits
feature/cs
...
feature/15
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
41086e95ad | ||
|
|
adde35ed7e | ||
|
|
b2684c23f6 | ||
|
|
8a499a04fb | ||
|
|
118c56c12d | ||
|
|
79a0f82372 | ||
|
|
c9415c21fa | ||
|
|
de902b2a8c | ||
|
|
57c0e03a4f | ||
|
|
8ab934fe1c | ||
|
|
55db5709a9 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,4 +1,4 @@
|
|||||||
# Ignore CSV files in the data directory and all its subdirectories
|
# Ignore CSV files in the data directory and all its subdirectories
|
||||||
data/**/*.csv
|
data/**/*.csv
|
||||||
|
.venv/
|
||||||
*.pyc
|
*.pyc
|
||||||
16
.vscode/launch.json
vendored
Normal file
16
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Current File with Arguments",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"args": ["data/raw", "data/raw"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,16 +1,39 @@
|
|||||||
# src/features/build_features.py
|
# src/features/build_features.py
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from time_domain_features import FeatureExtractor
|
from time_domain_features import ExtractTimeFeatures
|
||||||
import numpy as np
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
def build_features(input_file, output_file):
|
# Define a helper that uses a regex to extract the damage level and test number from a filename
|
||||||
data = pd.read_csv(input_file)
|
def extract_numbers(filename):
|
||||||
# Assuming the relevant data is in the first column
|
# Find all occurrences of one or more digits in the filename
|
||||||
extractor = FeatureExtractor(data.iloc[:, 0].values)
|
numbers = re.findall(r'\d+', filename)
|
||||||
features = extractor.features
|
# Convert the list of number strings to integers
|
||||||
|
numbers = [int(num) for num in numbers]
|
||||||
|
# Convert to a tuple and return
|
||||||
|
return print(tuple(numbers))
|
||||||
|
|
||||||
|
def build_features(input_dir, output_dir):
|
||||||
|
all_features = []
|
||||||
|
for nth_damage in os.listdir(input_dir):
|
||||||
|
nth_damage_path = os.path.join(input_dir, nth_damage)
|
||||||
|
if os.path.isdir(nth_damage_path):
|
||||||
|
print(nth_damage)
|
||||||
|
for nth_test in os.listdir(nth_damage_path):
|
||||||
|
nth_test_path = os.path.join(nth_damage_path, nth_test)
|
||||||
|
# print(nth_test_path)
|
||||||
|
features = ExtractTimeFeatures(nth_test_path) # return the one csv file feature in dictionary {}
|
||||||
|
all_features.append(features)
|
||||||
|
|
||||||
|
# Create a DataFrame from the list of dictionaries
|
||||||
|
df = pd.DataFrame(all_features)
|
||||||
|
print(df)
|
||||||
|
# Save the DataFrame to a CSV file in the output directory
|
||||||
|
output_file_path = os.path.join(output_dir, 'combined_features.csv')
|
||||||
|
df.to_csv(output_file_path, index=False)
|
||||||
|
print(f"Features saved to {output_file_path}")
|
||||||
# Save features to a file
|
# Save features to a file
|
||||||
np.savez(output_file, **features)
|
# np.savez(output_file, **features)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
@@ -18,4 +41,4 @@ if __name__ == "__main__":
|
|||||||
output_path = sys.argv[2] # 'data/features/feature_matrix.npz'
|
output_path = sys.argv[2] # 'data/features/feature_matrix.npz'
|
||||||
|
|
||||||
# Assuming only one file for simplicity; adapt as needed
|
# Assuming only one file for simplicity; adapt as needed
|
||||||
build_features(f"{input_path}processed_data.csv", output_path)
|
build_features(input_path, output_path)
|
||||||
|
|||||||
@@ -36,6 +36,13 @@ class FeatureExtractor:
|
|||||||
result += f"{feature}: {value:.4f}\n"
|
result += f"{feature}: {value:.4f}\n"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def ExtractTimeFeatures(object):
|
||||||
|
data = pd.read_csv(object, skiprows=1) # Skip the first line, which holds the separator hint rather than data
|
||||||
|
extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
|
||||||
|
features = extractor.features
|
||||||
|
return features
|
||||||
|
# Save features to a file
|
||||||
|
# np.savez(output_file, **features)
|
||||||
# Usage
|
# Usage
|
||||||
# Assume you have a CSV file with numerical data in the first column
|
# Assume you have a CSV file with numerical data in the first column
|
||||||
# Create an instance of the class and pass the path to your CSV file
|
# Create an instance of the class and pass the path to your CSV file
|
||||||
|
|||||||
@@ -16,8 +16,10 @@ os.makedirs(processed_path, exist_ok=True)
|
|||||||
# Define the number of zeros to pad
|
# Define the number of zeros to pad
|
||||||
num_damages = 5
|
num_damages = 5
|
||||||
num_tests = 10
|
num_tests = 10
|
||||||
|
num_sensors = 2
|
||||||
damage_pad = len(str(num_damages))
|
damage_pad = len(str(num_damages))
|
||||||
test_pad = len(str(num_tests))
|
test_pad = len(str(num_tests))
|
||||||
|
sensor_pad = len(str(num_sensors))
|
||||||
|
|
||||||
for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
|
for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
|
||||||
damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
|
damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
|
||||||
@@ -25,23 +27,24 @@ for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
|
|||||||
os.makedirs(damage_path, exist_ok=True)
|
os.makedirs(damage_path, exist_ok=True)
|
||||||
|
|
||||||
for test in range(1, 11): # 10 Tests per damage level
|
for test in range(1, 11): # 10 Tests per damage level
|
||||||
# Filename for the CSV
|
for sensor in range(1, 3): # 2 Sensors per test
|
||||||
csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}.csv"
|
# Filename for the CSV
|
||||||
csv_path = os.path.join(damage_path, csv_filename)
|
csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}_{sensor:0{sensor_pad}}.csv"
|
||||||
|
csv_path = os.path.join(damage_path, csv_filename)
|
||||||
|
|
||||||
# Generate dummy data
|
# Generate dummy data
|
||||||
num_rows = 10
|
num_rows = 10
|
||||||
start_time = datetime.now()
|
start_time = datetime.now()
|
||||||
timestamps = [start_time + timedelta(seconds=i*0.0078125) for i in range(num_rows)]
|
timestamps = [start_time + timedelta(seconds=i*0.0078125) for i in range(num_rows)]
|
||||||
values = np.random.randn(num_rows) # Random float values
|
values = np.random.randn(num_rows) # Random float values
|
||||||
|
|
||||||
# Create DataFrame
|
# Create DataFrame
|
||||||
df = pd.DataFrame({
|
df = pd.DataFrame({
|
||||||
"Time": timestamps,
|
"Time": timestamps,
|
||||||
"Value": values
|
"Value": values
|
||||||
})
|
})
|
||||||
|
|
||||||
# Save the CSV file with a custom header
|
# Save the CSV file with a custom header
|
||||||
with open(csv_path, 'w') as file:
|
with open(csv_path, 'w') as file:
|
||||||
file.write('sep=,\n') # Writing the separator hint
|
file.write('sep=,\n') # Writing the separator hint
|
||||||
df.to_csv(file, index=False)
|
df.to_csv(file, index=False)
|
||||||
|
|||||||
Reference in New Issue
Block a user