Compare commits
2 Commits
feature/au
...
feature/cs
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3860f2cc5b | ||
|
|
553140fe3c |
16
.vscode/launch.json
vendored
16
.vscode/launch.json
vendored
@@ -1,16 +0,0 @@
|
|||||||
{
|
|
||||||
// Use IntelliSense to learn about possible attributes.
|
|
||||||
// Hover to view descriptions of existing attributes.
|
|
||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
|
||||||
"version": "0.2.0",
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "Python Debugger: Current File with Arguments",
|
|
||||||
"type": "debugpy",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${file}",
|
|
||||||
"console": "integratedTerminal",
|
|
||||||
"args": ["data/raw", "data/raw"]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -25,7 +25,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -154,7 +154,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@@ -186,12 +186,12 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Print Time-domain Features (Single Mockup Data)"
|
"### Print Time-domain Features"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": 23,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@@ -264,7 +264,7 @@
|
|||||||
"0 2.067638 1.917716 0.412307 "
|
"0 2.067638 1.917716 0.412307 "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 13,
|
"execution_count": 23,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -272,12 +272,10 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"import sys\n",
|
"import sys\n",
|
||||||
"import os\n",
|
|
||||||
"# Assuming the src directory is one level up from the notebooks directory\n",
|
"# Assuming the src directory is one level up from the notebooks directory\n",
|
||||||
"sys.path.append('../src/features')\n",
|
"sys.path.append('../src/features')\n",
|
||||||
"from time_domain_features import FeatureExtractor\n",
|
"from time_domain_features import FeatureExtractor\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"# Extract features\n",
|
"# Extract features\n",
|
||||||
"extracted = FeatureExtractor(mock_df['SampleData'])\n",
|
"extracted = FeatureExtractor(mock_df['SampleData'])\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -285,85 +283,6 @@
|
|||||||
"features = pd.DataFrame(extracted.features, index=[0])\n",
|
"features = pd.DataFrame(extracted.features, index=[0])\n",
|
||||||
"features\n"
|
"features\n"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Print Time-domain Features (Multiple CSV Mockup Data)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 17,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"import sys\n",
|
|
||||||
"import os\n",
|
|
||||||
"# Assuming the src directory is one level up from the notebooks directory\n",
|
|
||||||
"sys.path.append('../src/features')\n",
|
|
||||||
"from time_domain_features import ExtractTimeFeatures # use wrapper function instead of class for easy use\n",
|
|
||||||
"\n",
|
|
||||||
"def build_features(input_dir):\n",
|
|
||||||
" all_features = []\n",
|
|
||||||
" for nth_damage in os.listdir(input_dir):\n",
|
|
||||||
" nth_damage_path = os.path.join(input_dir, nth_damage)\n",
|
|
||||||
" if os.path.isdir(nth_damage_path):\n",
|
|
||||||
" # print(nth_damage)\n",
|
|
||||||
" for nth_test in os.listdir(nth_damage_path):\n",
|
|
||||||
" nth_test_path = os.path.join(nth_damage_path, nth_test)\n",
|
|
||||||
" # print(nth_test_path)\n",
|
|
||||||
" features = ExtractTimeFeatures(nth_test_path) # return the one csv file feature in dictionary {}\n",
|
|
||||||
" all_features.append(features)\n",
|
|
||||||
"\n",
|
|
||||||
" # Create a DataFrame from the list of dictionaries\n",
|
|
||||||
" df = pd.DataFrame(all_features)\n",
|
|
||||||
" return df\n",
|
|
||||||
"\n",
|
|
||||||
"data_dir = \"../../data/raw\"\n",
|
|
||||||
"# Extract features\n",
|
|
||||||
"df = build_features(data_dir)\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 18,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
||||||
"RangeIndex: 50 entries, 0 to 49\n",
|
|
||||||
"Data columns (total 14 columns):\n",
|
|
||||||
" # Column Non-Null Count Dtype \n",
|
|
||||||
"--- ------ -------------- ----- \n",
|
|
||||||
" 0 Mean 50 non-null float64\n",
|
|
||||||
" 1 Max 50 non-null float64\n",
|
|
||||||
" 2 Peak (Pm) 50 non-null float64\n",
|
|
||||||
" 3 Peak-to-Peak (Pk) 50 non-null float64\n",
|
|
||||||
" 4 RMS 50 non-null float64\n",
|
|
||||||
" 5 Variance 50 non-null float64\n",
|
|
||||||
" 6 Standard Deviation 50 non-null float64\n",
|
|
||||||
" 7 Power 50 non-null float64\n",
|
|
||||||
" 8 Crest Factor 50 non-null float64\n",
|
|
||||||
" 9 Form Factor 50 non-null float64\n",
|
|
||||||
" 10 Pulse Indicator 50 non-null float64\n",
|
|
||||||
" 11 Margin 50 non-null float64\n",
|
|
||||||
" 12 Kurtosis 50 non-null float64\n",
|
|
||||||
" 13 Skewness 50 non-null float64\n",
|
|
||||||
"dtypes: float64(14)\n",
|
|
||||||
"memory usage: 5.6 KB\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df.info()"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
@@ -1,39 +1,16 @@
|
|||||||
# src/features/build_features.py
|
# src/features/build_features.py
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from time_domain_features import ExtractTimeFeatures
|
from time_domain_features import FeatureExtractor
|
||||||
import os
|
import numpy as np
|
||||||
import re
|
|
||||||
|
|
||||||
# Helper: use a regex to pull every number out of a filename (e.g. damage level and test number) and collect them into a tuple
|
def build_features(input_file, output_file):
|
||||||
def extract_numbers(filename):
|
data = pd.read_csv(input_file)
|
||||||
# Find all occurrences of one or more digits in the filename
|
# Assuming the relevant data is in the first column
|
||||||
numbers = re.findall(r'\d+', filename)
|
extractor = FeatureExtractor(data.iloc[:, 0].values)
|
||||||
# Convert the list of number strings to integers
|
features = extractor.features
|
||||||
numbers = [int(num) for num in numbers]
|
|
||||||
# Convert to a tuple and return
|
|
||||||
return print(tuple(numbers))
|
|
||||||
|
|
||||||
def build_features(input_dir, output_dir):
|
|
||||||
all_features = []
|
|
||||||
for nth_damage in os.listdir(input_dir):
|
|
||||||
nth_damage_path = os.path.join(input_dir, nth_damage)
|
|
||||||
if os.path.isdir(nth_damage_path):
|
|
||||||
print(nth_damage)
|
|
||||||
for nth_test in os.listdir(nth_damage_path):
|
|
||||||
nth_test_path = os.path.join(nth_damage_path, nth_test)
|
|
||||||
# print(nth_test_path)
|
|
||||||
features = ExtractTimeFeatures(nth_test_path) # return the one csv file feature in dictionary {}
|
|
||||||
all_features.append(features)
|
|
||||||
|
|
||||||
# Create a DataFrame from the list of dictionaries
|
|
||||||
df = pd.DataFrame(all_features)
|
|
||||||
print(df)
|
|
||||||
# Save the DataFrame to a CSV file in the output directory
|
|
||||||
output_file_path = os.path.join(output_dir, 'combined_features.csv')
|
|
||||||
df.to_csv(output_file_path, index=False)
|
|
||||||
print(f"Features saved to {output_file_path}")
|
|
||||||
# Save features to a file
|
# Save features to a file
|
||||||
# np.savez(output_file, **features)
|
np.savez(output_file, **features)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
@@ -41,4 +18,4 @@ if __name__ == "__main__":
|
|||||||
output_path = sys.argv[2] # 'data/features/feature_matrix.npz'
|
output_path = sys.argv[2] # 'data/features/feature_matrix.npz'
|
||||||
|
|
||||||
# Assuming only one file for simplicity; adapt as needed
|
# Assuming only one file for simplicity; adapt as needed
|
||||||
build_features(input_path, output_path)
|
build_features(f"{input_path}processed_data.csv", output_path)
|
||||||
|
|||||||
@@ -36,13 +36,6 @@ class FeatureExtractor:
|
|||||||
result += f"{feature}: {value:.4f}\n"
|
result += f"{feature}: {value:.4f}\n"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def ExtractTimeFeatures(object):
|
|
||||||
data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
|
|
||||||
extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
|
|
||||||
features = extractor.features
|
|
||||||
return features
|
|
||||||
# Save features to a file
|
|
||||||
# np.savez(output_file, **features)
|
|
||||||
# Usage
|
# Usage
|
||||||
# Assume you have a CSV file with numerical data in the first column
|
# Assume you have a CSV file with numerical data in the first column
|
||||||
# Create an instance of the class and pass the path to your CSV file
|
# Create an instance of the class and pass the path to your CSV file
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
# Processed Data Directory
|
# Raw Data Directory
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
|
This `data/raw` directory contains the structured raw data as recorded, before any processing or feature extraction. Each subdirectory within `raw` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
|
||||||
|
|
||||||
## Directory Structure
|
## Directory Structure
|
||||||
|
|
||||||
@@ -13,14 +13,20 @@ processed_path = os.path.join(base_path, "processed")
|
|||||||
os.makedirs(raw_path, exist_ok=True)
|
os.makedirs(raw_path, exist_ok=True)
|
||||||
os.makedirs(processed_path, exist_ok=True)
|
os.makedirs(processed_path, exist_ok=True)
|
||||||
|
|
||||||
for damage in range(1, 6): # 5 Damage levels
|
# Dataset dimensions; the zero-padding widths for folder and file names are derived from them
|
||||||
damage_folder = f"DAMAGE_{damage}"
|
num_damages = 5
|
||||||
damage_path = os.path.join(processed_path, damage_folder)
|
num_tests = 10
|
||||||
|
damage_pad = len(str(num_damages))
|
||||||
|
test_pad = len(str(num_tests))
|
||||||
|
|
||||||
|
for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
|
||||||
|
damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
|
||||||
|
damage_path = os.path.join(raw_path, damage_folder)
|
||||||
os.makedirs(damage_path, exist_ok=True)
|
os.makedirs(damage_path, exist_ok=True)
|
||||||
|
|
||||||
for test in range(1, 11): # 10 Tests per damage level
|
for test in range(1, 11): # 10 Tests per damage level
|
||||||
# Filename for the CSV
|
# Filename for the CSV
|
||||||
csv_filename = f"D{damage}_TEST{test}.csv"
|
csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}.csv"
|
||||||
csv_path = os.path.join(damage_path, csv_filename)
|
csv_path = os.path.join(damage_path, csv_filename)
|
||||||
|
|
||||||
# Generate dummy data
|
# Generate dummy data
|
||||||
|
|||||||
Reference in New Issue
Block a user