Compare commits

...

15 Commits

Author SHA1 Message Date
nuluh
2f54e91197 feat: Add absolute value option to time feature extraction 2024-09-03 15:39:44 +07:00
nuluh
758255a24e feat(notebooks): Implement Time-domain feature extraction with real data from QUGS 2024-09-03 12:52:40 +07:00
nuluh
ff5578652f fix(script): Fix bugs taking incorrect column by changing columns and sensor_end_map index number to take the loop of enumeration. 2024-09-03 12:08:53 +07:00
nuluh
db2c5d3a4e feat(script): Update output directory in convert.py 2024-09-03 11:50:44 +07:00
nuluh
ea978de872 - 2024-09-03 11:43:46 +07:00
nuluh
465d257850 feat(script): Add zero-padding to converted CSV filenames for a standardized processing pipeline 2024-09-03 11:38:49 +07:00
nuluh
d12eea0acf feat(data-processing): Implement CSV data transformation for SVM analysis
Introduce a Python script for transforming QUGS 2D grid structure data into a simplified 1D beam format suitable for SVM-based damage detection. The script efficiently slices original CSV files into smaller, manageable sets, correlating specific damage scenarios with their corresponding sensor data. This change addresses the challenge of retaining critical damage localization information during the data conversion process, ensuring high-quality, relevant data for 1D analysis.

Closes #20
2024-09-03 11:33:23 +07:00
nuluh
0306f28a68 docs(notebooks): add extract_numbers docstring 2024-09-03 11:09:47 +07:00
Panuluh
9da3dae709 Merge pull request #18 from nuluh/feature/15-normalize-dataset-by-preprocess-relatives-value-between-two-acceloremeter-sensors
Feature/15 normalize dataset by preprocessing relative values between two accelerometer sensors
2024-09-03 08:43:44 +07:00
nuluh
41086e95ad chore: Ignore .venv/ directory and update .gitignore due to error numpy error ValueError: numpy.ndarray size changed, may indicate binary incompatibility. by creating venv. 2024-09-01 14:50:24 +07:00
nuluh
adde35ed7e feat(notebook): Normalize the data by calculating the relative value between two sensors. Along with it, MinMaxScaler and StandardScaler are applied and visualize with Seaborn's Pair Plot.
Closes #15
2024-09-01 14:50:04 +07:00
nuluh
b2684c23f6 feat(script): Add zero-padding to CSV filenames to include sensor numbers 2024-08-27 10:11:39 +07:00
Panuluh
8a499a04fb Merge pull request #17 from nuluh/feature/csv-padding-naming
Feature/csv padding naming
2024-08-27 09:23:44 +07:00
nuluh
3860f2cc5b fix(docs): The readme.md should belong to the raw data, since the script is intended to simulate raw data coming from accelerometer sensors, rather than processed data, which should instead be generated by simulating frequency-domain data. 2024-08-18 10:34:22 +07:00
nuluh
553140fe3c feat(script): add zero-padding to CSV filenames and change the output generated csv as raw data in raw folder 2024-08-17 19:51:42 +07:00
6 changed files with 1971 additions and 1077 deletions

2
.gitignore vendored
View File

@@ -1,4 +1,4 @@
# Ignore CSV files in the data directory and all its subdirectories
data/**/*.csv
.venv/
*.pyc

File diff suppressed because one or more lines are too long

View File

@@ -36,9 +36,12 @@ class FeatureExtractor:
result += f"{feature}: {value:.4f}\n"
return result
def ExtractTimeFeatures(object):
def ExtractTimeFeatures(object, absolute):
data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
if absolute:
extractor = FeatureExtractor(np.abs(data.iloc[:, 1].values)) # Assuming the data is in the second column
else:
extractor = FeatureExtractor(data.iloc[:, 1].values)
features = extractor.features
return features
# Save features to a file

65
data/QUGS/convert.py Normal file
View File

@@ -0,0 +1,65 @@
import pandas as pd
import os
import sys
from colorama import Fore, Style, init
def create_damage_files(base_path, output_base):
    """Slice QUGS 2D-grid sensor exports into per-damage, per-test 1D beam CSVs.

    For each damage scenario, reads the corresponding original ``zzzAD<N>.TXT``
    files from ``base_path`` and writes two CSVs per test into
    ``<output_base>/DAMAGE_<damage>/``: one for the top sensor and one for the
    bottom sensor, each containing the ``Time`` column plus a single sensor
    column.

    Args:
        base_path: Directory containing the original ``zzzAD<N>.TXT`` files.
        output_base: Directory under which ``DAMAGE_<n>`` folders already exist
            (the caller is expected to have created them).

    Side effects:
        Writes CSV files and prints colorized progress messages to stdout.
    """
    # Initialize colorama so colored output also works on Windows terminals.
    init(autoreset=True)
    # Generate column labels based on expected duplication in input files.
    # pandas de-duplicates repeated headers as 'Real', 'Real.1', ..., 'Real.29'.
    columns = ['Real'] + [f'Real.{i}' for i in range(1, 30)]
    # Maps the per-scenario test index (1..5) to its bottom-sensor column.
    sensor_end_map = {1: 'Real.25', 2: 'Real.26', 3: 'Real.27', 4: 'Real.28', 5: 'Real.29'}
    # Define the damage scenarios and the corresponding original file indices.
    damage_scenarios = {
        1: range(6, 11),   # Damage 1 files from zzzAD6.csv to zzzAD10.csv
        2: range(11, 16),  # Damage 2 files from zzzAD11.csv to zzzAD15.csv
        3: range(16, 21),  # Damage 3 files from zzzAD16.csv to zzzAD20.csv
        4: range(21, 26)   # Damage 4 files from zzzAD21.csv to zzzAD25.csv
    }
    # Zero-padding widths so filenames sort lexicographically in the pipeline.
    damage_pad = len(str(len(damage_scenarios)))
    test_pad = len(str(30))
    for damage, files in damage_scenarios.items():
        for i, file_index in enumerate(files, start=1):
            # Load original data file; the first 10 rows are metadata.
            file_path = os.path.join(base_path, f'zzzAD{file_index}.TXT')
            df = pd.read_csv(file_path, sep='\t', skiprows=10)
            top_sensor = columns[i - 1]
            output_file_1 = os.path.join(output_base, f'DAMAGE_{damage}',
                                         f'D{damage:0{damage_pad}}_TEST{i:0{test_pad}}_01.csv')
            print(f"Creating {output_file_1} from taking zzzAD{file_index}.TXT")
            print("Taking datetime column on index 0...")
            print(f"Taking `{top_sensor}`...")
            df[['Time', top_sensor]].to_csv(output_file_1, index=False)
            print(Fore.GREEN + "Done")
            bottom_sensor = sensor_end_map[i]
            # Zero-pad the bottom-sensor filename too, matching output_file_1;
            # the original left this one unpadded, breaking consistent naming.
            output_file_2 = os.path.join(output_base, f'DAMAGE_{damage}',
                                         f'D{damage:0{damage_pad}}_TEST{i:0{test_pad}}_02.csv')
            print(f"Creating {output_file_2} from taking zzzAD{file_index}.TXT")
            print("Taking datetime column on index 0...")
            print(f"Taking `{bottom_sensor}`...")
            df[['Time', bottom_sensor]].to_csv(output_file_2, index=False)
            print(Fore.GREEN + "Done")
            print("---")
def main():
    """CLI entry point: validate arguments, prepare output folders, convert.

    Usage: python convert.py <path_to_csv_files> <output_directory>

    Exits with status 1 when either required argument is missing.
    """
    # Both argv[1] (input path) and argv[2] (output dir) are read below, so
    # require at least 3 entries; the original checked `< 2` and crashed with
    # IndexError when the output directory was omitted.
    if len(sys.argv) < 3:
        print("Usage: python convert.py <path_to_csv_files> <output_directory>")
        sys.exit(1)
    base_path = sys.argv[1]
    output_base = sys.argv[2]  # Define output directory
    # Create output folders if they don't exist, one per damage scenario.
    for i in range(1, 5):
        os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
    create_damage_files(base_path, output_base)
    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")

View File

@@ -1,8 +1,8 @@
# Processed Data Directory
# Raw Data Directory
## Overview
This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
This `data/raw` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `raw` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
## Directory Structure

View File

@@ -13,14 +13,23 @@ processed_path = os.path.join(base_path, "processed")
os.makedirs(raw_path, exist_ok=True)
os.makedirs(processed_path, exist_ok=True)
for damage in range(1, 6): # 5 Damage levels
damage_folder = f"DAMAGE_{damage}"
damage_path = os.path.join(processed_path, damage_folder)
# Define the number of zeros to pad
num_damages = 5
num_tests = 10
num_sensors = 2
damage_pad = len(str(num_damages))
test_pad = len(str(num_tests))
sensor_pad = len(str(num_sensors))
for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
damage_path = os.path.join(raw_path, damage_folder)
os.makedirs(damage_path, exist_ok=True)
for test in range(1, 11): # 10 Tests per damage level
for sensor in range(1, 3): # 2 Sensors per test
# Filename for the CSV
csv_filename = f"D{damage}_TEST{test}.csv"
csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}_{sensor:0{sensor_pad}}.csv"
csv_path = os.path.join(damage_path, csv_filename)
# Generate dummy data