Compare commits

..

2 Commits

Author SHA1 Message Date
Panuluh
93857af43d Merge pull request #16 from nuluh/revert-8-feature/csv-padding-naming
Revert "Add Zero-Padding to CSV Filenames"
2024-08-27 09:20:52 +07:00
Panuluh
88be76292b Revert "Add Zero-Padding to CSV Filenames" 2024-08-27 09:18:44 +07:00
6 changed files with 62 additions and 2080 deletions

2
.gitignore vendored
View File

@@ -1,4 +1,4 @@
# Ignore CSV files in the data directory and all its subdirectories # Ignore CSV files in the data directory and all its subdirectories
data/**/*.csv data/**/*.csv
.venv/
*.pyc *.pyc

File diff suppressed because one or more lines are too long

View File

@@ -36,12 +36,9 @@ class FeatureExtractor:
result += f"{feature}: {value:.4f}\n" result += f"{feature}: {value:.4f}\n"
return result return result
def ExtractTimeFeatures(object, absolute): def ExtractTimeFeatures(object):
data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
if absolute: extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
extractor = FeatureExtractor(np.abs(data.iloc[:, 1].values)) # Assuming the data is in the second column
else:
extractor = FeatureExtractor(data.iloc[:, 1].values)
features = extractor.features features = extractor.features
return features return features
# Save features to a file # Save features to a file

View File

@@ -1,65 +0,0 @@
import pandas as pd
import os
import sys
from colorama import Fore, Style, init
def create_damage_files(base_path, output_base):
    """Split raw sensor TXT dumps into per-sensor CSV files per damage scenario.

    For every damage scenario, each raw ``zzzAD<N>.TXT`` file yields two CSVs
    (top and bottom sensor), written under ``<output_base>/DAMAGE_<n>/``.

    Parameters
    ----------
    base_path : str
        Directory containing the raw ``zzzAD<N>.TXT`` files.
    output_base : str
        Directory whose ``DAMAGE_<n>`` subfolders receive the output CSVs
        (subfolders are assumed to exist; see ``main``).
    """
    # Initialize colorama so ANSI colors reset automatically after each print.
    init(autoreset=True)

    # Column labels matching the duplicated 'Real' headers that pandas
    # generates when reading the raw files.
    columns = ['Real'] + [f'Real.{i}' for i in range(1, 30)]
    # Bottom-sensor column for each test index within a scenario.
    sensor_end_map = {1: 'Real.25', 2: 'Real.26', 3: 'Real.27', 4: 'Real.28', 5: 'Real.29'}

    # Damage scenarios mapped to the corresponding original file indices.
    damage_scenarios = {
        1: range(6, 11),   # Damage 1 files from zzzAD6.csv to zzzAD10.csv
        2: range(11, 16),  # Damage 2 files from zzzAD11.csv to zzzAD15.csv
        3: range(16, 21),  # Damage 3 files from zzzAD16.csv to zzzAD20.csv
        4: range(21, 26),  # Damage 4 files from zzzAD21.csv to zzzAD25.csv
    }

    # Zero-padding widths so filenames sort lexicographically.
    damage_pad = len(str(len(damage_scenarios)))
    test_pad = len(str(30))

    for damage, files in damage_scenarios.items():
        for i, file_index in enumerate(files, start=1):
            # Load the original tab-separated data file, skipping the
            # 10-line instrument header.
            file_path = os.path.join(base_path, f'zzzAD{file_index}.TXT')
            df = pd.read_csv(file_path, sep='\t', skiprows=10)

            top_sensor = columns[i - 1]
            print(top_sensor, type(top_sensor))
            output_file_1 = os.path.join(
                output_base, f'DAMAGE_{damage}',
                f'D{damage:0{damage_pad}}_TEST{i:0{test_pad}}_01.csv')
            print(f"Creating {output_file_1} from taking zzzAD{file_index}.TXT")
            print("Taking datetime column on index 0...")
            print(f"Taking `{top_sensor}`...")
            df[['Time', top_sensor]].to_csv(output_file_1, index=False)
            print(Fore.GREEN + "Done")

            bottom_sensor = sensor_end_map[i]
            # BUG FIX: use the same zero-padded naming scheme as
            # output_file_1 so the two per-test files pair and sort together
            # (original produced unpadded 'D{damage}_TEST{i}_02.csv').
            output_file_2 = os.path.join(
                output_base, f'DAMAGE_{damage}',
                f'D{damage:0{damage_pad}}_TEST{i:0{test_pad}}_02.csv')
            print(f"Creating {output_file_2} from taking zzzAD{file_index}.TXT")
            print("Taking datetime column on index 0...")
            print(f"Taking `{bottom_sensor}`...")
            df[['Time', bottom_sensor]].to_csv(output_file_2, index=False)
            print(Fore.GREEN + "Done")
            print("---")
def main():
    """CLI entry point: validate arguments, prepare output folders, convert.

    Usage: ``python convert.py <path_to_csv_files> <output_directory>``.
    Exits with status 1 when either required argument is missing.
    """
    # BUG FIX: the script reads both sys.argv[1] and sys.argv[2]; the
    # original guard only required one argument (`< 2`) and then crashed
    # with IndexError when the output directory was omitted.
    if len(sys.argv) < 3:
        print("Usage: python convert.py <path_to_csv_files> <output_directory>")
        sys.exit(1)
    base_path = sys.argv[1]
    output_base = sys.argv[2]  # Root of the output directory tree

    # Create the per-scenario output folders if they don't exist.
    for i in range(1, 5):
        os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)

    create_damage_files(base_path, output_base)
    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")


if __name__ == "__main__":
    main()

View File

@@ -1,8 +1,8 @@
# Raw Data Directory # Processed Data Directory
## Overview ## Overview
This `data/raw` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `raw` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario. This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
## Directory Structure ## Directory Structure

View File

@@ -13,23 +13,14 @@ processed_path = os.path.join(base_path, "processed")
os.makedirs(raw_path, exist_ok=True) os.makedirs(raw_path, exist_ok=True)
os.makedirs(processed_path, exist_ok=True) os.makedirs(processed_path, exist_ok=True)
# Define the number of zeros to pad for damage in range(1, 6): # 5 Damage levels
num_damages = 5 damage_folder = f"DAMAGE_{damage}"
num_tests = 10 damage_path = os.path.join(processed_path, damage_folder)
num_sensors = 2
damage_pad = len(str(num_damages))
test_pad = len(str(num_tests))
sensor_pad = len(str(num_sensors))
for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
damage_path = os.path.join(raw_path, damage_folder)
os.makedirs(damage_path, exist_ok=True) os.makedirs(damage_path, exist_ok=True)
for test in range(1, 11): # 10 Tests per damage level for test in range(1, 11): # 10 Tests per damage level
for sensor in range(1, 3): # 2 Sensors per test
# Filename for the CSV # Filename for the CSV
csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}_{sensor:0{sensor_pad}}.csv" csv_filename = f"D{damage}_TEST{test}.csv"
csv_path = os.path.join(damage_path, csv_filename) csv_path = os.path.join(damage_path, csv_filename)
# Generate dummy data # Generate dummy data