Compare commits

..

2 Commits

Author SHA1 Message Date
Panuluh
93857af43d Merge pull request #16 from nuluh/revert-8-feature/csv-padding-naming
Revert "Add Zero-Padding to CSV Filenames"
2024-08-27 09:20:52 +07:00
Panuluh
88be76292b Revert "Add Zero-Padding to CSV Filenames" 2024-08-27 09:18:44 +07:00
6 changed files with 62 additions and 2080 deletions

2
.gitignore vendored
View File

@@ -1,4 +1,4 @@
# Ignore CSV files in the data directory and all its subdirectories # Ignore CSV files in the data directory and all its subdirectories
data/**/*.csv data/**/*.csv
.venv/
*.pyc *.pyc

File diff suppressed because one or more lines are too long

View File

@@ -36,12 +36,9 @@ class FeatureExtractor:
result += f"{feature}: {value:.4f}\n" result += f"{feature}: {value:.4f}\n"
return result return result
def ExtractTimeFeatures(object, absolute): def ExtractTimeFeatures(object):
data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
if absolute: extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
extractor = FeatureExtractor(np.abs(data.iloc[:, 1].values)) # Assuming the data is in the second column
else:
extractor = FeatureExtractor(data.iloc[:, 1].values)
features = extractor.features features = extractor.features
return features return features
# Save features to a file # Save features to a file

View File

@@ -1,65 +0,0 @@
import pandas as pd
import os
import sys
from colorama import Fore, Style, init
def create_damage_files(base_path, output_base):
    """Split raw sensor TXT dumps into per-sensor CSV files per damage scenario.

    For every damage scenario, each raw ``zzzAD<N>.TXT`` file yields two CSVs
    (top and bottom sensor), written under ``<output_base>/DAMAGE_<n>/``.

    Parameters
    ----------
    base_path : str
        Directory containing the raw ``zzzAD<N>.TXT`` files.
    output_base : str
        Directory whose ``DAMAGE_<n>`` subfolders receive the output CSVs
        (subfolders are assumed to exist; see ``main``).
    """
    # Initialize colorama so ANSI colors reset automatically after each print.
    init(autoreset=True)

    # Column labels matching the duplicated 'Real' headers that pandas
    # generates when reading the raw files.
    columns = ['Real'] + [f'Real.{i}' for i in range(1, 30)]
    # Bottom-sensor column for each test index within a scenario.
    sensor_end_map = {1: 'Real.25', 2: 'Real.26', 3: 'Real.27', 4: 'Real.28', 5: 'Real.29'}

    # Damage scenarios mapped to the corresponding original file indices.
    damage_scenarios = {
        1: range(6, 11),   # Damage 1 files from zzzAD6.csv to zzzAD10.csv
        2: range(11, 16),  # Damage 2 files from zzzAD11.csv to zzzAD15.csv
        3: range(16, 21),  # Damage 3 files from zzzAD16.csv to zzzAD20.csv
        4: range(21, 26),  # Damage 4 files from zzzAD21.csv to zzzAD25.csv
    }

    # Zero-padding widths so filenames sort lexicographically.
    damage_pad = len(str(len(damage_scenarios)))
    test_pad = len(str(30))

    for damage, files in damage_scenarios.items():
        for i, file_index in enumerate(files, start=1):
            # Load the original tab-separated data file, skipping the
            # 10-line instrument header.
            file_path = os.path.join(base_path, f'zzzAD{file_index}.TXT')
            df = pd.read_csv(file_path, sep='\t', skiprows=10)

            top_sensor = columns[i - 1]
            print(top_sensor, type(top_sensor))
            output_file_1 = os.path.join(
                output_base, f'DAMAGE_{damage}',
                f'D{damage:0{damage_pad}}_TEST{i:0{test_pad}}_01.csv')
            print(f"Creating {output_file_1} from taking zzzAD{file_index}.TXT")
            print("Taking datetime column on index 0...")
            print(f"Taking `{top_sensor}`...")
            df[['Time', top_sensor]].to_csv(output_file_1, index=False)
            print(Fore.GREEN + "Done")

            bottom_sensor = sensor_end_map[i]
            # BUG FIX: use the same zero-padded naming scheme as
            # output_file_1 so the two per-test files pair and sort together
            # (original produced unpadded 'D{damage}_TEST{i}_02.csv').
            output_file_2 = os.path.join(
                output_base, f'DAMAGE_{damage}',
                f'D{damage:0{damage_pad}}_TEST{i:0{test_pad}}_02.csv')
            print(f"Creating {output_file_2} from taking zzzAD{file_index}.TXT")
            print("Taking datetime column on index 0...")
            print(f"Taking `{bottom_sensor}`...")
            df[['Time', bottom_sensor]].to_csv(output_file_2, index=False)
            print(Fore.GREEN + "Done")
            print("---")
def main():
    """CLI entry point: validate arguments, prepare output folders, convert.

    Usage: ``python convert.py <path_to_csv_files> <output_directory>``.
    Exits with status 1 when either required argument is missing.
    """
    # BUG FIX: the script reads both sys.argv[1] and sys.argv[2]; the
    # original guard only required one argument (`< 2`) and then crashed
    # with IndexError when the output directory was omitted.
    if len(sys.argv) < 3:
        print("Usage: python convert.py <path_to_csv_files> <output_directory>")
        sys.exit(1)
    base_path = sys.argv[1]
    output_base = sys.argv[2]  # Root of the output directory tree

    # Create the per-scenario output folders if they don't exist.
    for i in range(1, 5):
        os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)

    create_damage_files(base_path, output_base)
    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")


if __name__ == "__main__":
    main()

View File

@@ -1,8 +1,8 @@
# Raw Data Directory # Processed Data Directory
## Overview ## Overview
This `data/raw` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `raw` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario. This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
## Directory Structure ## Directory Structure

View File

@@ -13,23 +13,14 @@ processed_path = os.path.join(base_path, "processed")
os.makedirs(raw_path, exist_ok=True) os.makedirs(raw_path, exist_ok=True)
os.makedirs(processed_path, exist_ok=True) os.makedirs(processed_path, exist_ok=True)
# Define the number of zeros to pad for damage in range(1, 6): # 5 Damage levels
num_damages = 5 damage_folder = f"DAMAGE_{damage}"
num_tests = 10 damage_path = os.path.join(processed_path, damage_folder)
num_sensors = 2
damage_pad = len(str(num_damages))
test_pad = len(str(num_tests))
sensor_pad = len(str(num_sensors))
for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
damage_path = os.path.join(raw_path, damage_folder)
os.makedirs(damage_path, exist_ok=True) os.makedirs(damage_path, exist_ok=True)
for test in range(1, 11): # 10 Tests per damage level for test in range(1, 11): # 10 Tests per damage level
for sensor in range(1, 3): # 2 Sensors per test
# Filename for the CSV # Filename for the CSV
csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}_{sensor:0{sensor_pad}}.csv" csv_filename = f"D{damage}_TEST{test}.csv"
csv_path = os.path.join(damage_path, csv_filename) csv_path = os.path.join(damage_path, csv_filename)
# Generate dummy data # Generate dummy data