Compare commits

..

9 Commits

Author SHA1 Message Date
nuluh
2f54e91197 feat: Add absolute value option to time feature extraction 2024-09-03 15:39:44 +07:00
nuluh
758255a24e feat(notebooks): Implement Time-domain feature extraction with real data from QUGS 2024-09-03 12:52:40 +07:00
nuluh
ff5578652f fix(script): Fix bugs taking incorrect column by changing columns and sensor_end_map index number to take the loop of enumeration. 2024-09-03 12:08:53 +07:00
nuluh
db2c5d3a4e feat(script): Update output directory in convert.py 2024-09-03 11:50:44 +07:00
nuluh
ea978de872 - 2024-09-03 11:43:46 +07:00
nuluh
465d257850 feat(script): Add zero-padding to converted CSV filenames for standardize processing pipeline 2024-09-03 11:38:49 +07:00
nuluh
d12eea0acf feat(data-processing): Implement CSV data transformation for SVM analysis
Introduce a Python script for transforming QUGS 2D grid structure data into a simplified 1D beam format suitable for SVM-based damage detection. The script efficiently slices original CSV files into smaller, manageable sets, correlating specific damage scenarios with their corresponding sensor data. This change addresses the challenge of retaining critical damage localization information during the data conversion process, ensuring high-quality, relevant data for 1D analysis.

Closes #20
2024-09-03 11:33:23 +07:00
nuluh
0306f28a68 docs(notebooks): add extract_numbers docstring 2024-09-03 11:09:47 +07:00
Panuluh
9da3dae709 Merge pull request #18 from nuluh/feature/15-normalize-dataset-by-preprocess-relatives-value-between-two-acceloremeter-sensors
Feature/15 normalize dataset by preprocess relatives value between two acceloremeter sensors
2024-09-03 08:43:44 +07:00
3 changed files with 1570 additions and 139 deletions

File diff suppressed because one or more lines are too long

View File

@@ -36,9 +36,12 @@ class FeatureExtractor:
result += f"{feature}: {value:.4f}\n"
return result
def ExtractTimeFeatures(object):
def ExtractTimeFeatures(object, absolute=False):
    """Extract time-domain features from a single-signal CSV file.

    Parameters
    ----------
    object : str or file-like
        Path to the CSV file to read. The first row is skipped
        (it carries separator/char info, not data).
        NOTE(review): the name shadows the builtin ``object``; kept
        unchanged so existing keyword callers are not broken.
    absolute : bool, optional
        When True, features are computed on the absolute value of the
        signal. Defaults to False so callers written before this
        parameter existed keep their old behavior.

    Returns
    -------
    The ``features`` attribute of the constructed FeatureExtractor.
    """
    data = pd.read_csv(object, skiprows=1)  # Skip the header row separator char info
    # Assuming the signal is in the second column — TODO confirm with data layout.
    signal = data.iloc[:, 1].values
    if absolute:
        signal = np.abs(signal)
    # Build the extractor once on the (optionally rectified) signal;
    # the original constructed it twice when `absolute` was true.
    extractor = FeatureExtractor(signal)
    return extractor.features
# Save features to a file

65
data/QUGS/convert.py Normal file
View File

@@ -0,0 +1,65 @@
import pandas as pd
import os
import sys
from colorama import Fore, Style, init
def create_damage_files(base_path, output_base):
    """Slice QUGS 2D-grid acceleration files into per-damage 1D beam CSVs.

    For each damage scenario, every source ``zzzAD<N>.TXT`` file produces
    two CSVs: one pairing the Time column with the top sensor
    (``Real`` / ``Real.1`` ... ``Real.4``) and one with the bottom sensor
    (``Real.25`` ... ``Real.29``).

    Parameters
    ----------
    base_path : str
        Directory containing the original ``zzzAD*.TXT`` files.
    output_base : str
        Directory holding the ``DAMAGE_<n>`` output subfolders
        (they must already exist — see ``main``).
    """
    # Initialize colorama so colored console output resets automatically
    init(autoreset=True)

    # Column labels as pandas deduplicates them in the input files
    # ('Real', 'Real.1', ..., 'Real.29'); used to pick the top sensor.
    columns = ['Real'] + [f'Real.{i}' for i in range(1, 30)]
    # Bottom-sensor column for each test index (1..5)
    sensor_end_map = {1: 'Real.25', 2: 'Real.26', 3: 'Real.27', 4: 'Real.28', 5: 'Real.29'}

    # Damage scenarios mapped to the original file indices
    damage_scenarios = {
        1: range(6, 11),   # Damage 1 files from zzzAD6.csv to zzzAD10.csv
        2: range(11, 16),  # Damage 2 files from zzzAD11.csv to zzzAD15.csv
        3: range(16, 21),  # Damage 3 files from zzzAD16.csv to zzzAD20.csv
        4: range(21, 26)   # Damage 4 files from zzzAD21.csv to zzzAD25.csv
    }

    # Zero-padding widths so the generated filenames sort lexicographically
    damage_pad = len(str(len(damage_scenarios)))
    test_pad = len(str(30))

    for damage, files in damage_scenarios.items():
        for i, file_index in enumerate(files, start=1):
            # Load original data file (tab-separated, 10 header rows skipped;
            # column names come from the file itself, not an explicit list)
            file_path = os.path.join(base_path, f'zzzAD{file_index}.TXT')
            df = pd.read_csv(file_path, sep='\t', skiprows=10)

            top_sensor = columns[i - 1]
            output_file_1 = os.path.join(
                output_base, f'DAMAGE_{damage}',
                f'D{damage:0{damage_pad}}_TEST{i:0{test_pad}}_01.csv')
            print(f"Creating {output_file_1} from taking zzzAD{file_index}.TXT")
            print("Taking datetime column on index 0...")
            print(f"Taking `{top_sensor}`...")
            df[['Time', top_sensor]].to_csv(output_file_1, index=False)
            print(Fore.GREEN + "Done")

            bottom_sensor = sensor_end_map[i]
            # FIX: apply the same zero-padding as output_file_1 — the original
            # wrote unpadded names (e.g. D1_TEST1_02.csv), breaking the
            # standardized, lexicographically sortable naming scheme.
            output_file_2 = os.path.join(
                output_base, f'DAMAGE_{damage}',
                f'D{damage:0{damage_pad}}_TEST{i:0{test_pad}}_02.csv')
            print(f"Creating {output_file_2} from taking zzzAD{file_index}.TXT")
            print("Taking datetime column on index 0...")
            print(f"Taking `{bottom_sensor}`...")
            df[['Time', bottom_sensor]].to_csv(output_file_2, index=False)
            print(Fore.GREEN + "Done")
            print("---")
def main():
    """CLI entry point: validate arguments, create output folders, convert files.

    Expects two positional arguments: the input directory of zzzAD*.TXT
    files and the output directory root. Exits with status 1 on misuse.
    """
    # FIX: the script reads sys.argv[2], so it needs at least 3 argv entries;
    # the original checked `< 2` and crashed with IndexError when only the
    # input path was given. The usage string now names both arguments.
    if len(sys.argv) < 3:
        print("Usage: python convert.py <path_to_csv_files> <output_directory>")
        sys.exit(1)
    base_path = sys.argv[1]
    output_base = sys.argv[2]  # Define output directory
    # Create output folders if they don't exist (DAMAGE_1 .. DAMAGE_4)
    for i in range(1, 5):
        os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
    create_damage_files(base_path, output_base)
    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")


if __name__ == "__main__":
    main()