feat(data): add export_to_csv method for saving processed data into individual sensor-end files and update test script

Closes #40
This commit is contained in:
nuluh
2025-04-17 10:10:19 +07:00
parent 28681017ad
commit 36b36c41ba
2 changed files with 46 additions and 10 deletions

View File

@@ -2,6 +2,7 @@ import pandas as pd
import os import os
import re import re
import sys import sys
import numpy as np
from colorama import Fore, Style, init from colorama import Fore, Style, init
from typing import TypedDict, Dict, List from typing import TypedDict, Dict, List
from joblib import load from joblib import load
@@ -225,25 +226,56 @@ class DataProcessor:
""" """
idx = self._create_vector_column_index() idx = self._create_vector_column_index()
# if overwrite: # if overwrite:
for i in range(len(self.data)): for i in range(len(self.data)): # damage(s)
for j in range(len(self.data[i])): for j in range(len(self.data[i])): # col(s)
# Get the appropriate indices for slicing from idx # Get the appropriate indices for slicing from idx
indices = idx[j] indices = idx[j]
# Get the current DataFrame # Get the current DataFrame
df = self.data[i][j] df = self.data[i][j]
# Keep the 'Time' column and select only specified 'Real' columns # Keep the 'Time' column and select only specified 'Real' columns
# First, we add 1 to all indices to account for 'Time' being at position 0 # First, we add 1 to all indices to account for 'Time' being at position 0
real_indices = [index + 1 for index in indices] real_indices = [index + 1 for index in indices]
# Create list with Time column index (0) and the adjusted Real indices # Create list with Time column index (0) and the adjusted Real indices
all_indices = [0] + [real_indices[0]] + [real_indices[-1]] all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
# Apply the slicing # Apply the slicing
self.data[i][j] = df.iloc[:, all_indices] self.data[i][j] = df.iloc[:, all_indices]
# TODO: if !overwrite: # TODO: if !overwrite:
def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
    """
    Write each processed DataFrame in ``self.data`` out as two CSV files.

    For every group in ``self.data`` (numbered from 1) a folder named
    ``<file_prefix>_<group number>`` is created under ``output_dir``.
    Each DataFrame in the group (numbered from 1) is copied, its three
    columns are relabelled ``Time``, ``Real_0`` and ``Real_1``, and two
    files are written without the index:

    * ``..._TEST<n>_01.csv`` holding ``Time`` and the second column,
    * ``..._TEST<n>_02.csv`` holding ``Time`` and the third column,

    with the data column named ``Real`` in both files.

    :param output_dir: Directory under which the group folders are created.
    :param file_prefix: Prefix used for both folder names and filenames.
    """
    # (working column label, filename suffix) in the order the files are written.
    sensor_outputs = (("Real_0", "01"), ("Real_1", "02"))

    for group_no, tests in enumerate(self.data, start=1):
        target_dir = os.path.join(output_dir, f"{file_prefix}_{group_no}")
        os.makedirs(target_dir, exist_ok=True)

        for test_no, frame in enumerate(tests, start=1):
            # Work on a copy so the relabelling never touches self.data;
            # distinct labels keep the two data columns selectable by name
            # even if the incoming labels are duplicated.
            relabelled = frame.copy()
            relabelled.columns = ["Time", "Real_0", "Real_1"]

            for column, suffix in sensor_outputs:
                csv_path = os.path.join(
                    target_dir,
                    f"{file_prefix}_{group_no}_TEST{test_no}_{suffix}.csv",
                )
                single_sensor = relabelled[["Time", column]].rename(
                    columns={column: "Real"}
                )
                single_sensor.to_csv(csv_path, index=False)
def create_damage_files(base_path, output_base, prefix): def create_damage_files(base_path, output_base, prefix):
# Initialize colorama # Initialize colorama

View File

@@ -1,8 +1,12 @@
from convert import * from convert import *
from joblib import dump, load from joblib import dump, load
# a = generate_damage_files_index( a = generate_damage_files_index(
# num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A" num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
# ) )
# dump(DataProcessor(file_index=a), "D:/cache.joblib") data = DataProcessor(file_index=a)
a = load("D:/cache.joblib") # data.create_vector_column(overwrite=True)
data.create_limited_sensor_vector_column(overwrite=True)
data.export_to_csv("D:/thesis/data/")
# a = load("D:/cache.joblib")
breakpoint()