From 36b36c41ba5898090377c914c08cfb8fd3f2040c Mon Sep 17 00:00:00 2001
From: nuluh
Date: Thu, 17 Apr 2025 10:10:19 +0700
Subject: [PATCH 1/3] feat(data): add export_to_csv method for saving
 processed data into individual sensor files and update test script

Closes #40
---
 data/QUGS/convert.py | 42 +++++++++++++++++++++++++++++++++++++-----
 data/QUGS/test.py    | 14 +++++++++-----
 2 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/data/QUGS/convert.py b/data/QUGS/convert.py
index 85aa0ab..95f1073 100644
--- a/data/QUGS/convert.py
+++ b/data/QUGS/convert.py
@@ -2,6 +2,7 @@ import pandas as pd
 import os
 import re
 import sys
+import numpy as np
 from colorama import Fore, Style, init
 from typing import TypedDict, Dict, List
 from joblib import load
@@ -225,25 +226,56 @@ class DataProcessor:
         """
         idx = self._create_vector_column_index()
         # if overwrite:
-        for i in range(len(self.data)):
-            for j in range(len(self.data[i])):
+        for i in range(len(self.data)):  # damage(s)
+            for j in range(len(self.data[i])):  # col(s)
                 # Get the appropriate indices for slicing from idx
                 indices = idx[j]
 
                 # Get the current DataFrame
                 df = self.data[i][j]
 
-                # Keep the 'Time' column and select only specified 'Real' columns
-                # First, we add 1 to all indices to account for 'Time' being at position 0
+                # Keep the 'Time' column and select only specified 'Real' columns
+                # First, we add 1 to all indices to account for 'Time' being at position 0
                 real_indices = [index + 1 for index in indices]
-                # Create list with Time column index (0) and the adjusted Real indices
+                # Create list with Time column index (0) and the adjusted Real indices
                 all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
 
                 # Apply the slicing
                 self.data[i][j] = df.iloc[:, all_indices]
         # TODO: if !overwrite:
 
+    def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
+        """
+        Export the processed data to CSV files in the required folder structure.
+
+        :param output_dir: Directory to save the CSV files.
+        :param file_prefix: Prefix for the output filenames.
+        """
+        for group_idx, group in enumerate(self.data, start=1):
+            group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
+            os.makedirs(group_folder, exist_ok=True)
+            for test_idx, df in enumerate(group, start=1):
+                # Ensure columns are named uniquely if duplicated
+                df = df.copy()
+                df.columns = ["Time", "Real_0", "Real_1"]  # Rename
+
+                # Export first Real column
+                out1 = os.path.join(
+                    group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv"
+                )
+                df[["Time", "Real_0"]].rename(columns={"Real_0": "Real"}).to_csv(
+                    out1, index=False
+                )
+
+                # Export last Real column
+                out2 = os.path.join(
+                    group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv"
+                )
+                df[["Time", "Real_1"]].rename(columns={"Real_1": "Real"}).to_csv(
+                    out2, index=False
+                )
+
 
 def create_damage_files(base_path, output_base, prefix):
     # Initialize colorama
diff --git a/data/QUGS/test.py b/data/QUGS/test.py
index 95f2d8c..0bf4240 100644
--- a/data/QUGS/test.py
+++ b/data/QUGS/test.py
@@ -1,8 +1,12 @@
 from convert import *
 from joblib import dump, load
 
-# a = generate_damage_files_index(
-#     num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
-# )
-# dump(DataProcessor(file_index=a), "D:/cache.joblib")
-a = load("D:/cache.joblib")
+a = generate_damage_files_index(
+    num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
+)
+data = DataProcessor(file_index=a)
+# data.create_vector_column(overwrite=True)
+data.create_limited_sensor_vector_column(overwrite=True)
+data.export_to_csv("D:/thesis/data/")
+# a = load("D:/cache.joblib")
+breakpoint()

From db2947abdf4fa2eae927b8ba99075660d57f8132 Mon Sep 17 00:00:00 2001
From: nuluh
Date: Sun, 20 Apr 2025 14:45:38 +0700
Subject: [PATCH 2/3] fix(data): fix the incorrect output of scipy.stft() data
 to be pandas.DataFrame shaped (513,513) along with its frequencies as the
 index and times as the columns (transposed) instead of just the magnitude
 that was being flattened out; add checks for empty data and correct file
 paths for sensor data loading.

Closes #43
---
 code/notebooks/stft.ipynb | 50 ++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb
index b1c16b3..818669d 100644
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
@@ -121,8 +121,9 @@
     "signal_sensor2_test1 = []\n",
     "\n",
     "for data in df:\n",
-    "    signal_sensor1_test1.append(data['sensor 1'].values)\n",
-    "    signal_sensor2_test1.append(data['sensor 2'].values)\n",
+    "    if not data.empty and 'sensor 1' in data.columns and 'sensor 2' in data.columns:\n",
+    "        signal_sensor1_test1.append(data['sensor 1'].values)\n",
+    "        signal_sensor2_test1.append(data['sensor 2'].values)\n",
     "\n",
     "print(len(signal_sensor1_test1))\n",
     "print(len(signal_sensor2_test1))"
@@ -156,8 +157,6 @@
     "from scipy.signal import stft, hann\n",
     "from multiprocessing import Pool\n",
     "\n",
-    "\n",
-    "\n",
     "# Function to compute and append STFT data\n",
     "def process_stft(args):\n",
     "    # Define STFT parameters\n",
@@ -199,23 +198,22 @@
     "    # Compute STFT\n",
     "    frequencies, times, Zxx = stft(sensor_data, fs=Fs, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n",
     "    magnitude = np.abs(Zxx)\n",
-    "    flattened_stft = magnitude.flatten()\n",
+    "    df_stft = pd.DataFrame(magnitude, index=frequencies, columns=times).T\n",
+    "    df_stft.columns = [f\"Freq_{i}\" for i in frequencies]\n",
     "    \n",
     "    # Define the output CSV file path\n",
     "    stft_file_name = f'stft_data{sensor_num}_{damage_num}.csv'\n",
     "    sensor_output_dir = os.path.join(damage_base_path, sensor_name.lower())\n",
     "    os.makedirs(sensor_output_dir, exist_ok=True)\n",
     "    stft_file_path = os.path.join(sensor_output_dir, stft_file_name)\n",
-    "    print(stft_file_path)\n",
     "    # Append the flattened STFT to the CSV\n",
     "    try:\n",
-    "        flattened_stft_df = pd.DataFrame([flattened_stft])\n",
     "        if not os.path.isfile(stft_file_path):\n",
     "            # Create a new CSV\n",
-    "            flattened_stft_df.to_csv(stft_file_path, index=False, header=False)\n",
+    "            df_stft.to_csv(stft_file_path, index=False, header=False)\n",
     "        else:\n",
     "            # Append to existing CSV\n",
-    "            flattened_stft_df.to_csv(stft_file_path, mode='a', index=False, header=False)\n",
+    "            df_stft.to_csv(stft_file_path, mode='a', index=False, header=False)\n",
     "        print(f\"Appended STFT data to {stft_file_path}\")\n",
     "    except Exception as e:\n",
     "        print(f\"Error writing to {stft_file_path}: {e}\")"
@@ -295,7 +293,7 @@
     "\n",
     "# get current y ticks in list\n",
     "print(len(frequencies))\n",
-    "print(len(times))\n"
+    "print(len(times))"
    ]
   },
   {
@@ -324,8 +322,8 @@
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
     "ready_data1 = []\n",
-    "for file in os.listdir('D:/thesis/data/working/sensor1'):\n",
-    "    ready_data1.append(pd.read_csv(os.path.join('D:/thesis/data/working/sensor1', file)))\n",
+    "for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
+    "    ready_data1.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file)))\n",
     "# ready_data1[1]\n",
     "# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
     "# Plotting the STFT Data"
@@ -337,8 +335,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ready_data1[1]\n",
-    "plt.pcolormesh(ready_data1[1])"
+    "# ready_data1[1]\n",
+    "plt.pcolormesh(ready_data1[2])"
    ]
   },
   {
@@ -362,9 +360,8 @@
    "outputs": [],
    "source": [
     "ready_data2 = []\n",
-    "for file in os.listdir('D:/thesis/data/working/sensor2'):\n",
-    "    ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/working/sensor2', file)))\n",
-    "ready_data2[5]"
+    "for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
+    "    ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file)))"
    ]
   },
   {
@@ -384,10 +381,25 @@
    "outputs": [],
    "source": [
     "x1 = 0\n",
-    "\n",
+    "print(type(ready_data1[0]))\n",
+    "ready_data1[0].iloc[:,0]\n",
+    "# x1 = x1 + ready_data1[0].shape[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x1 = 0\n",
+    "print(type(x1))\n",
     "for i in range(len(ready_data1)):\n",
-    "    print(ready_data1[i].shape)\n",
+    "    # print(ready_data1[i].shape)\n",
+    "    # print(ready_data1[i].)\n",
+    "    print(type(ready_data1[i].shape[0]))\n",
     "    x1 = x1 + ready_data1[i].shape[0]\n",
+    "    print(type(x1))\n",
     "\n",
     "print(x1)"
    ]

From 1511012e115d15c7c617dd706cb8209865a05e63 Mon Sep 17 00:00:00 2001
From: nuluh
Date: Sun, 20 Apr 2025 16:02:16 +0700
Subject: [PATCH 3/3] refactor(test): update test script to generate damage
 files index for dataset_B and adjust export path for processed data

---
 data/QUGS/test.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/data/QUGS/test.py b/data/QUGS/test.py
index 0bf4240..12b39cf 100644
--- a/data/QUGS/test.py
+++ b/data/QUGS/test.py
@@ -1,12 +1,25 @@
 from convert import *
 from joblib import dump, load
 
-a = generate_damage_files_index(
-    num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
+# a = generate_damage_files_index(
+#     num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
+# )
+
+b = generate_damage_files_index(
+    num_damage=6,
+    file_index_start=1,
+    col=5,
+    base_path="D:/thesis/data/dataset_B",
+    prefix="zzzBD",
 )
-data = DataProcessor(file_index=a)
+# data_A = DataProcessor(file_index=a)
+# # data.create_vector_column(overwrite=True)
+# data_A.create_limited_sensor_vector_column(overwrite=True)
+# data_A.export_to_csv("D:/thesis/data/converted/raw")
+
+data_B = DataProcessor(file_index=b)
 # data.create_vector_column(overwrite=True)
-data.create_limited_sensor_vector_column(overwrite=True)
-data.export_to_csv("D:/thesis/data/")
+data_B.create_limited_sensor_vector_column(overwrite=True)
+data_B.export_to_csv("D:/thesis/data/converted/raw_B")
 # a = load("D:/cache.joblib")
-breakpoint()
+# breakpoint()