refactor(test): update test script to generate damage files index for dataset_B and adjust export path for processed data

fix(data): fix the incorrect output of scipy.stft() data to be pandas.DataFrame shaped (513,513) along with its frequencies as the index and times as the columns (transposed) instead of just the magnitude that being flattened out; add checks for empty data and correct file paths for sensor data loading.
Closes #43
2025-04-20 16:02:16 +07:00 · 2025-04-20 14:45:38 +07:00 · 2025-04-17 10:10:19 +07:00
3 changed files with 87 additions and 26 deletions
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
@@ -121,6 +121,7 @@
    "signal_sensor2_test1 = []\n",
    "\n",
    "for data in df:\n",
    "    if not data.empty and 'sensor 1' in data.columns and 'sensor 2' in data.columns:\n",
    "        signal_sensor1_test1.append(data['sensor 1'].values)\n",
    "        signal_sensor2_test1.append(data['sensor 2'].values)\n",
    "\n",
@@ -156,8 +157,6 @@
    "from scipy.signal import stft, hann\n",
    "from multiprocessing import Pool\n",
    "\n",
    "\n",
    "\n",
    "# Function to compute and append STFT data\n",
    "def process_stft(args):\n",
    "    # Define STFT parameters\n",
@@ -199,23 +198,22 @@
    "    # Compute STFT\n",
    "    frequencies, times, Zxx = stft(sensor_data, fs=Fs, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n",
    "    magnitude = np.abs(Zxx)\n",
-    "    flattened_stft = magnitude.flatten()\n",
+    "    df_stft = pd.DataFrame(magnitude, index=frequencies, columns=times).T\n",
    "    df_stft.columns = [f\"Freq_{i}\" for i in frequencies]\n",
    "    \n",
    "    # Define the output CSV file path\n",
    "    stft_file_name = f'stft_data{sensor_num}_{damage_num}.csv'\n",
    "    sensor_output_dir = os.path.join(damage_base_path, sensor_name.lower())\n",
    "    os.makedirs(sensor_output_dir, exist_ok=True)\n",
    "    stft_file_path = os.path.join(sensor_output_dir, stft_file_name)\n",
    "    print(stft_file_path)\n",
    "    # Append the flattened STFT to the CSV\n",
    "    try:\n",
    "        flattened_stft_df = pd.DataFrame([flattened_stft])\n",
    "        if not os.path.isfile(stft_file_path):\n",
    "            # Create a new CSV\n",
-    "            flattened_stft_df.to_csv(stft_file_path, index=False, header=False)\n",
+    "            df_stft.to_csv(stft_file_path, index=False, header=False)\n",
    "        else:\n",
    "            # Append to existing CSV\n",
-    "            flattened_stft_df.to_csv(stft_file_path, mode='a', index=False, header=False)\n",
+    "            df_stft.to_csv(stft_file_path, mode='a', index=False, header=False)\n",
    "        print(f\"Appended STFT data to {stft_file_path}\")\n",
    "    except Exception as e:\n",
    "        print(f\"Error writing to {stft_file_path}: {e}\")"
@@ -295,7 +293,7 @@
    "\n",
    "# get current y ticks in list\n",
    "print(len(frequencies))\n",
-    "print(len(times))\n"
+    "print(len(times))"
   ]
  },
  {
@@ -324,8 +322,8 @@
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "ready_data1 = []\n",
-    "for file in os.listdir('D:/thesis/data/working/sensor1'):\n",
+    "for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
-    "    ready_data1.append(pd.read_csv(os.path.join('D:/thesis/data/working/sensor1', file)))\n",
+    "    ready_data1.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file)))\n",
    "# ready_data1[1]\n",
    "# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
    "# Plotting the STFT Data"
@@ -337,8 +335,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "ready_data1[1]\n",
+    "# ready_data1[1]\n",
-    "plt.pcolormesh(ready_data1[1])"
+    "plt.pcolormesh(ready_data1[2])"
   ]
  },
  {
@@ -362,9 +360,8 @@
   "outputs": [],
   "source": [
    "ready_data2 = []\n",
-    "for file in os.listdir('D:/thesis/data/working/sensor2'):\n",
+    "for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
-    "    ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/working/sensor2', file)))\n",
+    "    ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file)))"
    "ready_data2[5]"
   ]
  },
  {
@@ -384,10 +381,25 @@
   "outputs": [],
   "source": [
    "x1 = 0\n",
-    "\n",
+    "print(type(ready_data1[0]))\n",
    "ready_data1[0].iloc[:,0]\n",
    "# x1 = x1 + ready_data1[0].shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x1 = 0\n",
    "print(type(x1))\n",
    "for i in range(len(ready_data1)):\n",
-    "    print(ready_data1[i].shape)\n",
+    "    # print(ready_data1[i].shape)\n",
    "    # print(ready_data1[i].)\n",
    "    print(type(ready_data1[i].shape[0]))\n",
    "    x1 = x1 + ready_data1[i].shape[0]\n",
    "    print(type(x1))\n",
    "\n",
    "print(x1)"
   ]
--- a/data/QUGS/convert.py
+++ b/data/QUGS/convert.py
@@ -2,6 +2,7 @@ import pandas as pd
 import os
 import re
 import sys
 import numpy as np
 from colorama import Fore, Style, init
 from typing import TypedDict, Dict, List
 from joblib import load
@@ -225,25 +226,56 @@ class DataProcessor:
        """
        idx = self._create_vector_column_index()
        # if overwrite:
-        for i in range(len(self.data)):
+        for i in range(len(self.data)):  # damage(s)
-            for j in range(len(self.data[i])):
+            for j in range(len(self.data[i])):  # col(s)
                # Get the appropriate indices for slicing from idx
                indices = idx[j]
                # Get the current DataFrame
                df = self.data[i][j]
-                # Keep the 'Time' column and select only specified 'Real' columns
+                # Keep the 'Time' column and select only specified 'Real' columns
-                # First, we add 1 to all indices to account for 'Time' being at position 0
+                # First, we add 1 to all indices to account for 'Time' being at position 0
                real_indices = [index + 1 for index in indices]
-                # Create list with Time column index (0) and the adjusted Real indices
+                # Create list with Time column index (0) and the adjusted Real indices
                all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
                # Apply the slicing
                self.data[i][j] = df.iloc[:, all_indices]
        # TODO: if !overwrite:
    def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
        """
        Write each processed DataFrame to disk as two single-sensor CSV files.

        Groups in ``self.data`` are numbered from 1 and each gets its own
        folder ``<output_dir>/<file_prefix>_<group>`` (created if missing).
        Every DataFrame inside a group is numbered from 1 as its test index,
        has its three columns relabelled positionally to Time / Real_0 /
        Real_1, and is written twice: ``..._01.csv`` holds Time plus the
        first Real column, ``..._02.csv`` holds Time plus the last Real
        column. In both files the value column is named ``Real``.

        :param output_dir: Directory to save the CSV files.
        :param file_prefix: Prefix for the folder names and output filenames.
        """
        # Filename suffix paired with the temporary column it exports.
        sensor_exports = (("01", "Real_0"), ("02", "Real_1"))

        for group_no, frames in enumerate(self.data, start=1):
            group_stem = f"{file_prefix}_{group_no}"
            target_dir = os.path.join(output_dir, group_stem)
            os.makedirs(target_dir, exist_ok=True)

            for test_no, frame in enumerate(frames, start=1):
                # Work on a copy so the relabelling never touches self.data;
                # unique temporary names let duplicated labels be selected.
                relabelled = frame.copy()
                relabelled.columns = ["Time", "Real_0", "Real_1"]

                for suffix, source_col in sensor_exports:
                    csv_path = os.path.join(
                        target_dir, f"{group_stem}_TEST{test_no}_{suffix}.csv"
                    )
                    single_sensor = relabelled[["Time", source_col]]
                    single_sensor = single_sensor.rename(columns={source_col: "Real"})
                    single_sensor.to_csv(csv_path, index=False)
 def create_damage_files(base_path, output_base, prefix):
    # Initialize colorama
--- a/data/QUGS/test.py
+++ b/data/QUGS/test.py
@@ -4,5 +4,22 @@ from joblib import dump, load
 # a = generate_damage_files_index(
 #     num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
 # )
-# dump(DataProcessor(file_index=a), "D:/cache.joblib")
+
-a = load("D:/cache.joblib")
+b = generate_damage_files_index(
    num_damage=6,
    file_index_start=1,
    col=5,
    base_path="D:/thesis/data/dataset_B",
    prefix="zzzBD",
 )
 # data_A = DataProcessor(file_index=a)
 # # data.create_vector_column(overwrite=True)
 # data_A.create_limited_sensor_vector_column(overwrite=True)
 # data_A.export_to_csv("D:/thesis/data/converted/raw")
 data_B = DataProcessor(file_index=b)
 # data.create_vector_column(overwrite=True)
 data_B.create_limited_sensor_vector_column(overwrite=True)
 data_B.export_to_csv("D:/thesis/data/converted/raw_B")
 # a = load("D:/cache.joblib")
 # breakpoint()
Author	SHA1	Message	Date
nuluh	1511012e11	refactor(test): update test script to generate damage files index for dataset_B and adjust export path for processed data	2025-04-20 16:02:16 +07:00
nuluh	db2947abdf	fix(data): fix the incorrect output of scipy.stft() data to be pandas.DataFrame shaped (513,513) along with its frequencies as the index and times as the columns (transposed) instead of just the magnitude that being flattened out; add checks for empty data and correct file paths for sensor data loading. Closes #43	2025-04-20 14:45:38 +07:00
nuluh	36b36c41ba	feat(data): add export_to_csv method for saving processed data into individuals sensor end and update test script Closes #40	2025-04-17 10:10:19 +07:00