diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb index 9bc07b8..818669d 100644 --- a/code/notebooks/stft.ipynb +++ b/code/notebooks/stft.ipynb @@ -121,8 +121,9 @@ "signal_sensor2_test1 = []\n", "\n", "for data in df:\n", - " signal_sensor1_test1.append(data['sensor 1'].values)\n", - " signal_sensor2_test1.append(data['sensor 2'].values)\n", + " if not data.empty and 'sensor 1' in data.columns and 'sensor 2' in data.columns:\n", + " signal_sensor1_test1.append(data['sensor 1'].values)\n", + " signal_sensor2_test1.append(data['sensor 2'].values)\n", "\n", "print(len(signal_sensor1_test1))\n", "print(len(signal_sensor2_test1))" @@ -156,8 +157,6 @@ "from scipy.signal import stft, hann\n", "from multiprocessing import Pool\n", "\n", - "\n", - "\n", "# Function to compute and append STFT data\n", "def process_stft(args):\n", " # Define STFT parameters\n", @@ -199,23 +198,22 @@ " # Compute STFT\n", " frequencies, times, Zxx = stft(sensor_data, fs=Fs, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n", " magnitude = np.abs(Zxx)\n", - " flattened_stft = magnitude.flatten()\n", + " df_stft = pd.DataFrame(magnitude, index=frequencies, columns=times).T\n", + " df_stft.columns = [f\"Freq_{i}\" for i in frequencies]\n", " \n", " # Define the output CSV file path\n", " stft_file_name = f'stft_data{sensor_num}_{damage_num}.csv'\n", " sensor_output_dir = os.path.join(damage_base_path, sensor_name.lower())\n", " os.makedirs(sensor_output_dir, exist_ok=True)\n", " stft_file_path = os.path.join(sensor_output_dir, stft_file_name)\n", - " print(stft_file_path)\n", " # Append the flattened STFT to the CSV\n", " try:\n", - " flattened_stft_df = pd.DataFrame([flattened_stft])\n", " if not os.path.isfile(stft_file_path):\n", " # Create a new CSV\n", - " flattened_stft_df.to_csv(stft_file_path, index=False, header=False)\n", + " df_stft.to_csv(stft_file_path, index=False, header=False)\n", " else:\n", " # Append to existing CSV\n", - " 
flattened_stft_df.to_csv(stft_file_path, mode='a', index=False, header=False)\n", + " df_stft.to_csv(stft_file_path, mode='a', index=False, header=False)\n", " print(f\"Appended STFT data to {stft_file_path}\")\n", " except Exception as e:\n", " print(f\"Error writing to {stft_file_path}: {e}\")" @@ -295,7 +293,7 @@ "\n", "# get current y ticks in list\n", "print(len(frequencies))\n", - "print(len(times))\n" + "print(len(times))" ] }, { @@ -326,7 +324,7 @@ "ready_data1 = []\n", "for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n", " ready_data1.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file)))\n", - "ready_data1[0]\n", + "# ready_data1[1]\n", "# colormesh give title x is frequency and y is time and rotate/transpose the data\n", "# Plotting the STFT Data" ] @@ -337,8 +335,8 @@ "metadata": {}, "outputs": [], "source": [ - "ready_data1[1]\n", - "plt.pcolormesh(ready_data1[1])" + "# ready_data1[1]\n", + "plt.pcolormesh(ready_data1[2])" ] }, { @@ -363,8 +361,7 @@ "source": [ "ready_data2 = []\n", "for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n", - " ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file)))\n", - "ready_data2[5]" + " ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file)))" ] }, { @@ -384,10 +381,25 @@ "outputs": [], "source": [ "x1 = 0\n", - "\n", + "print(type(ready_data1[0]))\n", + "ready_data1[0].iloc[:,0]\n", + "# x1 = x1 + ready_data1[0].shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x1 = 0\n", + "print(type(x1))\n", "for i in range(len(ready_data1)):\n", - " print(ready_data1[i].shape)\n", + " # print(ready_data1[i].shape)\n", + " # print(ready_data1[i].)\n", + " print(type(ready_data1[i].shape[0]))\n", " x1 = x1 + ready_data1[i].shape[0]\n", + " print(type(x1))\n", "\n", "print(x1)" ] @@ -407,13 +419,6 @@ "print(x2)" ] }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "### Appending" - ] - }, { "cell_type": "code", "execution_count": null, @@ -455,10 +460,15 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "### Labeling" + "y_1 = [1,1,1,1]\n", + "y_2 = [0,1,1,1]\n", + "y_3 = [1,0,1,1]\n", + "y_4 = [1,1,0,0]" ] }, { @@ -494,16 +504,6 @@ " print(ready_data1[i].shape[0])" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for i in range(len(y_data)):\n", - " print(ready_data2[i].shape[0])" - ] - }, { "cell_type": "code", "execution_count": null, @@ -521,8 +521,7 @@ "metadata": {}, "outputs": [], "source": [ - "# len(y_data[0])\n", - "y_data[0]" + "y_data" ] }, { @@ -806,6 +805,7 @@ "\n", " # df1['s1'] = sensor1[sensor1.columns[-1]]\n", " # df1['s2'] = sensor2[sensor2.columns[-1]]\n", + "ed\n", " # # Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n", "\n", " # plt.plot(df1['s2'], label='sensor 2')\n", @@ -835,19 +835,14 @@ " # plt.show()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test with Outside of Its Training Data" - ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "spectograph('D:/thesis/data/converted/raw')" + ] } ], "metadata": { diff --git a/data/QUGS/convert.py b/data/QUGS/convert.py index 85aa0ab..95f1073 100644 --- a/data/QUGS/convert.py +++ b/data/QUGS/convert.py @@ -2,6 +2,7 @@ import pandas as pd import os import re import sys +import numpy as np from colorama import Fore, Style, init from typing import TypedDict, Dict, List from joblib import load @@ -225,25 +226,56 @@ class DataProcessor: """ idx = self._create_vector_column_index() # if overwrite: - for i in range(len(self.data)): - for j in range(len(self.data[i])): + for i in range(len(self.data)): # damage(s) + for j 
in range(len(self.data[i])): # col(s) # Get the appropriate indices for slicing from idx indices = idx[j] # Get the current DataFrame df = self.data[i][j] - # Keep the 'Time' column and select only specified 'Real' columns - # First, we add 1 to all indices to account for 'Time' being at position 0 + # Keep the 'Time' column and select only specified 'Real' columns + # First, we add 1 to all indices to account for 'Time' being at position 0 real_indices = [index + 1 for index in indices] - # Create list with Time column index (0) and the adjusted Real indices + # Create list with Time column index (0) and the adjusted Real indices all_indices = [0] + [real_indices[0]] + [real_indices[-1]] # Apply the slicing self.data[i][j] = df.iloc[:, all_indices] # TODO: if !overwrite: + def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"): + """ + Export the processed data to CSV files in the required folder structure. + + :param output_dir: Directory to save the CSV files. + :param file_prefix: Prefix for the output filenames. 
+ """ + for group_idx, group in enumerate(self.data, start=1): + group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}") + os.makedirs(group_folder, exist_ok=True) + for test_idx, df in enumerate(group, start=1): + # Ensure columns are named uniquely if duplicated + df = df.copy() + df.columns = ["Time", "Real_0", "Real_1"] # Rename + + # Export first Real column + out1 = os.path.join( + group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv" + ) + df[["Time", "Real_0"]].rename(columns={"Real_0": "Real"}).to_csv( + out1, index=False + ) + + # Export last Real column + out2 = os.path.join( + group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv" + ) + df[["Time", "Real_1"]].rename(columns={"Real_1": "Real"}).to_csv( + out2, index=False + ) + def create_damage_files(base_path, output_base, prefix): # Initialize colorama diff --git a/data/QUGS/test.py b/data/QUGS/test.py index 95f2d8c..12b39cf 100644 --- a/data/QUGS/test.py +++ b/data/QUGS/test.py @@ -4,5 +4,22 @@ from joblib import dump, load # a = generate_damage_files_index( # num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A" # ) -# dump(DataProcessor(file_index=a), "D:/cache.joblib") -a = load("D:/cache.joblib") + +b = generate_damage_files_index( + num_damage=6, + file_index_start=1, + col=5, + base_path="D:/thesis/data/dataset_B", + prefix="zzzBD", +) +# data_A = DataProcessor(file_index=a) +# # data.create_vector_column(overwrite=True) +# data_A.create_limited_sensor_vector_column(overwrite=True) +# data_A.export_to_csv("D:/thesis/data/converted/raw") + +data_B = DataProcessor(file_index=b) +# data.create_vector_column(overwrite=True) +data_B.create_limited_sensor_vector_column(overwrite=True) +data_B.export_to_csv("D:/thesis/data/converted/raw_B") +# a = load("D:/cache.joblib") +# breakpoint()