Compare commits
1 Commits
stft
...
revert-8-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
88be76292b |
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +0,0 @@
|
|||||||
*.ipynb filter=nbstripout
|
|
||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,4 +1,4 @@
|
|||||||
# Ignore CSV files in the data directory and all its subdirectories
|
# Ignore CSV files in the data directory and all its subdirectories
|
||||||
data/**/*.csv
|
data/**/*.csv
|
||||||
.venv/
|
|
||||||
*.pyc
|
*.pyc
|
||||||
30
.gitmessage
30
.gitmessage
@@ -1,30 +0,0 @@
|
|||||||
# .gitmessage
|
|
||||||
|
|
||||||
# <type>(<scope>): <subject>
|
|
||||||
# |<---- Using a Maximum Of 50 Characters ---->|
|
|
||||||
#
|
|
||||||
# Explain the problem that this commit is solving. Focus on why you
|
|
||||||
# are making this change as opposed to how. Use clear, concise language.
|
|
||||||
# |<---- Try To Limit Each Line to a Maximum Of 72 Characters ---->|
|
|
||||||
#
|
|
||||||
# -- COMMIT END --
|
|
||||||
# Types:
|
|
||||||
# feat (new feature)
|
|
||||||
# fix (bug fix)
|
|
||||||
# refactor (refactoring code)
|
|
||||||
# style (formatting, no code change)
|
|
||||||
# doc (changes to documentation)
|
|
||||||
# test (adding or refactoring tests)
|
|
||||||
# perf (performance improvements)
|
|
||||||
# chore (routine tasks, dependencies)
|
|
||||||
# exp (experimental work/exploration)
|
|
||||||
#
|
|
||||||
# Scope:
|
|
||||||
# latex (changes to thesis LaTeX)
|
|
||||||
# src (changes to Python source code)
|
|
||||||
# nb (changes to notebooks)
|
|
||||||
# ml (ML model specific changes)
|
|
||||||
# data (data processing/preparation)
|
|
||||||
# viz (visualization related)
|
|
||||||
# all (changes spanning entire repository)
|
|
||||||
# --------------------
|
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,192 +0,0 @@
|
|||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
from scipy.fft import fft, fftfreq
|
|
||||||
|
|
||||||
def get_mean_freq(signal, frame_size, hop_length):
    """Return one mean spectral-amplitude value per frame of ``signal``.

    Frames start at 0, ``hop_length``, 2 * ``hop_length``, ... and hold up to
    ``frame_size`` samples. Each frame is divided by its own length, passed
    through an FFT, and the magnitudes of the first ``len(frame) // 2`` bins
    are summed and divided by ``frame_size``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    mean = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        # NOTE(review): normalised by frame_size although only n // 2 bins are
        # summed - confirm this is the intended definition.
        mean.append(np.sum(spectrum) / frame_size)
    return np.array(mean)
|
|
||||||
|
|
||||||
def get_variance_freq(signal, frame_size, hop_length):
    """Return one spectral-amplitude variance value per frame of ``signal``.

    For each frame the half-spectrum magnitudes ``y`` are computed (FFT of the
    frame divided by its length, first ``len(frame) // 2`` bins) and the value
    ``sum((y - sum(y) / frame_size) ** 2) / (frame_size - 1)`` is stored.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    var = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        # NOTE(review): the centre uses frame_size, not the number of bins.
        centre = np.sum(spectrum) / frame_size
        var.append(np.sum((spectrum - centre) ** 2) / (frame_size - 1))
    return np.array(var)
|
|
||||||
|
|
||||||
def get_third_freq(signal, frame_size, hop_length):
    """Return one third-order (skewness-like) spectral statistic per frame.

    With ``y`` the half-spectrum magnitudes of a frame and
    ``m = sum(y) / frame_size``, the stored value is
    ``sum((y - m) ** 3) / (frame_size * sqrt(sum((y - m) ** 2) / (frame_size - 1)) ** 3)``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    third = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        deviation = spectrum - np.sum(spectrum) / frame_size
        spread = np.sqrt(np.sum(deviation ** 2) / (frame_size - 1))
        third.append(np.sum(deviation ** 3) / (frame_size * spread ** 3))
    return np.array(third)
|
|
||||||
|
|
||||||
def get_forth_freq(signal, frame_size, hop_length):
    """Return one fourth-order (kurtosis-like) spectral statistic per frame.

    With ``y`` the half-spectrum magnitudes of a frame and
    ``m = sum(y) / frame_size``, the stored value is
    ``sum((y - m) ** 4) / (frame_size * (sum((y - m) ** 2) / (frame_size - 1)) ** 2)``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    forth = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        deviation = spectrum - np.sum(spectrum) / frame_size
        variance = np.sum(deviation ** 2) / (frame_size - 1)
        forth.append(np.sum(deviation ** 4) / (frame_size * variance ** 2))
    return np.array(forth)
|
|
||||||
|
|
||||||
def get_grand_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return the amplitude-weighted mean frequency of every frame.

    With ``y`` the half-spectrum magnitudes of a frame and ``f`` the matching
    ``np.fft.fftfreq`` values, the stored value is ``sum(f * y) / sum(y)``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.
            NOTE(review): that corresponds to a 256 kHz rate - confirm it is
            not meant to be ``1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    grand = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        grand.append(np.sum(freqs * spectrum) / np.sum(spectrum))
    return np.array(grand)
|
|
||||||
|
|
||||||
def get_std_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return the amplitude-weighted frequency spread of every frame.

    With ``y`` the half-spectrum magnitudes, ``f`` the matching frequencies
    and ``c = sum(f * y) / sum(y)``, the stored value is
    ``sqrt(sum((f - c) ** 2 * y) / frame_size)``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    std = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        centroid = np.sum(freqs * spectrum) / np.sum(spectrum)
        std.append(np.sqrt(np.sum((freqs - centroid) ** 2 * spectrum) / frame_size))
    return np.array(std)
|
|
||||||
|
|
||||||
def get_Cfactor_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return ``sqrt(sum(f**2 * y) / sum(y))`` for every frame.

    ``y`` are the half-spectrum magnitudes of the frame (FFT of the frame
    divided by its length) and ``f`` the matching ``np.fft.fftfreq`` values.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    cfactor = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        cfactor.append(np.sqrt(np.sum(freqs ** 2 * spectrum) / np.sum(spectrum)))
    return np.array(cfactor)
|
|
||||||
|
|
||||||
def get_Dfactor_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return ``sqrt(sum(f**4 * y) / sum(f**2 * y))`` for every frame.

    ``y`` are the half-spectrum magnitudes of the frame (FFT of the frame
    divided by its length) and ``f`` the matching ``np.fft.fftfreq`` values.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    dfactor = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        second_moment = np.sum(freqs ** 2 * spectrum)
        fourth_moment = np.sum(freqs ** 4 * spectrum)
        dfactor.append(np.sqrt(fourth_moment / second_moment))
    return np.array(dfactor)
|
|
||||||
|
|
||||||
def get_Efactor_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return ``sqrt(sum(f**2 * y) / sqrt(sum(y) * sum(f**4 * y)))`` per frame.

    ``y`` are the half-spectrum magnitudes of the frame (FFT of the frame
    divided by its length) and ``f`` the matching ``np.fft.fftfreq`` values.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    efactor = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        second_moment = np.sum(freqs ** 2 * spectrum)
        fourth_moment = np.sum(freqs ** 4 * spectrum)
        # NOTE(review): the outer square root is kept from the original
        # expression - confirm it matches the intended feature definition.
        efactor.append(
            np.sqrt(second_moment / np.sqrt(np.sum(spectrum) * fourth_moment))
        )
    return np.array(efactor)
|
|
||||||
|
|
||||||
def get_Gfactor_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return the ratio of frequency spread to mean frequency per frame.

    With ``y`` the half-spectrum magnitudes, ``f`` the matching frequencies
    and ``c = sum(f * y) / sum(y)``, the stored value is
    ``sqrt(sum((f - c) ** 2 * y) / frame_size) / c``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    gfactor = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        centroid = np.sum(freqs * spectrum) / np.sum(spectrum)
        spread = np.sqrt(np.sum((freqs - centroid) ** 2 * spectrum) / frame_size)
        gfactor.append(spread / centroid)
    return np.array(gfactor)
|
|
||||||
|
|
||||||
def get_third1_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return a third-order amplitude-weighted frequency statistic per frame.

    With ``y`` the half-spectrum magnitudes, ``f`` the matching frequencies,
    ``c = sum(f * y) / sum(y)`` and
    ``s = sqrt(sum((f - c) ** 2 * y) / frame_size)``, the stored value is
    ``sum((f - c) ** 3 * y) / (frame_size * s ** 3)``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    third1 = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        offset = freqs - np.sum(freqs * spectrum) / np.sum(spectrum)
        spread = np.sqrt(np.sum(offset ** 2 * spectrum) / frame_size)
        third1.append(np.sum(offset ** 3 * spectrum) / (frame_size * spread ** 3))
    return np.array(third1)
|
|
||||||
|
|
||||||
def get_forth1_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return a fourth-order amplitude-weighted frequency statistic per frame.

    With ``y`` the half-spectrum magnitudes, ``f`` the matching frequencies,
    ``c = sum(f * y) / sum(y)`` and
    ``s = sqrt(sum((f - c) ** 2 * y) / frame_size)``, the stored value is
    ``sum((f - c) ** 4 * y) / (frame_size * s ** 4)``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    forth1 = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        offset = freqs - np.sum(freqs * spectrum) / np.sum(spectrum)
        spread = np.sqrt(np.sum(offset ** 2 * spectrum) / frame_size)
        forth1.append(np.sum(offset ** 4 * spectrum) / (frame_size * spread ** 4))
    return np.array(forth1)
|
|
||||||
|
|
||||||
def get_Hfactor_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return a half-order amplitude-weighted frequency statistic per frame.

    With ``y`` the half-spectrum magnitudes, ``f`` the matching frequencies,
    ``c = sum(f * y) / sum(y)`` and
    ``s = sqrt(sum((f - c) ** 2 * y) / frame_size)``, the stored value is
    ``sum(sqrt(|f - c|) * y) / (frame_size * sqrt(s))``.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    hfactor = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        offset = freqs - np.sum(freqs * spectrum) / np.sum(spectrum)
        spread = np.sqrt(np.sum(offset ** 2 * spectrum) / frame_size)
        hfactor.append(
            np.sum(np.sqrt(np.abs(offset)) * spectrum) / (frame_size * np.sqrt(spread))
        )
    return np.array(hfactor)
|
|
||||||
|
|
||||||
def get_Jfactor_freq(signal, frame_size, hop_length, sample_spacing=0.1 / 25600):
    """Return a half-order amplitude-weighted frequency statistic per frame.

    With ``y`` the half-spectrum magnitudes, ``f`` the matching frequencies,
    ``c = sum(f * y) / sum(y)`` and
    ``s = sqrt(sum((f - c) ** 2 * y) / frame_size)``, the stored value is
    ``sum(sqrt(|f - c|) * y) / (frame_size * sqrt(s))``.

    NOTE(review): this expression is identical to the one in
    ``get_Hfactor_freq`` - confirm whether a different feature was intended.

    Args:
        signal: 1-D array-like of samples (lists are accepted).
        frame_size: Nominal number of samples per frame.
        hop_length: Step between consecutive frame starts.
        sample_spacing: Sample spacing handed to ``np.fft.fftfreq``. The
            default reproduces the previously hard-coded ``.1/25600``.

    Returns:
        ``np.ndarray`` with one value per frame.
    """
    # Plain lists do not support element-wise division; convert once up front.
    signal = np.asarray(signal)
    jfactor = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n = len(frame)  # trailing frames can be shorter than frame_size
        spectrum = np.abs(np.fft.fft(frame / n))[:n // 2]
        freqs = np.fft.fftfreq(n, sample_spacing)[:n // 2]
        offset = freqs - np.sum(freqs * spectrum) / np.sum(spectrum)
        spread = np.sqrt(np.sum(offset ** 2 * spectrum) / frame_size)
        jfactor.append(
            np.sum(np.sqrt(np.abs(offset)) * spectrum) / (frame_size * np.sqrt(spread))
        )
    return np.array(jfactor)
|
|
||||||
|
|
||||||
class FrequencyFeatureExtractor:
    """Compute summary statistics of the FFT magnitude spectrum of a signal.

    The constructor takes the samples themselves (not a file path), computes
    ``abs(fft(data))`` over the whole signal and stores the resulting
    statistics in ``self.features``.
    """

    def __init__(self, data):
        """Compute the spectrum of ``data`` and derive all features.

        Args:
            data: Array-like of samples (the original code assumes a numpy
                array).
        """
        self.x = data
        # Magnitude of the full FFT output, one value per input sample.
        self.frequency_spectrum = np.abs(fft(self.x))
        self.n = len(self.frequency_spectrum)
        self.mean_freq = np.mean(self.frequency_spectrum)
        self.variance_freq = np.var(self.frequency_spectrum)
        self.std_freq = np.std(self.frequency_spectrum)

        # Calculate the required frequency features
        self.features = self.calculate_features()

    def calculate_features(self):
        """Return a dict mapping feature label to value for the spectrum.

        The variance uses an ``n - 1`` denominator while skewness and
        kurtosis divide by ``n``.
        """
        spectrum = self.frequency_spectrum
        S_mu = self.mean_freq
        S_MAX = np.max(spectrum)
        S_SBP = np.sum(spectrum)
        # Peak and max are the same quantity; reuse it instead of a second scan.
        S_Peak = S_MAX
        deviation = spectrum - S_mu
        S_V = np.sum(deviation ** 2) / (self.n - 1)
        S_Sigma = np.sqrt(S_V)
        S_Skewness = np.sum(deviation ** 3) / (self.n * S_Sigma ** 3)
        S_Kurtosis = np.sum(deviation ** 4) / (self.n * S_Sigma ** 4)
        S_RSPPB = S_Peak / S_mu

        return {
            'Mean of band Power Spectrum (S_mu)': S_mu,
            'Max of band power spectrum (S_MAX)': S_MAX,
            'Sum of total band power (S_SBP)': S_SBP,
            'Peak of band power (S_Peak)': S_Peak,
            'Variance of band power (S_V)': S_V,
            'Standard Deviation of band power (S_Sigma)': S_Sigma,
            'Skewness of band power (S_Skewness)': S_Skewness,
            'Kurtosis of band power (S_Kurtosis)': S_Kurtosis,
            'Relative Spectral Peak per Band Power (S_RSPPB)': S_RSPPB
        }

    def __repr__(self):
        """Return a multi-line report with one ``label: value`` line per feature."""
        lines = [
            f"{feature}: {value:.4f}\n" for feature, value in self.features.items()
        ]
        return "Frequency Domain Feature Extraction Results:\n" + "".join(lines)
|
|
||||||
|
|
||||||
def ExtractFrequencyFeatures(object):
    """Read a CSV and return the frequency features of its second column.

    Args:
        object: Path or file-like object accepted by ``pd.read_csv``. The
            first line of the file is skipped before parsing.

    Returns:
        The ``features`` dict built by ``FrequencyFeatureExtractor``.
    """
    # The first row carries separator-char info, not data, so it is skipped.
    frame = pd.read_csv(object, skiprows=1)
    # The samples are assumed to live in the second column.
    samples = frame.iloc[:, 1].values
    return FrequencyFeatureExtractor(samples).features
|
|
||||||
|
|
||||||
# Usage Example
|
|
||||||
# extractor = FrequencyFeatureExtractor(pd.read_csv('path_to_your_data.csv', skiprows=1).iloc[:, 1].values)
|
|
||||||
# print(extractor)
|
|
||||||
@@ -36,12 +36,9 @@ class FeatureExtractor:
|
|||||||
result += f"{feature}: {value:.4f}\n"
|
result += f"{feature}: {value:.4f}\n"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def ExtractTimeFeatures(object, absolute):
|
def ExtractTimeFeatures(object):
|
||||||
data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
|
data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
|
||||||
if absolute:
|
extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
|
||||||
extractor = FeatureExtractor(np.abs(data.iloc[:, 1].values)) # Assuming the data is in the second column
|
|
||||||
else:
|
|
||||||
extractor = FeatureExtractor(data.iloc[:, 1].values)
|
|
||||||
features = extractor.features
|
features = extractor.features
|
||||||
return features
|
return features
|
||||||
# Save features to a file
|
# Save features to a file
|
||||||
|
|||||||
@@ -1,115 +0,0 @@
|
|||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
from scipy.signal import stft, hann
|
|
||||||
import glob
|
|
||||||
import multiprocessing # Added import for multiprocessing
|
|
||||||
|
|
||||||
# Define the base directory where DAMAGE_X folders are located
damage_base_path = 'D:/thesis/data/converted/raw'

# Define output directories for each sensor
output_dirs = {
    'sensor1': os.path.join(damage_base_path, 'sensor1'),
    'sensor2': os.path.join(damage_base_path, 'sensor2')
}

# Create output directories if they don't exist
# (this runs at import time, i.e. also in every process that imports the module)
for dir_path in output_dirs.values():
    os.makedirs(dir_path, exist_ok=True)

# Define STFT parameters
window_size = 1024
hop_size = 512
# NOTE(review): recent SciPy releases expose window functions under
# scipy.signal.windows; confirm `hann` is still importable from scipy.signal
# in the target environment.
window = hann(window_size)
# Sampling rate handed to scipy.signal.stft as `fs`.
Fs = 1024

# Number of damage cases (adjust as needed)
num_damage_cases = 6  # Change to 30 if you have 30 damage cases

# Number of test runs per damage case
num_test_runs = 5
|
|
||||||
|
|
||||||
# Function to perform STFT and return magnitude
|
|
||||||
def compute_stft(vibration_data):
    """Return the STFT magnitude of ``vibration_data``, transposed.

    The transform uses the module-level ``Fs``, ``window``, ``window_size``
    and ``hop_size`` settings; the overlap is ``window_size - hop_size``.

    Args:
        vibration_data: Samples passed straight to ``scipy.signal.stft``.

    Returns:
        ``abs(Zxx).T`` so that frequencies run along the columns.
    """
    overlap = window_size - hop_size
    # Only the complex STFT matrix is needed; the frequency and time axes
    # returned alongside it are discarded.
    _, _, Zxx = stft(
        vibration_data,
        fs=Fs,
        window=window,
        nperseg=window_size,
        noverlap=overlap,
    )
    return np.abs(Zxx).T  # Transpose to have frequencies as columns
|
|
||||||
|
|
||||||
def process_damage_case(damage_num):
    """Aggregate the STFTs of all test runs of one damage case, per sensor.

    For sensors 1 and 2, every ``DAMAGE_<n>_TEST<t>_0<s>.csv`` file found in
    the ``DAMAGE_<n>`` folder is read, transformed with ``compute_stft`` and
    stacked vertically; the result is written to
    ``stft_data<s>_<n>.csv`` in that sensor's output directory. Missing or
    unreadable files are reported on stdout and skipped.

    Args:
        damage_num: Damage case number used in folder and file names.
    """
    damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')

    # Nothing to do when the damage folder is absent.
    if not os.path.isdir(damage_folder):
        print(f"Folder {damage_folder} does not exist. Skipping...")
        return

    # Sensor 1 and Sensor 2 are aggregated independently.
    for sensor_num in (1, 2):
        # Sensor 1 corresponds to '_01', Sensor 2 corresponds to '_02'
        sensor_suffix = f'_0{sensor_num}'
        aggregated_stft = []  # one DataFrame per successfully processed run

        for test_num in range(1, num_test_runs + 1):
            file_path = os.path.join(
                damage_folder,
                f'DAMAGE_{damage_num}_TEST{test_num}{sensor_suffix}.csv',
            )

            if not os.path.isfile(file_path):
                print(f"File {file_path} does not exist. Skipping...")
                continue

            try:
                df = pd.read_csv(file_path)
            except Exception as e:
                # Best effort: one unreadable run must not abort the whole case.
                print(f"Error reading {file_path}: {e}. Skipping...")
                continue

            # Expect exactly two columns: 'Timestamp (s)' and 'Sensor X'
            if df.shape[1] != 2:
                print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
                continue

            # The second column is assumed to hold the sensor samples.
            stft_magnitude = compute_stft(df.iloc[:, 1].values)

            # Column labels are evenly spaced from 0 to Fs/2, one per column.
            freq_labels = [
                f"Freq_{freq:.2f}"
                for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])
            ]
            aggregated_stft.append(pd.DataFrame(stft_magnitude, columns=freq_labels))

        if not aggregated_stft:
            print(f"No STFT data aggregated for Sensor {sensor_num}, Damage {damage_num}.")
            continue

        # Stack the runs vertically in test-run order.
        df_aggregated = pd.concat(aggregated_stft, ignore_index=True)
        output_file = os.path.join(
            output_dirs[f'sensor{sensor_num}'],
            f'stft_data{sensor_num}_{damage_num}.csv'
        )
        df_aggregated.to_csv(output_file, index=False)
        print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}")
|
|
||||||
|
|
||||||
# Entry point: the guard keeps worker processes from re-running the pool
# when they import this module.
if __name__ == "__main__":  # Added main guard for multiprocessing
    # One task per damage case, numbered 1..num_damage_cases.
    with multiprocessing.Pool() as pool:
        pool.map(process_damage_case, range(1, num_damage_cases + 1))
|
|
||||||
@@ -1,133 +0,0 @@
|
|||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
from scipy.signal import stft, hann
|
|
||||||
import glob
|
|
||||||
|
|
||||||
# Define the base directory where DAMAGE_X folders are located
damage_base_path = 'D:/thesis/data/converted/raw/'

# Define sensor directories (where the aggregated stft_data CSVs are read from)
sensor_dirs = {
    'sensor1': os.path.join(damage_base_path, 'sensor1'),
    'sensor2': os.path.join(damage_base_path, 'sensor2')
}

# Define STFT parameters
window_size = 1024
hop_size = 512
# NOTE(review): recent SciPy releases expose window functions under
# scipy.signal.windows; confirm `hann` is still importable from scipy.signal
# in the target environment.
window = hann(window_size)
# Sampling rate handed to scipy.signal.stft as `fs`.
Fs = 1024
|
|
||||||
|
|
||||||
def verify_stft(damage_num, test_num, sensor_num):
    """
    Verifies the STFT of an individual test run against the aggregated STFT data.

    The STFT magnitude of the individual CSV is recomputed and up to three
    reproducibly chosen rows are compared (``np.allclose``, ``atol=1e-6``)
    with the matching rows of the aggregated ``stft_data<sensor>_<damage>.csv``.
    Results are printed; nothing is returned. Any missing or unreadable file
    is reported and the verification is skipped.

    The block offset inside the aggregated file assumes every earlier test
    run contributed the same number of rows as this one.

    Parameters:
    - damage_num (int): Damage case number.
    - test_num (int): Test run number.
    - sensor_num (int): Sensor number (1 or 2).
    """
    # Mapping sensor number to suffix
    sensor_suffix = f'_0{sensor_num}'

    # Construct the file name for the individual test run
    individual_file_name = f'DAMAGE_{damage_num}_TEST{test_num}{sensor_suffix}.csv'
    individual_file_path = os.path.join(damage_base_path, f'DAMAGE_{damage_num}', individual_file_name)

    # Check if the individual file exists
    if not os.path.isfile(individual_file_path):
        print(f"File {individual_file_path} does not exist. Skipping verification for this test run.")
        return

    # Read the individual test run CSV
    try:
        df_individual = pd.read_csv(individual_file_path)
    except Exception as e:
        print(f"Error reading {individual_file_path}: {e}. Skipping verification for this test run.")
        return

    # Ensure the CSV has exactly two columns: 'Timestamp (s)' and 'Sensor X'
    if df_individual.shape[1] != 2:
        print(f"Unexpected number of columns in {individual_file_path}. Expected 2, got {df_individual.shape[1]}. Skipping.")
        return

    # Extract vibration data
    vibration_data = df_individual.iloc[:, 1].values

    # Perform STFT (only the complex matrix is needed)
    _, _, Zxx = stft(
        vibration_data,
        fs=Fs,
        window=window,
        nperseg=window_size,
        noverlap=window_size - hop_size
    )

    # Compute magnitude and transpose: one row per time frame
    stft_magnitude = np.abs(Zxx).T
    # Rows contributed by one run; derived from the data instead of assuming 513.
    rows_per_run = stft_magnitude.shape[0]

    # Select random row indices to verify. A private, seeded generator gives
    # the same draw as seeding the global RNG with 42, without clobbering the
    # global state; the sample size is capped for very short recordings.
    rng = np.random.RandomState(42)
    sample_row_indices = rng.choice(rows_per_run, size=min(3, rows_per_run), replace=False)

    # Read the aggregated STFT CSV
    aggregated_file_name = f'stft_data{sensor_num}_{damage_num}.csv'
    aggregated_file_path = os.path.join(sensor_dirs[f'sensor{sensor_num}'], aggregated_file_name)

    if not os.path.isfile(aggregated_file_path):
        print(f"Aggregated file {aggregated_file_path} does not exist. Skipping verification for this test run.")
        return

    try:
        df_aggregated = pd.read_csv(aggregated_file_path)
    except Exception as e:
        print(f"Error reading {aggregated_file_path}: {e}. Skipping verification for this test run.")
        return

    # Calculate the row window of this test run in the aggregated CSV
    start_row = (test_num - 1) * rows_per_run
    end_row = start_row + rows_per_run  # Exclusive

    # Ensure the aggregated CSV has enough rows
    if df_aggregated.shape[0] < end_row:
        print(f"Aggregated file {aggregated_file_path} does not have enough rows for Test {test_num}. Skipping.")
        return

    # Extract the corresponding STFT block from the aggregated CSV
    df_aggregated_block = df_aggregated.iloc[start_row:end_row].values

    # Compare selected rows
    all_match = True
    for row_idx in sample_row_indices:
        individual_row = stft_magnitude[row_idx]
        aggregated_row = df_aggregated_block[row_idx]

        # Check if the rows are almost equal within a tolerance
        if np.allclose(individual_row, aggregated_row, atol=1e-6):
            verification_status = "MATCH"
        else:
            verification_status = "MISMATCH"
            all_match = False

        # Print the comparison details
        print(f"Comparing Damage {damage_num}, Test {test_num}, Sensor {sensor_num}, Row {row_idx}: {verification_status}")
        print(f"Individual STFT Row {row_idx}: {individual_row[:5]} ... {individual_row[-5:]}")
        print(f"Aggregated STFT Row {row_idx + start_row}: {aggregated_row[:5]} ... {aggregated_row[-5:]}\n")

    # Summarise, naming the file that was actually read.
    if all_match:
        print(f"STFT of {individual_file_name} is verified. On `{aggregated_file_name}` start at rows {start_row} to {end_row} with {rows_per_run} rows.\n")
    else:
        print(f"STFT of {individual_file_name} has discrepancies in `{aggregated_file_name}` start at rows {start_row} to {end_row} with {rows_per_run} rows.\n")
|
|
||||||
|
|
||||||
# Define the number of damage cases and test runs
num_damage_cases = 6  # Adjust to 30 as per your dataset
num_test_runs = 5

# Iterate through all damage cases, test runs, and sensors
# (this runs at import time; there is no __main__ guard around it)
for damage_num in range(1, num_damage_cases + 1):
    for test_num in range(1, num_test_runs + 1):
        for sensor_num in [1, 2]:
            verify_stft(damage_num, test_num, sensor_num)
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from colorama import Fore, Style, init
|
|
||||||
|
|
||||||
def create_damage_files(base_path, output_base, prefix):
    """Split the raw ``zzz<prefix>D<k>.TXT`` recordings into per-sensor CSVs.

    For each of the six damage scenarios, five consecutive source files are
    read (tab-separated, first 10 lines skipped). From the ``i``-th file of a
    scenario two CSVs are written into ``<output_base>/DAMAGE_<d>/``: the
    ``Time`` column paired with the ``i``-th ``Real*`` column (``..._01.csv``)
    and with the matching column from ``Real.25``-``Real.29``
    (``..._02.csv``). Progress is printed to stdout.

    Args:
        base_path: Directory containing the source ``.TXT`` files.
        output_base: Directory under which ``DAMAGE_<d>`` folders are written.
        prefix: Text inserted between ``zzz`` and ``D`` in source file names.
    """
    # Initialize colorama
    init(autoreset=True)

    # Column labels as pandas names duplicated 'Real' headers in the input
    columns = ['Real'] + [f'Real.{i}' for i in range(1, 30)]

    sensor_end_map = {1: 'Real.25', 2: 'Real.26', 3: 'Real.27', 4: 'Real.28', 5: 'Real.29'}

    # Damage scenarios and the corresponding source file indices
    damage_scenarios = {
        1: range(1, 6),    # Damage 1: source files D1 to D5
        2: range(6, 11),   # Damage 2: source files D6 to D10
        3: range(11, 16),  # Damage 3: source files D11 to D15
        4: range(16, 21),  # Damage 4: source files D16 to D20
        5: range(21, 26),  # Damage 5: source files D21 to D25
        6: range(26, 31)   # Damage 6: source files D26 to D30
    }

    for damage, files in damage_scenarios.items():
        damage_dir = os.path.join(output_base, f'DAMAGE_{damage}')
        # Make sure the target folder exists for every scenario written here.
        os.makedirs(damage_dir, exist_ok=True)

        for i, file_index in enumerate(files, start=1):
            # Load original data file
            source_name = f'zzz{prefix}D{file_index}.TXT'
            df = pd.read_csv(os.path.join(base_path, source_name), sep='\t', skiprows=10)

            # NOTE(review): output files are named DAMAGE<d>_TEST... (no
            # underscore after DAMAGE) - confirm downstream readers expect that.
            top_sensor = columns[i-1]
            print(top_sensor, type(top_sensor))
            output_file_1 = os.path.join(damage_dir, f'DAMAGE{damage}_TEST{i}_01.csv')
            print(f"Creating {output_file_1} from taking {source_name}")
            print("Taking datetime column on index 0...")
            print(f"Taking `{top_sensor}`...")
            df[['Time', top_sensor]].to_csv(output_file_1, index=False)
            print(Fore.GREEN + "Done")

            bottom_sensor = sensor_end_map[i]
            output_file_2 = os.path.join(damage_dir, f'DAMAGE{damage}_TEST{i}_02.csv')
            print(f"Creating {output_file_2} from taking {source_name}")
            print("Taking datetime column on index 0...")
            print(f"Taking `{bottom_sensor}`...")
            df[['Time', bottom_sensor]].to_csv(output_file_2, index=False)
            print(Fore.GREEN + "Done")
            print("---")
|
|
||||||
|
|
||||||
def main():
    """Command-line entry point: ``convert.py <input_dir> <output_base> <prefix>``.

    Exits with status 1 and a usage message when fewer than three arguments
    are supplied; otherwise creates the output folders and converts the files.
    """
    # All three positional arguments are read below, so require all of them.
    if len(sys.argv) < 4:
        print("Usage: python convert.py <path_to_csv_files> <output_base> <prefix>")
        sys.exit(1)

    base_path = sys.argv[1]    # directory holding the source files
    output_base = sys.argv[2]  # directory receiving the DAMAGE_<n> folders
    prefix = sys.argv[3]       # infix of the source file names

    # Create output folders if they don't exist (one per damage scenario, 1..6)
    for i in range(1, 7):
        os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)

    create_damage_files(base_path, output_base, prefix)
    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
|
|
||||||
|
|
||||||
# Run the conversion only when the file is executed as a script.
if __name__ == "__main__":
    main()
|
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
# Raw Data Directory
|
# Processed Data Directory
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
This `data/raw` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `raw` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
|
This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
|
||||||
|
|
||||||
## Directory Structure
|
## Directory Structure
|
||||||
|
|
||||||
@@ -13,23 +13,14 @@ processed_path = os.path.join(base_path, "processed")
|
|||||||
os.makedirs(raw_path, exist_ok=True)
|
os.makedirs(raw_path, exist_ok=True)
|
||||||
os.makedirs(processed_path, exist_ok=True)
|
os.makedirs(processed_path, exist_ok=True)
|
||||||
|
|
||||||
# Define the number of zeros to pad
|
for damage in range(1, 6): # 5 Damage levels
|
||||||
num_damages = 5
|
damage_folder = f"DAMAGE_{damage}"
|
||||||
num_tests = 10
|
damage_path = os.path.join(processed_path, damage_folder)
|
||||||
num_sensors = 2
|
|
||||||
damage_pad = len(str(num_damages))
|
|
||||||
test_pad = len(str(num_tests))
|
|
||||||
sensor_pad = len(str(num_sensors))
|
|
||||||
|
|
||||||
for damage in range(1, num_damages + 1): # 5 Damage levels starts from 1
|
|
||||||
damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
|
|
||||||
damage_path = os.path.join(raw_path, damage_folder)
|
|
||||||
os.makedirs(damage_path, exist_ok=True)
|
os.makedirs(damage_path, exist_ok=True)
|
||||||
|
|
||||||
for test in range(1, 11): # 10 Tests per damage level
|
for test in range(1, 11): # 10 Tests per damage level
|
||||||
for sensor in range(1, 3): # 2 Sensors per test
|
|
||||||
# Filename for the CSV
|
# Filename for the CSV
|
||||||
csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}_{sensor:0{sensor_pad}}.csv"
|
csv_filename = f"D{damage}_TEST{test}.csv"
|
||||||
csv_path = os.path.join(damage_path, csv_filename)
|
csv_path = os.path.join(damage_path, csv_filename)
|
||||||
|
|
||||||
# Generate dummy data
|
# Generate dummy data
|
||||||
|
|||||||
Reference in New Issue
Block a user