[EXP] Alternative Undamage Case Data #100
File diff suppressed because one or more lines are too long
@@ -25,9 +25,18 @@ class DamageFilesIndices(TypedDict):
|
|||||||
damage_index: int
|
damage_index: int
|
||||||
files: List[str]
|
files: List[str]
|
||||||
|
|
||||||
|
def complement_pairs(n, prefix, extension, group_size=5, col_offset=25):
    """
    Yield the complement (file, column-pair) tuples for file number ``n``.

    Files are numbered in repeating groups of ``group_size``; file ``n``
    occupies position ``(n - 1) % group_size + 1`` within its group. For
    every *other* position ``a`` in ``1 .. group_size`` this generator yields
    ``(filename, [a, a + col_offset])``, i.e. all column pairs except the one
    at the file's own position.

    Parameters:
    -----------
    n : int
        1-based file number.
    prefix : str
        Text placed before the number in the file name (e.g. "zzzBD").
    extension : str
        File extension without the leading dot (e.g. "TXT").
    group_size : int, default 5
        Number of positions per group; ``group_size - 1`` tuples are yielded.
    col_offset : int, default 25
        Distance between the first and second column index of each pair.

    Yields:
    -------
    tuple
        ``(filename, [a, a + col_offset])`` for each position ``a`` that is
        not the file's own position.
    """
    filename = f"{prefix}{n}.{extension}"
    # Position of file n inside its group, in 1 .. group_size.
    own_position = (n - 1) % group_size + 1
    for a in range(1, group_size + 1):
        if a == own_position:
            # The file's own column pair is the "original"; only complements are wanted.
            continue
        # Generator: callers iterate directly or wrap in list().
        yield (filename, [a, a + col_offset])
|
||||||
|
|
||||||
def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TXT", first_col_start=1, last_col_offset=25,
|
def generate_df_tuples(total_dfs, prefix, extension, first_col_start, last_col_offset,
|
||||||
special_groups=None, group=True):
|
group_size=5, special_groups=None, group=True):
|
||||||
"""
|
"""
|
||||||
Generate a structured list of tuples containing DataFrame references and column indices.
|
Generate a structured list of tuples containing DataFrame references and column indices.
|
||||||
|
|
||||||
@@ -37,7 +46,7 @@ def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TX
|
|||||||
Total number of DataFrames to include in the tuples
|
Total number of DataFrames to include in the tuples
|
||||||
group_size : int, default 5
|
group_size : int, default 5
|
||||||
Number of DataFrames in each group (determines the pattern repeat)
|
Number of DataFrames in each group (determines the pattern repeat)
|
||||||
prefix : str, default "df"
|
prefix : str
|
||||||
Prefix for DataFrame variable names
|
Prefix for DataFrame variable names
|
||||||
first_col_start : int, default 1
|
first_col_start : int, default 1
|
||||||
Starting value for the first column index (1-indexed)
|
Starting value for the first column index (1-indexed)
|
||||||
@@ -61,29 +70,17 @@ def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TX
|
|||||||
group = []
|
group = []
|
||||||
for i in range(1, 6): # TODO: shouldnt be hardcoded
|
for i in range(1, 6): # TODO: shouldnt be hardcoded
|
||||||
n = g * 5 + i
|
n = g * 5 + i
|
||||||
bottom_end = i # 1, 2, 3, 4, 5
|
bottom_end = i # 1, 2, 3, 4, 5
|
||||||
top_end = bottom_end + 25 # 26, 27, 28, 29, 30 # TODO: shouldnt be hardcoded
|
top_end = bottom_end + 25 # 26, 27, 28, 29, 30 # TODO: shouldnt be hardcoded
|
||||||
group.append((f"{prefix}{n}.{extension}", [bottom_end, top_end]))
|
group.append((f"{prefix}{n}.{extension}", [bottom_end, top_end]))
|
||||||
result.append(group)
|
result.append(group)
|
||||||
|
|
||||||
# Add special groups at specified positions (other than beginning)
|
# Add special groups at specified positions (other than beginning)
|
||||||
if special_groups:
|
if special_groups:
|
||||||
for group in special_groups:
|
result.insert(0, special_groups)
|
||||||
position = group.get('position', 0) # default value is 0 if not specified
|
|
||||||
df_name = group['df_name']
|
|
||||||
size = group.get('size', group_size)
|
|
||||||
|
|
||||||
# Create the special group tuples
|
|
||||||
special_tuples = []
|
|
||||||
for i in range(size):
|
|
||||||
first_col = first_col_start + i
|
|
||||||
last_col = first_col + last_col_offset
|
|
||||||
special_tuples.append((df_name, [first_col, last_col]))
|
|
||||||
|
|
||||||
tuples.insert(position, special_tuples)
|
|
||||||
|
|
||||||
|
|
||||||
return tuples
|
return result
|
||||||
|
|
||||||
|
|
||||||
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
|
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
|
||||||
@@ -1,6 +1,16 @@
|
|||||||
from src.ml.model_selection import inference_model
|
from src.ml.model_selection import inference_model
|
||||||
|
from joblib import load
|
||||||
|
|
||||||
model = {"SVM": "D:/thesis/models/sensor1/SVM.joblib",
|
x = 30
|
||||||
"SVM with PCA": "D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib"}
|
file = f"D:/thesis/data/dataset_B/zzzBD{x}.TXT"
|
||||||
|
sensor = 1
|
||||||
|
model = {"SVM": f"D:/thesis/models/sensor{sensor}/SVM.joblib",
|
||||||
|
"SVM with PCA": f"D:/thesis/models/sensor{sensor}/SVM with StandardScaler and PCA.joblib",
|
||||||
|
"XGBoost": f"D:/thesis/models/sensor{sensor}/XGBoost.joblib"}
|
||||||
|
|
||||||
inference_model(model["SVM"], "D:/thesis/data/dataset_A/zzzAD2.TXT", column_question=1)
|
index = ((x-1) % 5) + 1
|
||||||
|
inference_model(model["SVM"], file, column_question=index)
|
||||||
|
print("---")
|
||||||
|
inference_model(model["SVM with PCA"], file, column_question=index)
|
||||||
|
print("---")
|
||||||
|
inference_model(model["XGBoost"], file, column_question=index)
|
||||||
@@ -8,7 +8,7 @@ from joblib import load
|
|||||||
def create_ready_data(
|
def create_ready_data(
|
||||||
stft_data_path: str,
|
stft_data_path: str,
|
||||||
stratify: np.ndarray = None,
|
stratify: np.ndarray = None,
|
||||||
) -> tuple:
|
) -> tuple[pd.DataFrame, np.ndarray]:
|
||||||
"""
|
"""
|
||||||
Create a stratified train-test split from STFT data.
|
Create a stratified train-test split from STFT data.
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ def create_ready_data(
|
|||||||
Returns:
|
Returns:
|
||||||
--------
|
--------
|
||||||
tuple
|
tuple
|
||||||
(X_train, X_test, y_train, y_test) - Split datasets
|
(pd.DataFrame, np.ndarray) - Combined data and corresponding labels
|
||||||
"""
|
"""
|
||||||
ready_data = []
|
ready_data = []
|
||||||
for file in os.listdir(stft_data_path):
|
for file in os.listdir(stft_data_path):
|
||||||
@@ -155,7 +155,7 @@ def train_and_evaluate_model(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
result["error"] = f"Training error: {str(e)}"
|
result["error"] = f"Training error: {str(e)}"
|
||||||
return result
|
return result
|
||||||
def plot_confusion_matrix(results_sensor, y_test):
|
def plot_confusion_matrix(results_sensor, y_test, title):
|
||||||
"""
|
"""
|
||||||
Plot confusion matrices for each model in results_sensor1.
|
Plot confusion matrices for each model in results_sensor1.
|
||||||
|
|
||||||
@@ -193,8 +193,7 @@ def plot_confusion_matrix(results_sensor, y_test):
|
|||||||
# Plot
|
# Plot
|
||||||
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
|
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
|
||||||
disp.plot(cmap=plt.cm.Blues) # You can change colormap
|
disp.plot(cmap=plt.cm.Blues) # You can change colormap
|
||||||
plt.title(f"{i['model']} {i['sensor']} Test")
|
plt.title(f"{title}")
|
||||||
plt.show()
|
|
||||||
|
|
||||||
def calculate_label_percentages(labels):
|
def calculate_label_percentages(labels):
|
||||||
"""
|
"""
|
||||||
@@ -255,9 +254,9 @@ def inference_model(
|
|||||||
fs=1024,
|
fs=1024,
|
||||||
window=hann(1024),
|
window=hann(1024),
|
||||||
nperseg=1024,
|
nperseg=1024,
|
||||||
noverlap=512
|
noverlap=1024-512
|
||||||
)
|
)
|
||||||
data = pd.DataFrame(np.abs(Zxx).T, columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, 1024/2, Zxx.shape[1])])
|
data = pd.DataFrame(np.abs(Zxx).T, columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, 1024/2, Zxx.shape[1])])
|
||||||
data = data.rename(columns={"Freq_0.00": "00"}) # To match the model input format
|
data = data.rename(columns={"Freq_0.00": "00"}) # To match the model input format
|
||||||
model = load(models) # Load the model from the provided path
|
model = load(models) # Load the model from the provided path
|
||||||
return calculate_label_percentages(model.predict(data))
|
return calculate_label_percentages(model.predict(data.iloc[:21,:]))
|
||||||
@@ -6,7 +6,7 @@ import glob
|
|||||||
import multiprocessing # Added import for multiprocessing
|
import multiprocessing # Added import for multiprocessing
|
||||||
|
|
||||||
# Define the base directory where DAMAGE_X folders are located
|
# Define the base directory where DAMAGE_X folders are located
|
||||||
damage_base_path = 'D:/thesis/data/converted/raw_B'
|
damage_base_path = 'D:/thesis/data/converted/raw'
|
||||||
|
|
||||||
# Define output directories for each sensor
|
# Define output directories for each sensor
|
||||||
output_dirs = {
|
output_dirs = {
|
||||||
@@ -25,13 +25,10 @@ window = hann(window_size)
|
|||||||
Fs = 1024
|
Fs = 1024
|
||||||
|
|
||||||
# Number of damage cases (adjust as needed)
|
# Number of damage cases (adjust as needed)
|
||||||
num_damage_cases = 6 # Change to 30 if you have 30 damage cases
|
num_damage_cases = 0 # Change to 30 if you have 30 damage cases
|
||||||
|
|
||||||
# Number of test runs per damage case
|
|
||||||
num_test_runs = 5
|
|
||||||
|
|
||||||
# Function to perform STFT and return magnitude
|
# Function to perform STFT and return magnitude
|
||||||
def compute_stft(vibration_data):
|
def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size):
|
||||||
frequencies, times, Zxx = stft(
|
frequencies, times, Zxx = stft(
|
||||||
vibration_data,
|
vibration_data,
|
||||||
fs=Fs,
|
fs=Fs,
|
||||||
@@ -42,9 +39,13 @@ def compute_stft(vibration_data):
|
|||||||
stft_magnitude = np.abs(Zxx)
|
stft_magnitude = np.abs(Zxx)
|
||||||
return stft_magnitude.T # Transpose to have frequencies as columns
|
return stft_magnitude.T # Transpose to have frequencies as columns
|
||||||
|
|
||||||
def process_damage_case(damage_num):
|
def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop_size, output_dirs=output_dirs):
|
||||||
damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
|
damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
|
||||||
|
if damage_num == 0:
|
||||||
|
# Number of test runs per damage case
|
||||||
|
num_test_runs = 120
|
||||||
|
else:
|
||||||
|
num_test_runs = 5
|
||||||
# Check if the damage folder exists
|
# Check if the damage folder exists
|
||||||
if not os.path.isdir(damage_folder):
|
if not os.path.isdir(damage_folder):
|
||||||
print(f"Folder {damage_folder} does not exist. Skipping...")
|
print(f"Folder {damage_folder} does not exist. Skipping...")
|
||||||
@@ -79,20 +80,29 @@ def process_damage_case(damage_num):
|
|||||||
print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
|
print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Extract vibration data (assuming the second column is sensor data)
|
|
||||||
vibration_data = df.iloc[:, 1].values
|
vibration_data = df.iloc[:, 1].values
|
||||||
|
|
||||||
# Perform STFT
|
# Perform STFT
|
||||||
stft_magnitude = compute_stft(vibration_data)
|
stft_magnitude = compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size)
|
||||||
|
|
||||||
# Convert STFT result to DataFrame
|
# Convert STFT result to DataFrame
|
||||||
df_stft = pd.DataFrame(
|
df_stft = pd.DataFrame(
|
||||||
stft_magnitude,
|
stft_magnitude,
|
||||||
columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])]
|
columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])]
|
||||||
)
|
)
|
||||||
|
# only inlcude 21 samples vector features for first 45 num_test_runs else include 22 samples vector features
|
||||||
|
if damage_num == 0:
|
||||||
|
print(f"Processing damage_num = 0, test_num = {test_num}")
|
||||||
|
if test_num <= 45:
|
||||||
|
df_stft = df_stft.iloc[:22, :]
|
||||||
|
print(f"Reduced df_stft shape (21 samples): {df_stft.shape}")
|
||||||
|
else:
|
||||||
|
df_stft = df_stft.iloc[:21, :]
|
||||||
|
print(f"Reduced df_stft shape (22 samples): {df_stft.shape}")
|
||||||
|
|
||||||
# Append to the aggregated list
|
# Append to the aggregated list
|
||||||
aggregated_stft.append(df_stft)
|
aggregated_stft.append(df_stft)
|
||||||
|
print(sum(df.shape[0] for df in aggregated_stft))
|
||||||
|
|
||||||
# Concatenate all STFT DataFrames vertically
|
# Concatenate all STFT DataFrames vertically
|
||||||
if aggregated_stft:
|
if aggregated_stft:
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from convert import *
|
from data_preprocessing import *
|
||||||
from joblib import dump, load
|
from joblib import dump, load
|
||||||
|
|
||||||
# b = generate_damage_files_index(
|
# b = generate_damage_files_index(
|
||||||
@@ -19,8 +19,15 @@ special_groups_B = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
# Generate the tuples with the special group
|
# Generate the tuples with the special group
|
||||||
# a = generate_df_tuples(special_groups=special_groups_A)
|
a_complement = [(comp)
|
||||||
b = generate_df_tuples(special_groups=special_groups_B, prefix="zzzBD")
|
for n in range(1, 31)
|
||||||
|
for comp in complement_pairs(n)]
|
||||||
|
a = generate_df_tuples(special_groups=a_complement, prefix="zzzAD")
|
||||||
|
|
||||||
|
# b_complement = [(comp)
|
||||||
|
# for n in range(1, 31)
|
||||||
|
# for comp in complement_pairs(n)]
|
||||||
|
# b = generate_df_tuples(special_groups=b_complement, prefix="zzzBD")
|
||||||
|
|
||||||
|
|
||||||
# a = generate_damage_files_index(
|
# a = generate_damage_files_index(
|
||||||
@@ -32,14 +39,14 @@ b = generate_df_tuples(special_groups=special_groups_B, prefix="zzzBD")
|
|||||||
# # undamage_file="zzzBU.TXT"
|
# # undamage_file="zzzBU.TXT"
|
||||||
# )
|
# )
|
||||||
|
|
||||||
# data_A = DataProcessor(file_index=a, base_path="D:/thesis/data/dataset_A", include_time=True)
|
data_A = DataProcessor(file_index=a, base_path="D:/thesis/data/dataset_A", include_time=True)
|
||||||
# data_A.create_vector_column(overwrite=True)
|
# data_A.create_vector_column(overwrite=True)
|
||||||
# # data_A.create_limited_sensor_vector_column(overwrite=True)
|
# # data_A.create_limited_sensor_vector_column(overwrite=True)
|
||||||
# data_A.export_to_csv("D:/thesis/data/converted/raw")
|
data_A.export_to_csv("D:/thesis/data/converted/raw")
|
||||||
|
|
||||||
data_B = DataProcessor(file_index=b, base_path="D:/thesis/data/dataset_B", include_time=True)
|
# data_B = DataProcessor(file_index=b, base_path="D:/thesis/data/dataset_B", include_time=True)
|
||||||
# data_B.create_vector_column(overwrite=True)
|
# data_B.create_vector_column(overwrite=True)
|
||||||
# # data_B.create_limited_sensor_vector_column(overwrite=True)
|
# # data_B.create_limited_sensor_vector_column(overwrite=True)
|
||||||
data_B.export_to_csv("D:/thesis/data/converted/raw_B")
|
# data_B.export_to_csv("D:/thesis/data/converted/raw_B")
|
||||||
# a = load("D:/cache.joblib")
|
# a = load("D:/cache.joblib")
|
||||||
# breakpoint()
|
# breakpoint()
|
||||||
Reference in New Issue
Block a user