Merge pull request #100 from nuluh/feature/99-exp-alternative-undamage-case-data

[EXP] Alternative Undamage Case Data
This commit was merged in pull request #100.
This commit is contained in:
Rifqi D. Panuluh
2025-07-24 18:09:05 +07:00
committed by GitHub
6 changed files with 799 additions and 860 deletions

File diff suppressed because one or more lines are too long

View File

@@ -25,9 +25,18 @@ class DamageFilesIndices(TypedDict):
damage_index: int damage_index: int
files: List[str] files: List[str]
def complement_pairs(n, prefix="zzzBD", extension="TXT"):
    """
    Yield the four complement tuples for ``<prefix><n>.<extension>``.

    Each file index ``n`` belongs to a group of 5; its position inside the
    group ((n - 1) % 5 + 1) is the "original" column index, which is skipped.
    The remaining four column indices are yielded together with their
    offset-by-25 partner column.

    Parameters
    ----------
    n : int
        1-based file index (e.g. 1..30).
    prefix : str, default "zzzBD"
        Filename prefix. Defaulted so existing call sites that pass only
        ``n`` (e.g. ``complement_pairs(n)``) keep working.
    extension : str, default "TXT"
        Filename extension (without the dot).

    Yields
    ------
    tuple[str, list[int]]
        ``(filename, [a, a + 25])`` for every ``a`` in 1..5 except the
        original column index.
    """
    filename = f"{prefix}{n}.{extension}"
    orig_a = (n - 1) % 5 + 1  # original column index within the 5-file group
    for a in range(1, 6):
        if a != orig_a:  # skip the original column; yields exactly 4 tuples
            yield (filename, [a, a + 25])
def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TXT", first_col_start=1, last_col_offset=25, def generate_df_tuples(total_dfs, prefix, extension, first_col_start, last_col_offset,
special_groups=None, group=True): group_size=5, special_groups=None, group=True):
""" """
Generate a structured list of tuples containing DataFrame references and column indices. Generate a structured list of tuples containing DataFrame references and column indices.
@@ -37,7 +46,7 @@ def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TX
Total number of DataFrames to include in the tuples Total number of DataFrames to include in the tuples
group_size : int, default 5 group_size : int, default 5
Number of DataFrames in each group (determines the pattern repeat) Number of DataFrames in each group (determines the pattern repeat)
prefix : str, default "df" prefix : str
Prefix for DataFrame variable names Prefix for DataFrame variable names
first_col_start : int, default 1 first_col_start : int, default 1
Starting value for the first column index (1-indexed) Starting value for the first column index (1-indexed)
@@ -68,22 +77,10 @@ def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TX
# Add special groups at specified positions (other than beginning) # Add special groups at specified positions (other than beginning)
if special_groups: if special_groups:
for group in special_groups: result.insert(0, special_groups)
position = group.get('position', 0) # default value is 0 if not specified
df_name = group['df_name']
size = group.get('size', group_size)
# Create the special group tuples
special_tuples = []
for i in range(size):
first_col = first_col_start + i
last_col = first_col + last_col_offset
special_tuples.append((df_name, [first_col, last_col]))
tuples.insert(position, special_tuples)
return tuples return result
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT") # file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")

View File

@@ -1,6 +1,16 @@
from src.ml.model_selection import inference_model from src.ml.model_selection import inference_model
from joblib import load
model = {"SVM": "D:/thesis/models/sensor1/SVM.joblib", x = 30
"SVM with PCA": "D:/thesis/models/sensor1/SVM with StandardScaler and PCA.joblib"} file = f"D:/thesis/data/dataset_B/zzzBD{x}.TXT"
sensor = 1
model = {"SVM": f"D:/thesis/models/sensor{sensor}/SVM.joblib",
"SVM with PCA": f"D:/thesis/models/sensor{sensor}/SVM with StandardScaler and PCA.joblib",
"XGBoost": f"D:/thesis/models/sensor{sensor}/XGBoost.joblib"}
inference_model(model["SVM"], "D:/thesis/data/dataset_A/zzzAD2.TXT", column_question=1) index = ((x-1) % 5) + 1
inference_model(model["SVM"], file, column_question=index)
print("---")
inference_model(model["SVM with PCA"], file, column_question=index)
print("---")
inference_model(model["XGBoost"], file, column_question=index)

View File

@@ -8,7 +8,7 @@ from joblib import load
def create_ready_data( def create_ready_data(
stft_data_path: str, stft_data_path: str,
stratify: np.ndarray = None, stratify: np.ndarray = None,
) -> tuple: ) -> tuple[pd.DataFrame, np.ndarray]:
""" """
Create a stratified train-test split from STFT data. Create a stratified train-test split from STFT data.
@@ -22,7 +22,7 @@ def create_ready_data(
Returns: Returns:
-------- --------
tuple tuple
(X_train, X_test, y_train, y_test) - Split datasets (pd.DataFrame, np.ndarray) - Combined data and corresponding labels
""" """
ready_data = [] ready_data = []
for file in os.listdir(stft_data_path): for file in os.listdir(stft_data_path):
@@ -155,7 +155,7 @@ def train_and_evaluate_model(
except Exception as e: except Exception as e:
result["error"] = f"Training error: {str(e)}" result["error"] = f"Training error: {str(e)}"
return result return result
def plot_confusion_matrix(results_sensor, y_test): def plot_confusion_matrix(results_sensor, y_test, title):
""" """
Plot confusion matrices for each model in results_sensor1. Plot confusion matrices for each model in results_sensor1.
@@ -193,8 +193,7 @@ def plot_confusion_matrix(results_sensor, y_test):
# Plot # Plot
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels) disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues) # You can change colormap disp.plot(cmap=plt.cm.Blues) # You can change colormap
plt.title(f"{i['model']} {i['sensor']} Test") plt.title(f"{title}")
plt.show()
def calculate_label_percentages(labels): def calculate_label_percentages(labels):
""" """
@@ -255,9 +254,9 @@ def inference_model(
fs=1024, fs=1024,
window=hann(1024), window=hann(1024),
nperseg=1024, nperseg=1024,
noverlap=512 noverlap=1024-512
) )
data = pd.DataFrame(np.abs(Zxx).T, columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, 1024/2, Zxx.shape[1])]) data = pd.DataFrame(np.abs(Zxx).T, columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, 1024/2, Zxx.shape[1])])
data = data.rename(columns={"Freq_0.00": "00"}) # To match the model input format data = data.rename(columns={"Freq_0.00": "00"}) # To match the model input format
model = load(models) # Load the model from the provided path model = load(models) # Load the model from the provided path
return calculate_label_percentages(model.predict(data)) return calculate_label_percentages(model.predict(data.iloc[:21,:]))

View File

@@ -6,7 +6,7 @@ import glob
import multiprocessing # Added import for multiprocessing import multiprocessing # Added import for multiprocessing
# Define the base directory where DAMAGE_X folders are located # Define the base directory where DAMAGE_X folders are located
damage_base_path = 'D:/thesis/data/converted/raw_B' damage_base_path = 'D:/thesis/data/converted/raw'
# Define output directories for each sensor # Define output directories for each sensor
output_dirs = { output_dirs = {
@@ -25,13 +25,10 @@ window = hann(window_size)
Fs = 1024 Fs = 1024
# Number of damage cases (adjust as needed) # Number of damage cases (adjust as needed)
num_damage_cases = 6 # Change to 30 if you have 30 damage cases num_damage_cases = 0 # Change to 30 if you have 30 damage cases
# Number of test runs per damage case
num_test_runs = 5
# Function to perform STFT and return magnitude # Function to perform STFT and return magnitude
def compute_stft(vibration_data): def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size):
frequencies, times, Zxx = stft( frequencies, times, Zxx = stft(
vibration_data, vibration_data,
fs=Fs, fs=Fs,
@@ -42,9 +39,13 @@ def compute_stft(vibration_data):
stft_magnitude = np.abs(Zxx) stft_magnitude = np.abs(Zxx)
return stft_magnitude.T # Transpose to have frequencies as columns return stft_magnitude.T # Transpose to have frequencies as columns
def process_damage_case(damage_num): def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop_size, output_dirs=output_dirs):
damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}') damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
if damage_num == 0:
# Number of test runs per damage case
num_test_runs = 120
else:
num_test_runs = 5
# Check if the damage folder exists # Check if the damage folder exists
if not os.path.isdir(damage_folder): if not os.path.isdir(damage_folder):
print(f"Folder {damage_folder} does not exist. Skipping...") print(f"Folder {damage_folder} does not exist. Skipping...")
@@ -79,20 +80,29 @@ def process_damage_case(damage_num):
print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...") print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
continue continue
# Extract vibration data (assuming the second column is sensor data)
vibration_data = df.iloc[:, 1].values vibration_data = df.iloc[:, 1].values
# Perform STFT # Perform STFT
stft_magnitude = compute_stft(vibration_data) stft_magnitude = compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size)
# Convert STFT result to DataFrame # Convert STFT result to DataFrame
df_stft = pd.DataFrame( df_stft = pd.DataFrame(
stft_magnitude, stft_magnitude,
columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])] columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])]
) )
# only include 22 samples vector features for first 45 num_test_runs else include 21 samples vector features
if damage_num == 0:
print(f"Processing damage_num = 0, test_num = {test_num}")
if test_num <= 45:
df_stft = df_stft.iloc[:22, :]
print(f"Reduced df_stft shape (21 samples): {df_stft.shape}")
else:
df_stft = df_stft.iloc[:21, :]
print(f"Reduced df_stft shape (22 samples): {df_stft.shape}")
# Append to the aggregated list # Append to the aggregated list
aggregated_stft.append(df_stft) aggregated_stft.append(df_stft)
print(sum(df.shape[0] for df in aggregated_stft))
# Concatenate all STFT DataFrames vertically # Concatenate all STFT DataFrames vertically
if aggregated_stft: if aggregated_stft:

View File

@@ -1,4 +1,4 @@
from convert import * from data_preprocessing import *
from joblib import dump, load from joblib import dump, load
# b = generate_damage_files_index( # b = generate_damage_files_index(
@@ -19,8 +19,15 @@ special_groups_B = [
] ]
# Generate the tuples with the special group # Generate the tuples with the special group
# a = generate_df_tuples(special_groups=special_groups_A) a_complement = [(comp)
b = generate_df_tuples(special_groups=special_groups_B, prefix="zzzBD") for n in range(1, 31)
for comp in complement_pairs(n)]
a = generate_df_tuples(special_groups=a_complement, prefix="zzzAD")
# b_complement = [(comp)
# for n in range(1, 31)
# for comp in complement_pairs(n)]
# b = generate_df_tuples(special_groups=b_complement, prefix="zzzBD")
# a = generate_damage_files_index( # a = generate_damage_files_index(
@@ -32,14 +39,14 @@ b = generate_df_tuples(special_groups=special_groups_B, prefix="zzzBD")
# # undamage_file="zzzBU.TXT" # # undamage_file="zzzBU.TXT"
# ) # )
# data_A = DataProcessor(file_index=a, base_path="D:/thesis/data/dataset_A", include_time=True) data_A = DataProcessor(file_index=a, base_path="D:/thesis/data/dataset_A", include_time=True)
# data_A.create_vector_column(overwrite=True) # data_A.create_vector_column(overwrite=True)
# # data_A.create_limited_sensor_vector_column(overwrite=True) # # data_A.create_limited_sensor_vector_column(overwrite=True)
# data_A.export_to_csv("D:/thesis/data/converted/raw") data_A.export_to_csv("D:/thesis/data/converted/raw")
data_B = DataProcessor(file_index=b, base_path="D:/thesis/data/dataset_B", include_time=True) # data_B = DataProcessor(file_index=b, base_path="D:/thesis/data/dataset_B", include_time=True)
# data_B.create_vector_column(overwrite=True) # data_B.create_vector_column(overwrite=True)
# # data_B.create_limited_sensor_vector_column(overwrite=True) # # data_B.create_limited_sensor_vector_column(overwrite=True)
data_B.export_to_csv("D:/thesis/data/converted/raw_B") # data_B.export_to_csv("D:/thesis/data/converted/raw_B")
# a = load("D:/cache.joblib") # a = load("D:/cache.joblib")
# breakpoint() # breakpoint()