Compare commits

..

26 Commits

Author SHA1 Message Date
nuluh
b491c7cf8b Merge branch 'latex/75-enhance-background-research' of https://github.com/nuluh/thesis into latex/75-enhance-background-research 2025-06-04 16:09:03 +07:00
nuluh
ddae7ef6a4 feat(latex): add glossary, acronyms, and notations 2025-06-04 16:05:22 +07:00
nuluh
e13b8874c7 chore(latex): clean up unused input commands in main.tex 2025-06-04 16:03:01 +07:00
nuluh
dc2f1eb649 feat(latex): add images 2025-06-04 16:03:01 +07:00
nuluh
0ccbb7c2b1 feat(latex): add glossary, acronyms, and notations 2025-06-04 16:03:01 +07:00
nuluh
3700531c2f chore(acknowledgements): remove empty acknowledgements file 2025-06-04 16:03:00 +07:00
nuluh
a16ebae170 feat(latex): enhance background section and add research questions for SHM study 2025-06-04 16:03:00 +07:00
nuluh
d6df4e5349 chore(latex): comment out table of contents in main.tex to ignore some error 2025-06-04 15:52:46 +07:00
nuluh
b5cfebf938 chore(latex): clean up unused input commands in main.tex 2025-06-04 15:34:15 +07:00
nuluh
1387206c7e feat(latex): add images 2025-06-04 15:34:15 +07:00
nuluh
1914cc3bf7 feat(latex): add glossary, acronyms, and notations 2025-06-04 15:34:15 +07:00
nuluh
cec43cb291 chore(acknowledgements): remove empty acknowledgements file 2025-06-04 15:34:13 +07:00
nuluh
6bedf9e297 feat(latex): enhance background section and add research questions for SHM study 2025-06-04 15:34:13 +07:00
nuluh
983e9c5834 feat(latex): add images 2025-06-04 14:48:49 +07:00
nuluh
d743ba451e feat(latex): add glossary, acronyms, and notations 2025-06-04 14:36:00 +07:00
nuluh
4a56579d31 Merge branch 'latex/75-enhance-background-research' of https://github.com/nuluh/thesis into latex/75-enhance-background-research 2025-06-04 12:55:47 +07:00
nuluh
305999ec40 chore(acknowledgements): remove empty acknowledgements file 2025-06-04 12:50:50 +07:00
nuluh
3e9085c15d feat(latex): enhance background section and add research questions for SHM study 2025-06-04 12:50:50 +07:00
nuluh
dcb2e52a38 Merge branch 'latex/75-enhance-background-research' of https://github.com/nuluh/thesis into latex/75-enhance-background-research 2025-06-03 20:20:37 +07:00
nuluh
f21fd8d195 chore(acknowledgements): remove empty acknowledgements file 2025-06-03 20:18:42 +07:00
nuluh
9b5b42a756 feat(latex): enhance background section and add research questions for SHM study 2025-06-03 20:18:42 +07:00
nuluh
1f8da59a6b Merge branch 'latex/75-enhance-background-research' of https://github.com/nuluh/thesis into latex/75-enhance-background-research 2025-06-03 16:48:20 +07:00
nuluh
b177dd04d8 chore(acknowledgements): remove empty acknowledgements file 2025-06-03 16:44:57 +07:00
nuluh
c9f4447e62 feat(latex): enhance background section and add research questions for SHM study 2025-06-03 16:44:56 +07:00
nuluh
a89d4caf75 chore(acknowledgements): remove empty acknowledgements file 2025-06-03 16:44:35 +07:00
nuluh
7dbc5bba0f feat(latex): enhance background section and add research questions for SHM study 2025-06-03 06:26:58 +07:00
19 changed files with 1412 additions and 1405 deletions

View File

@@ -1,7 +1,4 @@
{
"python.analysis.extraPaths": [
"./code/src/features",
"${workspaceFolder}/code/src"
],
"python.analysis.extraPaths": ["./code/src/features"],
"jupyter.notebookFileRoot": "${workspaceFolder}/code"
}

File diff suppressed because one or more lines are too long

View File

@@ -1,357 +0,0 @@
import pandas as pd
import os
import re
import sys
import numpy as np
from colorama import Fore, Style, init
from typing import TypedDict, Dict, List
from joblib import load
from pprint import pprint
# class DamageFilesIndices(TypedDict):
# damage_index: int
# files: list[int]
OriginalSingleDamageScenarioFilePath = str
DamageScenarioGroupIndex = int
OriginalSingleDamageScenario = pd.DataFrame
SensorIndex = int
VectorColumnIndex = List[SensorIndex]
VectorColumnIndices = List[VectorColumnIndex]
DamageScenarioGroup = List[OriginalSingleDamageScenario]
GroupDataset = List[DamageScenarioGroup]
class DamageFilesIndices(TypedDict):
    """Typed dictionary with an integer ``damage_index`` and a ``files`` list of strings."""

    # Key holding a single integer.
    damage_index: int
    # Key holding a list of strings.
    files: List[str]
def complement_pairs(n, prefix, extension, group_size=5, column_offset=25):
    """
    Yield the complement ``(filename, [a, a + column_offset])`` tuples for file ``n``.

    The file's own position inside its group is ``(n - 1) % group_size + 1``.
    Every *other* position ``a`` in ``1 .. group_size`` is yielded, so the
    generator produces ``group_size - 1`` tuples (four with the defaults).

    Parameters:
    -----------
    n : int
        Number that is embedded in the file name.
    prefix : str
        Text placed in front of ``n`` in the file name.
    extension : str
        File extension, without the leading dot.
    group_size : int, default 5
        Number of positions per group.
    column_offset : int, default 25
        Value added to a position to obtain the second column index.

    Yields:
    -------
    tuple
        ``(filename, [a, a + column_offset])`` for each position ``a`` that is
        not the file's own position.
    """
    filename = f"{prefix}{n}.{extension}"
    own_position = (n - 1) % group_size + 1  # 1 .. group_size
    for position in range(1, group_size + 1):
        if position != own_position:
            # A generator keeps the caller free to consume the pairs lazily.
            yield (filename, [position, position + column_offset])
def generate_df_tuples(total_dfs, prefix, extension, first_col_start, last_col_offset,
                       group_size=5, special_groups=None, group=True):
    """
    Generate a grouped list of ``(file_name, [first_col, last_col])`` tuples.

    Files are numbered ``1 .. total_dfs`` and split into consecutive groups of
    ``group_size``. Inside a group the k-th file (k starting at 0) gets
    ``first_col = first_col_start + k`` and
    ``last_col = first_col + last_col_offset``.

    Parameters:
    -----------
    total_dfs : int
        Total number of files to include (30 reproduces the former fixed layout).
    prefix : str
        Text placed in front of the file number in each file name.
    extension : str
        File extension, without the leading dot.
    first_col_start : int
        Column index assigned to the first file of every group (formerly fixed at 1).
    last_col_offset : int
        Value added to the first column index to get the second one (formerly fixed at 25).
    group_size : int, default 5
        Number of files per group. A trailing group is shorter when
        ``total_dfs`` is not a multiple of ``group_size``.
    special_groups : list, optional
        When truthy, this object is inserted as-is at position 0 of the result.
    group : bool, default True
        When False no tuples are generated; the result then only contains
        ``special_groups`` (if given).

    Returns:
    --------
    list
        List of groups, each group being a list of
        ``(file_name, [first_col, last_col])`` tuples.

    Raises:
    -------
    ValueError
        If ``group_size`` is not positive.
    """
    if group_size <= 0:
        raise ValueError("group_size must be a positive integer")

    result = []
    if group:
        for first_number in range(1, total_dfs + 1, group_size):
            last_number = min(first_number + group_size, total_dfs + 1)
            members = []  # separate name: do not shadow the `group` flag
            for position, number in enumerate(range(first_number, last_number)):
                bottom_end = first_col_start + position
                top_end = bottom_end + last_col_offset
                members.append((f"{prefix}{number}.{extension}", [bottom_end, top_end]))
            result.append(members)

    # Special groups always go to the front of the result.
    if special_groups:
        result.insert(0, special_groups)
    return result
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
# df = pd.read_csv(file_path, sep="\t", skiprows=10) # Read with explicit column names
class DataProcessor:
    """
    Load grouped raw data files into DataFrames and export selected columns.

    ``file_index`` is a nested list: every inner list is one group and holds
    ``(file_name, column_positions)`` tuples. ``load_data`` replaces each tuple
    in place with the DataFrame read from that file, so after loading
    ``self.data`` and ``self.file_index`` refer to the same nested list.
    """

    def __init__(self, file_index, cache_path: str = None, base_path: str = None, include_time: bool = False):
        """
        :param file_index: Nested list of ``(file_name, column_positions)`` tuples.
        :param cache_path: Optional path handed to joblib's ``load``; when given, no files are read.
        :param base_path: Directory that is joined with every file name.
        :param include_time: When True, column position 0 is kept in front of the requested columns.
        """
        self.file_index = file_index
        self.base_path = base_path
        self.include_time = include_time
        if cache_path:
            # NOTE(review): joblib's load unpickles the cache; only point it at trusted files.
            self.data = load(cache_path)
        else:
            self.data = self.load_data()

    def load_data(self):
        """
        Read every file named in ``self.file_index`` and keep only the requested columns.

        Each ``(file_name, column_positions)`` tuple is replaced in place by the
        extracted DataFrame. A file that cannot be processed is reported on
        stdout and its tuple is left untouched.

        :return: ``self.file_index`` (the same nested list, now holding DataFrames).
        """
        for group_pos, group in enumerate(self.file_index):
            for file_pos, entry in enumerate(group):
                file_path = os.path.join(self.base_path, entry[0])
                col_indices = [0] + entry[1] if self.include_time else entry[1]
                try:
                    # sep=r"\s+" is the documented equivalent of the deprecated delim_whitespace=True.
                    df = pd.read_csv(file_path, sep=r"\s+", skiprows=10, header=0, memory_map=True)
                    self.file_index[group_pos][file_pos] = df.iloc[:, col_indices].copy()
                    print(f"Processed {file_path}, extracted columns: {col_indices}")
                except Exception as e:
                    # Best effort: report and continue with the remaining files.
                    print(f"Error processing {file_path}: {str(e)}")
        return self.file_index

    def _load_dataframe(self, file_path: str) -> pd.DataFrame:
        """
        Read one data file into a DataFrame.

        NOTE(review): ``nrows=1`` limits the read to a single data row; confirm
        this is intended before relying on this loader.

        :param file_path: Path to the data file.
        :return: DataFrame read from the file.
        """
        return pd.read_csv(file_path, sep=r"\s+", skiprows=10, header=0, memory_map=True, nrows=1)

    def _load_all_data(self) -> List[List[pd.DataFrame]]:
        """
        Alternative loader for a mapping-shaped ``file_index`` (``group_idx -> file list``).

        It is not called by ``__init__``. Group index ``g`` is stored at list
        position ``g - 1``.

        :return: Nested list of DataFrames.
        :raises ValueError: If ``file_index`` is empty.
        """
        group_count = len(self.file_index) if self.file_index else 0
        if group_count == 0:
            raise ValueError("No file index provided; file_index is empty.")

        data = [[] for _ in range(group_count)]
        for group_idx, file_list in self.file_index.items():
            # shift by one ("adjust due to undamage file" in the original code)
            data[group_idx - 1] = [self._load_dataframe(file) for file in file_list]
        return data

    def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
        """
        Return the entries stored for one group.

        :param group_idx: Zero-based position when ``self.data`` is a list, or the key when it is a dict.
        :return: The group's list, or an empty list when the group does not exist.
        """
        if isinstance(self.data, dict):
            return self.data.get(group_idx, [])
        if 0 <= group_idx < len(self.data):
            return self.data[group_idx]
        return []

    def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
        """
        Return the column names of one DataFrame.

        :param group_idx: Group position/key, as for ``get_group_data``.
        :param file_idx: Position of the DataFrame inside the group.
        :return: Column names, or an empty list when no DataFrame is stored there.
        """
        members = self.get_group_data(group_idx)
        if 0 <= file_idx < len(members) and isinstance(members[file_idx], pd.DataFrame):
            return members[file_idx].columns.tolist()
        return []

    def get_data_info(self):
        """Pretty-print the type names of everything stored in ``self.data`` (list or dict layout)."""
        if isinstance(self.data, list):
            pprint(
                [
                    [type(item).__name__ for item in sublist]
                    if isinstance(sublist, list)
                    else type(sublist).__name__
                    for sublist in self.data
                ]
            )
        elif isinstance(self.data, dict):
            pprint({key: [type(df).__name__ for df in value] for key, value in self.data.items()})
        else:
            pprint(type(self.data).__name__)

    def _create_vector_column_index(self) -> List[List[int]]:
        """
        Build one index list per stored entry: the k-th entry (counted across all
        groups, from 0) gets ``[k, k + 5, k + 10, k + 15, k + 20, k + 25]``.
        """
        # TODO: the 6 steps of width 5 should be dynamic and parameterized
        vector_col_idx = []
        counter = 0
        for data_group in self.data:
            for _ in data_group:
                vector_col_idx.append([counter + 5 * step for step in range(6)])
                counter += 1
        return vector_col_idx

    def _select_columns(self, pick_positions, overwrite):
        """Slice every stored DataFrame with ``pick_positions(index_list)``; store the result when ``overwrite``."""
        index_lists = self._create_vector_column_index()
        selected = []
        for i, group in enumerate(self.data):
            new_group = []
            for j, df in enumerate(group):
                picked = df.iloc[:, pick_positions(index_lists[j])].copy()
                if overwrite:
                    self.data[i][j] = picked
                new_group.append(picked)
            selected.append(new_group)
        return selected

    def create_vector_column(self, overwrite=True) -> List[List[pd.DataFrame]]:
        """
        Reduce every DataFrame to its six vector columns.

        The index list of the DataFrame's position in its group is shifted by
        one because position 0 holds 'Time'.

        :param overwrite: Replace the entries of ``self.data`` with the sliced
            frames (default). When False ``self.data`` is left untouched.
        :return: Nested list of the sliced DataFrames.
        """
        return self._select_columns(lambda idx: [position + 1 for position in idx], overwrite)

    def create_limited_sensor_vector_column(self, overwrite=True):
        """
        Reduce every DataFrame to column 0 plus the first and last vector column.

        :param overwrite: Replace the entries of ``self.data`` with the sliced
            frames (default). When False ``self.data`` is left untouched.
        :return: Nested list of the sliced DataFrames.
        """
        return self._select_columns(lambda idx: [0, idx[0] + 1, idx[-1] + 1], overwrite)

    def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
        """
        Write two CSV files per DataFrame into ``<output_dir>/<file_prefix>_<group>``.

        ``..._01.csv`` holds the first sensor column and ``..._02.csv`` the
        second; column 0 is added to both when ``include_time`` is set. Groups
        are numbered from 0 and tests from 1.

        :param output_dir: Directory to save the CSV files.
        :param file_prefix: Prefix for the folder and file names.
        """
        # Without the time column the two sensor columns sit at positions 0 and 1.
        shift = 1 if self.include_time else 0
        leading = [0] if self.include_time else []
        for group_idx, group in enumerate(self.data, start=0):
            group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
            os.makedirs(group_folder, exist_ok=True)
            for test_idx, df in enumerate(group, start=1):
                if not isinstance(df, pd.DataFrame):
                    # load_data leaves the tuple in place when a file failed to load.
                    print(f"Skipping {file_prefix}_{group_idx}_TEST{test_idx}: no data loaded")
                    continue
                for suffix, sensor_pos in (("01", shift), ("02", shift + 1)):
                    out_path = os.path.join(
                        group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_{suffix}.csv"
                    )
                    df.iloc[:, leading + [sensor_pos]].to_csv(out_path, index=False)
# def create_damage_files(base_path, output_base, prefix):
# # Initialize colorama
# init(autoreset=True)
# # Generate column labels based on expected duplication in input files
# columns = ["Real"] + [
# f"Real.{i}" for i in range(1, 30)
# ] # Explicitly setting column names
# sensor_end_map = {
# 1: "Real.25",
# 2: "Real.26",
# 3: "Real.27",
# 4: "Real.28",
# 5: "Real.29",
# }
# # Define the damage scenarios and the corresponding original file indices
# damage_scenarios = {
# 1: range(1, 6), # Damage 1 files from zzzAD1.csv to zzzAD5.csv
# 2: range(6, 11), # Damage 2 files from zzzAD6.csv to zzzAD10.csv
# 3: range(11, 16), # Damage 3 files from zzzAD11.csv to zzzAD15.csvs
# 4: range(16, 21), # Damage 4 files from zzzAD16.csv to zzzAD20.csv
# 5: range(21, 26), # Damage 5 files from zzzAD21.csv to zzzAD25.csv
# 6: range(26, 31), # Damage 6 files from zzzAD26.csv to zzzAD30.csv
# }
# damage_pad = len(str(len(damage_scenarios)))
# test_pad = len(str(30))
# for damage, files in damage_scenarios.items():
# for i, file_index in enumerate(files, start=1):
# # Load original data file
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
# df = pd.read_csv(
# file_path, sep="\t", skiprows=10
# ) # Read with explicit column names
# top_sensor = columns[i - 1]
# print(top_sensor, type(top_sensor))
# output_file_1 = os.path.join(
# output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_01.csv"
# )
# print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT")
# print("Taking datetime column on index 0...")
# print(f"Taking `{top_sensor}`...")
# os.makedirs(os.path.dirname(output_file_1), exist_ok=True)
# df[["Time", top_sensor]].to_csv(output_file_1, index=False)
# print(Fore.GREEN + "Done")
# bottom_sensor = sensor_end_map[i]
# output_file_2 = os.path.join(
# output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_02.csv"
# )
# print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT")
# print("Taking datetime column on index 0...")
# print(f"Taking `{bottom_sensor}`...")
# os.makedirs(os.path.dirname(output_file_2), exist_ok=True)
# df[["Time", bottom_sensor]].to_csv(output_file_2, index=False)
# print(Fore.GREEN + "Done")
# print("---")
def main():
    """
    Command-line entry point.

    Expects three positional arguments: the input directory, the output
    directory and the file prefix. Prints a usage message and exits with
    status 1 when any of them is missing.
    """
    # All three arguments are read below, so all three must be present.
    if len(sys.argv) < 4:
        print("Usage: python convert.py <path_to_csv_files> <output_directory> <prefix>")
        sys.exit(1)

    base_path, output_base, prefix = sys.argv[1:4]

    # NOTE(review): in the visible part of this file create_damage_files only
    # exists as commented-out code; confirm it is defined before running this.
    create_damage_files(base_path, output_base, prefix)
    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
if __name__ == "__main__":
main()

View File

@@ -1,16 +0,0 @@
from src.ml.model_selection import inference_model
from joblib import load
# Number embedded in the raw file name, and the sensor whose exported models are used.
x = 30
sensor = 1
file = f"D:/thesis/data/dataset_B/zzzBD{x}.TXT"

# Display name -> path of the exported model file.
model = {
    "SVM": f"D:/thesis/models/sensor{sensor}/SVM.joblib",
    "SVM with PCA": f"D:/thesis/models/sensor{sensor}/SVM with StandardScaler and PCA.joblib",
    "XGBoost": f"D:/thesis/models/sensor{sensor}/XGBoost.joblib",
}

# Map the file number onto 1..5; passed on as the column to analyse.
index = ((x - 1) % 5) + 1

# Run every model on the same file, separating the reports with a rule.
for position, model_name in enumerate(("SVM", "SVM with PCA", "XGBoost")):
    if position:
        print("---")
    inference_model(model[model_name], file, column_question=index)

View File

@@ -1,14 +1,13 @@
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from joblib import load
from sklearn.model_selection import train_test_split as sklearn_split
def create_ready_data(
stft_data_path: str,
stratify: np.ndarray = None,
) -> tuple[pd.DataFrame, np.ndarray]:
) -> tuple:
"""
Create a stratified train-test split from STFT data.
@@ -22,13 +21,13 @@ def create_ready_data(
Returns:
--------
tuple
(pd.DataFrame, np.ndarray) - Combined data and corresponding labels
(X_train, X_test, y_train, y_test) - Split datasets
"""
ready_data = []
for file in os.listdir(stft_data_path):
ready_data.append(pd.read_csv(os.path.join(stft_data_path, file), skiprows=1))
ready_data.append(pd.read_csv(os.path.join(stft_data_path, file)))
y_data = [i for i in range(len(ready_data))] # TODO: Should be replaced with actual desired labels
y_data = [i for i in range(len(ready_data))]
# Combine all dataframes in ready_data into a single dataframe
if ready_data: # Check if the list is not empty
@@ -56,207 +55,3 @@ def create_ready_data(
y = np.array([])
return X, y
def train_and_evaluate_model(
    model, model_name, sensor_label, x_train, y_train, x_test, y_test, export=None
):
    """
    Fit a model, score it on the test split and optionally save it.

    The steps run in order: fit, predict, accuracy, export. A failure in any
    of the first three stops the run and is reported through the ``error``
    key; a failed export is only recorded as a warning.

    Parameters
    ----------
    model : estimator object
        Object providing ``fit`` and ``predict``.
    model_name : str
        Stored in the result and used as the export file name.
    sensor_label : str
        Stored in the result under ``sensor``.
    x_train, y_train : array-like
        Training samples and targets passed to ``fit``.
    x_test, y_test : array-like
        Test samples passed to ``predict`` and the targets they are scored against.
    export : str, optional
        Directory in which ``<model_name>.joblib`` is written. Nothing is
        saved when it is None or empty.

    Returns
    -------
    dict
        Always contains ``model``, ``sensor`` and ``success``. Depending on
        how far the run got it also contains ``y_pred``, ``accuracy``
        (a percentage), ``error`` and ``export_error``.

    Example
    -------
    >>> from sklearn.svm import SVC
    >>> result = train_and_evaluate_model(
    ...     SVC(), "SVM", "sensor1", X_train, y_train, X_test, y_test,
    ...     export="models/sensor1",
    ... )
    >>> if result["success"]:
    ...     print(f"Model accuracy: {result['accuracy']:.2f}%")
    """
    from sklearn.metrics import accuracy_score

    outcome = {"model": model_name, "sensor": sensor_label, "success": False}

    # Step 1: training
    try:
        model.fit(x_train, y_train)
    except Exception as e:
        outcome["error"] = f"Training error: {str(e)}"
        return outcome

    # Step 2: prediction
    try:
        predictions = model.predict(x_test)
        outcome["y_pred"] = predictions
    except Exception as e:
        outcome["error"] = f"Prediction error: {str(e)}"
        return outcome

    # Step 3: accuracy as a percentage
    try:
        outcome["accuracy"] = accuracy_score(y_test, predictions) * 100
    except Exception as e:
        outcome["error"] = f"Accuracy calculation error: {str(e)}"
        return outcome

    # Step 4: optional export; a failure here does not fail the run
    if export:
        try:
            import joblib

            full_path = os.path.join(export, f"{model_name}.joblib")
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
            joblib.dump(model, full_path)
            print(f"Model saved to {full_path}")
        except Exception as e:
            print(f"Warning: Failed to export model to {export}: {str(e)}")
            outcome["export_error"] = str(e)

    outcome["success"] = True
    return outcome
def plot_confusion_matrix(results_sensor, y_test, title, models_dir="D:/thesis/models"):
    """
    Plot one confusion matrix per model result.

    Parameters:
    -----------
    results_sensor : list
        List of result dictionaries. Every dictionary must provide the keys
        ``'sensor'``, ``'model'`` and ``'y_pred'``.
    y_test : array-like
        True labels the predictions are compared against.
    title : str
        Title placed on every plot.
    models_dir : str, default "D:/thesis/models"
        Directory that contains ``<sensor>/<model>.joblib``. The exported
        model is loaded only to read its ``classes_`` for the axis labels.

    Returns:
    --------
    None
        One confusion-matrix plot is drawn per entry of ``results_sensor``.

    Example
    -------
    >>> results_sensor1 = [
    ...     {'model': 'SVM', 'sensor': 'sensor1', 'y_pred': y_pred_svm},
    ... ]
    >>> plot_confusion_matrix(results_sensor1, y_test, "Sensor 1")
    """
    for entry in results_sensor:
        # NOTE(review): load() unpickles the file; only use trusted model directories.
        model = load(f"{models_dir}/{entry['sensor']}/{entry['model']}.joblib")
        cm = confusion_matrix(y_test, entry['y_pred'])  # -> ndarray
        # class labels come from the fitted estimator
        labels = model.classes_
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
        disp.plot(cmap=plt.cm.Blues)  # any matplotlib colormap works here
        plt.title(f"{title}")
def calculate_label_percentages(labels):
    """
    Print how often each distinct label occurs, as a percentage of all labels.

    One line ``Label <label>: <percentage>%`` (two decimals) is printed per
    distinct label, in the order returned by ``np.unique``.

    Parameters:
    labels (np.array): Input array of labels.

    Returns:
    None
    """
    distinct, occurrences = np.unique(labels, return_counts=True)
    shares = (occurrences / len(labels)) * 100

    report_lines = []
    for label, share in zip(distinct, shares):
        report_lines.append(f"Label {label}: {share:.2f}%")

    print("\n".join(report_lines))
    return None
def inference_model(
    models, raw_file, column_question: int = None
):
    """
    Run one exported model on a single column of a raw data file.

    The chosen column is transformed with an STFT (``fs=1024``, 1024-sample
    Hann window, overlap 512), the magnitudes are put into a DataFrame with
    one ``Freq_<value>`` column per frequency bin, and the first 21 rows are
    passed to the model. The distribution of the predicted labels is printed.

    Parameters
    ----------
    models : str
        Path of one exported model file; it is passed to ``load``.
    raw_file : str
        Path of the whitespace-separated raw file. The first 10 lines are
        skipped and the next line is used as header.
    column_question : int
        Position of the column to analyse.

    Returns
    -------
    None
        Whatever ``calculate_label_percentages`` returns; the report itself
        is printed.

    Raises
    ------
    ValueError
        If ``column_question`` is not given.

    Example
    -------
    >>> model = {"SVM": "models/sensor1/SVM.joblib"}
    >>> inference_model(model["SVM"], "zzzAD1.TXT", column_question=1)
    """
    # hann lives in scipy.signal.windows; the scipy.signal alias was removed.
    from scipy.signal import stft
    from scipy.signal.windows import hann

    if column_question is None:
        raise ValueError("column_question must be the integer position of the column to analyse")

    # sep=r"\s+" is the documented equivalent of the deprecated delim_whitespace=True.
    df = pd.read_csv(raw_file, sep=r"\s+", skiprows=10, header=0, memory_map=True)
    vibration_data = df.iloc[:, column_question].values

    # Perform STFT
    _, _, Zxx = stft(
        vibration_data,
        fs=1024,
        window=hann(1024),
        nperseg=1024,
        noverlap=1024 - 512,
    )

    # Rows are time segments, columns are frequency bins, so the labels must
    # be sized from the number of frequency bins.
    magnitude = np.abs(Zxx).T
    column_labels = [
        f"Freq_{value:.2f}" for value in np.linspace(0, 1024 / 2, magnitude.shape[1])
    ]
    data = pd.DataFrame(magnitude, columns=column_labels)
    data = data.rename(columns={"Freq_0.00": "00"})  # To match the model input format

    # NOTE(review): load() unpickles the file; only use trusted model paths.
    model = load(models)
    return calculate_label_percentages(model.predict(data.iloc[:21, :]))

View File

@@ -25,10 +25,13 @@ window = hann(window_size)
Fs = 1024
# Number of damage cases (adjust as needed)
num_damage_cases = 0 # Change to 30 if you have 30 damage cases
num_damage_cases = 6 # Change to 30 if you have 30 damage cases
# Number of test runs per damage case
num_test_runs = 5
# Function to perform STFT and return magnitude
def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size):
def compute_stft(vibration_data):
frequencies, times, Zxx = stft(
vibration_data,
fs=Fs,
@@ -39,13 +42,9 @@ def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_si
stft_magnitude = np.abs(Zxx)
return stft_magnitude.T # Transpose to have frequencies as columns
def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop_size, output_dirs=output_dirs):
def process_damage_case(damage_num):
damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
if damage_num == 0:
# Number of test runs per damage case
num_test_runs = 120
else:
num_test_runs = 5
# Check if the damage folder exists
if not os.path.isdir(damage_folder):
print(f"Folder {damage_folder} does not exist. Skipping...")
@@ -80,29 +79,20 @@ def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop
print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
continue
# Extract vibration data (assuming the second column is sensor data)
vibration_data = df.iloc[:, 1].values
# Perform STFT
stft_magnitude = compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size)
stft_magnitude = compute_stft(vibration_data)
# Convert STFT result to DataFrame
df_stft = pd.DataFrame(
stft_magnitude,
columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])]
)
# only inlcude 21 samples vector features for first 45 num_test_runs else include 22 samples vector features
if damage_num == 0:
print(f"Processing damage_num = 0, test_num = {test_num}")
if test_num <= 45:
df_stft = df_stft.iloc[:22, :]
print(f"Reduced df_stft shape (21 samples): {df_stft.shape}")
else:
df_stft = df_stft.iloc[:21, :]
print(f"Reduced df_stft shape (22 samples): {df_stft.shape}")
# Append to the aggregated list
aggregated_stft.append(df_stft)
print(sum(df.shape[0] for df in aggregated_stft))
# Concatenate all STFT DataFrames vertically
if aggregated_stft:
@@ -115,13 +105,11 @@ def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop
)
# Save the aggregated STFT to CSV
with open(output_file, 'w') as file:
file.write('sep=,\n')
df_aggregated.to_csv(output_file, index=False)
df_aggregated.to_csv(output_file, index=False)
print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}")
else:
print(f"No STFT data aggregated for Sensor {sensor_num}, Damage {damage_num}.")
if __name__ == "__main__": # Added main guard for multiprocessing
with multiprocessing.Pool() as pool:
pool.map(process_damage_case, range(0, num_damage_cases + 1))
pool.map(process_damage_case, range(1, num_damage_cases + 1))

360
data/QUGS/convert.py Normal file
View File

@@ -0,0 +1,360 @@
import pandas as pd
import os
import re
import sys
import numpy as np
from colorama import Fore, Style, init
from typing import TypedDict, Dict, List
from joblib import load
from pprint import pprint
# class DamageFilesIndices(TypedDict):
# damage_index: int
# files: list[int]
OriginalSingleDamageScenarioFilePath = str
DamageScenarioGroupIndex = int
OriginalSingleDamageScenario = pd.DataFrame
SensorIndex = int
VectorColumnIndex = List[SensorIndex]
VectorColumnIndices = List[VectorColumnIndex]
DamageScenarioGroup = List[OriginalSingleDamageScenario]
GroupDataset = List[DamageScenarioGroup]
class DamageFilesIndices(TypedDict):
    """Typed dictionary with an integer ``damage_index`` and a ``files`` list of strings."""

    # Key holding a single integer.
    damage_index: int
    # Key holding a list of strings.
    files: List[str]
def generate_damage_files_index(**kwargs) -> Dict[int, List[str]]:
    """
    Build a mapping ``damage number -> list of file names``.

    Damage numbers run from 1 to ``num_damage``. Each one receives ``col``
    consecutive file numbers, starting at ``file_index_start`` for damage 1.

    Keyword arguments:
    :param prefix: Text in front of the file number (default ``"zzzAD"``).
    :param extension: Text after the file number, including the dot (default ``".TXT"``).
    :param num_damage: Number of damage groups (required).
    :param file_index_start: File number of the first file (required).
    :param col: Number of files per damage group (required).
    :param base_path: Optional directory; when given every name is joined with
        it and normalised with ``os.path.normpath``.
    :return: Dictionary mapping each damage number to its list of file names/paths.
    :raises TypeError: If a required keyword argument is missing.
    """
    prefix: str = kwargs.get("prefix", "zzzAD")
    extension: str = kwargs.get("extension", ".TXT")
    num_damage: int = kwargs.get("num_damage")
    file_index_start: int = kwargs.get("file_index_start")
    col: int = kwargs.get("col")
    base_path: str = kwargs.get("base_path")

    missing = [
        name
        for name in ("num_damage", "file_index_start", "col")
        if kwargs.get(name) is None
    ]
    if missing:
        raise TypeError(
            "generate_damage_files_index() missing required keyword argument(s): "
            + ", ".join(missing)
        )

    files_by_damage = {}
    first_number = file_index_start
    for damage in range(1, num_damage + 1):
        # The range end follows the start, so any file_index_start works.
        names = [
            f"{prefix}{file_number}{extension}"
            for file_number in range(first_number, first_number + col)
        ]
        if base_path:
            names = [os.path.normpath(os.path.join(base_path, name)) for name in names]
        files_by_damage[damage] = names
        first_number += col
    return files_by_damage
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
# df = pd.read_csv( file_path, sep="\t", skiprows=10) # Read with explicit column names
class DataProcessor:
def __init__(self, file_index: DamageFilesIndices, cache_path: str = None):
self.file_index = file_index
if cache_path:
self.data = load(cache_path)
else:
self.data = self._load_all_data()
def _extract_column_names(self, file_path: str) -> List[str]:
"""
Extracts column names from the header of the given file.
Assumes the 6th line contains column names.
:param file_path: Path to the data file.
:return: List of column names.
"""
with open(file_path, "r") as f:
header_lines = [next(f) for _ in range(12)]
# Extract column names from the 6th line
channel_line = header_lines[10].strip()
tokens = re.findall(r'"([^"]+)"', channel_line)
if not channel_line.startswith('"'):
first_token = channel_line.split()[0]
tokens = [first_token] + tokens
return tokens # Prepend 'Time' column if applicable
def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
"""
Loads a single data file into a pandas DataFrame.
:param file_path: Path to the data file.
:return: DataFrame containing the numerical data.
"""
col_names = self._extract_column_names(file_path)
df = pd.read_csv(
file_path, delim_whitespace=True, skiprows=11, header=None, memory_map=True
)
df.columns = col_names
return df
def _load_all_data(self) -> GroupDataset:
"""
Loads all data files based on the grouping dictionary and returns a nested list.
:return: A nested list of DataFrames where the outer index corresponds to group_idx - 1.
"""
data = []
# Find the maximum group index to determine the list size
max_group_idx = max(self.file_index.keys()) if self.file_index else 0
# Initialize empty lists
for _ in range(max_group_idx):
data.append([])
# Fill the list with data
for group_idx, file_list in self.file_index.items():
# Adjust index to be 0-based
list_idx = group_idx - 1
data[list_idx] = [self._load_dataframe(file) for file in file_list]
return data
def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
"""
Returns the list of DataFrames for the given group index.
:param group_idx: Index of the group.
:return: List of DataFrames.
"""
return self.data.get([group_idx, []])
def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
"""
Returns the column names for the given group and file indices.
:param group_idx: Index of the group.
:param file_idx: Index of the file in the group.
:return: List of column names.
"""
if group_idx in self.data and len(self.data[group_idx]) > file_idx:
return self.data[group_idx][file_idx].columns.tolist()
return []
def get_data_info(self):
"""
Print information about the loaded data structure.
Adapted for when self.data is a List instead of a Dictionary.
"""
if isinstance(self.data, list):
# For each sublist in self.data, get the type names of all elements
pprint(
[
(
[type(item).__name__ for item in sublist]
if isinstance(sublist, list)
else type(sublist).__name__
)
for sublist in self.data
]
)
else:
pprint(
{
key: [type(df).__name__ for df in value]
for key, value in self.data.items()
}
if isinstance(self.data, dict)
else type(self.data).__name__
)
def _create_vector_column_index(self) -> VectorColumnIndices:
vector_col_idx: VectorColumnIndices = []
y = 0
for data_group in self.data: # len(data_group[i]) = 5
for j in data_group: # len(j[i]) =
c: VectorColumnIndex = [] # column vector c_{j}
x = 0
for _ in range(6): # TODO: range(6) should be dynamic and parameterized
c.append(x + y)
x += 5
vector_col_idx.append(c)
y += 1
return vector_col_idx
def create_vector_column(self, overwrite=True) -> List[List[List[pd.DataFrame]]]:
"""
Create a vector column from the loaded data.
:param overwrite: Overwrite the original data with vector column-based data.
"""
idx = self._create_vector_column_index()
# if overwrite:
for i in range(len(self.data)):
for j in range(len(self.data[i])):
# Get the appropriate indices for slicing from idx
indices = idx[j]
# Get the current DataFrame
df = self.data[i][j]
# Keep the 'Time' column and select only specified 'Real' columns
# First, we add 1 to all indices to account for 'Time' being at position 0
real_indices = [index + 1 for index in indices]
# Create list with Time column index (0) and the adjusted Real indices
all_indices = [0] + real_indices
# Apply the slicing
self.data[i][j] = df.iloc[:, all_indices]
# TODO: if !overwrite:
def create_limited_sensor_vector_column(self, overwrite=True):
    """
    Reduce every loaded DataFrame to the time column plus two sensor columns.

    For the file at position ``j`` inside a group, the offsets come from entry
    ``j`` of ``self._create_vector_column_index()``; only the first and the
    last offset of that entry are kept. Each offset is shifted by one because
    column 0 is kept as the time column (assumes 'Time' is the first column
    of every frame).

    :param overwrite: When True (default) the sliced frames replace the
        entries of ``self.data`` in place. When False ``self.data`` is left
        untouched and only the returned structure holds the sliced frames.
    :return: Nested list (group -> file) of the sliced DataFrames.
    """
    idx = self._create_vector_column_index()
    result = []
    for i, group in enumerate(self.data):  # damage(s)
        sliced_group = []
        for j, df in enumerate(group):  # col(s)
            # Shift by one: position 0 is the time column.
            real_indices = [index + 1 for index in idx[j]]
            # Time column, first sensor offset, last sensor offset.
            all_indices = [0, real_indices[0], real_indices[-1]]
            sliced = df.iloc[:, all_indices]
            if overwrite:
                self.data[i][j] = sliced
            sliced_group.append(sliced)
        result.append(sliced_group)
    return result
def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
    """
    Write every DataFrame in ``self.data`` as two single-sensor CSV files.

    Group ``g`` (1-based) goes to ``<output_dir>/<file_prefix>_<g>/``. Test
    ``t`` (1-based) of that group yields ``..._TEST<t>_01.csv`` (first sensor
    column) and ``..._TEST<t>_02.csv`` (second sensor column), each with the
    header ``Time,Real``. Every frame is expected to have exactly three
    columns, because three column names are assigned to a copy of it.

    :param output_dir: Directory to save the CSV files.
    :param file_prefix: Prefix for the output folders and filenames.
    """
    # (file suffix, temporary unique column name), written in this order.
    exports = (("01", "Real_0"), ("02", "Real_1"))
    for group_idx, group in enumerate(self.data, start=1):
        group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
        os.makedirs(group_folder, exist_ok=True)
        for test_idx, df in enumerate(group, start=1):
            # Work on a copy so the stored frame keeps its original labels.
            renamed = df.copy()
            renamed.columns = ["Time", "Real_0", "Real_1"]
            stem = f"{file_prefix}_{group_idx}_TEST{test_idx}"
            for suffix, column in exports:
                target = os.path.join(group_folder, f"{stem}_{suffix}.csv")
                pair = renamed[["Time", column]].rename(columns={column: "Real"})
                pair.to_csv(target, index=False)
def create_damage_files(base_path, output_base, prefix):
    """
    Split the raw ``zzz<prefix>D<n>.TXT`` files into two-column CSV files.

    Files 1-30 are grouped into six damage scenarios of five files each. For
    the i-th file of a scenario (i = 1..5) two CSVs are written to
    ``<output_base>/DAMAGE_<d>/``:

    * ``DAMAGE<d>_TEST<i>_01.csv`` -- 'Time' plus the i-th sensor column
      ('Real', 'Real.1', ... 'Real.4').
    * ``DAMAGE<d>_TEST<i>_02.csv`` -- 'Time' plus 'Real.25' ... 'Real.29'.

    :param base_path: Directory containing the raw tab-separated .TXT files.
    :param output_base: Directory under which the DAMAGE_<d> folders are created.
    :param prefix: Dataset tag inserted into the input filename.
    """
    # Initialize colorama
    init(autoreset=True)

    # Column labels as they appear after loading; per the original note the
    # input files repeat the same header, hence Real, Real.1, ..., Real.29.
    columns = ["Real"] + [f"Real.{i}" for i in range(1, 30)]

    # Second sensor column exported for the i-th file of each scenario.
    sensor_end_map = {
        1: "Real.25",
        2: "Real.26",
        3: "Real.27",
        4: "Real.28",
        5: "Real.29",
    }

    # Damage scenario -> indices of the raw files that belong to it.
    damage_scenarios = {
        1: range(1, 6),  # files D1.TXT to D5.TXT
        2: range(6, 11),  # files D6.TXT to D10.TXT
        3: range(11, 16),  # files D11.TXT to D15.TXT
        4: range(16, 21),  # files D16.TXT to D20.TXT
        5: range(21, 26),  # files D21.TXT to D25.TXT
        6: range(26, 31),  # files D26.TXT to D30.TXT
    }

    for damage, files in damage_scenarios.items():
        damage_dir = os.path.join(output_base, f"DAMAGE_{damage}")
        for i, file_index in enumerate(files, start=1):
            source_name = f"zzz{prefix}D{file_index}.TXT"
            # The first 10 lines of each raw file are skipped before the header.
            df = pd.read_csv(
                os.path.join(base_path, source_name), sep="\t", skiprows=10
            )

            top_sensor = columns[i - 1]
            print(top_sensor, type(top_sensor))

            # NOTE(review): these names use "DAMAGE<d>_TEST..." whereas
            # export_to_csv writes "DAMAGE_<d>_TEST..." -- confirm which
            # spelling downstream readers expect.
            for suffix, sensor in (("01", top_sensor), ("02", sensor_end_map[i])):
                output_file = os.path.join(
                    damage_dir, f"DAMAGE{damage}_TEST{i}_{suffix}.csv"
                )
                print(f"Creating {output_file} from taking {source_name}")
                print("Taking datetime column on index 0...")
                print(f"Taking `{sensor}`...")
                os.makedirs(os.path.dirname(output_file), exist_ok=True)
                df[["Time", sensor]].to_csv(output_file, index=False)
                print(Fore.GREEN + "Done")
            print("---")
def main():
    """
    Command-line entry point: convert raw damage files into per-sensor CSVs.

    Expects three positional arguments: the directory with the raw files, the
    output directory, and the dataset prefix used in the input filenames.
    Prints a usage line and exits with status 1 when any of them is missing.
    """
    # All three arguments are read below, so all three must be present.
    if len(sys.argv) < 4:
        print("Usage: python convert.py <base_path> <output_base> <prefix>")
        sys.exit(1)

    base_path, output_base, prefix = sys.argv[1:4]

    # Output folders are created on demand by create_damage_files.
    create_damage_files(base_path, output_base, prefix)
    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")


if __name__ == "__main__":
    main()

View File

@@ -1,52 +1,25 @@
from data_preprocessing import *
from convert import *
from joblib import dump, load
# b = generate_damage_files_index(
# num_damage=6,
# file_index_start=1,
# col=5,
# base_path="D:/thesis/data/dataset_B",
# prefix="zzzBD",
# # undamage_file="zzzBU.TXT"
# )
# NOTE(review): this listing comes from a diff view and appears to interleave
# removed and added lines of one hunk -- confirm which statements are live
# before relying on the order below.
# Example: Generate tuples with a special group of df0 at the beginning
special_groups_A = [
{'df_name': 'zzzAU.TXT', 'position': 0, 'size': 5} # Add at beginning
]
special_groups_B = [
{'df_name': 'zzzBU.TXT', 'position': 0, 'size': 5} # Add at beginning
]
# Generate the tuples with the special group
# Flatten the results of complement_pairs(n) for n = 1..30 into one list.
a_complement = [(comp)
for n in range(1, 31)
for comp in complement_pairs(n)]
# NOTE(review): a_complement is what gets passed as special_groups here;
# special_groups_A / special_groups_B are not referenced in the visible lines.
a = generate_df_tuples(special_groups=a_complement, prefix="zzzAD")
# b_complement = [(comp)
# for n in range(1, 31)
# for comp in complement_pairs(n)]
# b = generate_df_tuples(special_groups=b_complement, prefix="zzzBD")
# a = generate_damage_files_index(
# num_damage=6,
# file_index_start=1,
# col=5,
# base_path="D:/thesis/data/dataset_A",
# prefix="zzzAD",
# # undamage_file="zzzBU.TXT"
# num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
# )
# Build the processor for dataset A from the index `a`.
data_A = DataProcessor(file_index=a, base_path="D:/thesis/data/dataset_A", include_time=True)
# data_A.create_vector_column(overwrite=True)
# # data_A.create_limited_sensor_vector_column(overwrite=True)
# NOTE(review): export_to_csv assigns exactly three column names to each
# frame; with both column-reduction calls above commented out, confirm the
# frames already have three columns at this point.
data_A.export_to_csv("D:/thesis/data/converted/raw")
# Build the file index for dataset B (6 damage cases, 5 columns, files from 1).
b = generate_damage_files_index(
num_damage=6,
file_index_start=1,
col=5,
base_path="D:/thesis/data/dataset_B",
prefix="zzzBD",
)
# data_A = DataProcessor(file_index=a)
# # data.create_vector_column(overwrite=True)
# data_A.create_limited_sensor_vector_column(overwrite=True)
# data_A.export_to_csv("D:/thesis/data/converted/raw")
# data_B = DataProcessor(file_index=b, base_path="D:/thesis/data/dataset_B", include_time=True)
# data_B.create_vector_column(overwrite=True)
# # data_B.create_limited_sensor_vector_column(overwrite=True)
# data_B.export_to_csv("D:/thesis/data/converted/raw_B")
# Dataset B: keep the time column plus first/last sensor per file, then export.
data_B = DataProcessor(file_index=b)
# data.create_vector_column(overwrite=True)
data_B.create_limited_sensor_vector_column(overwrite=True)
data_B.export_to_csv("D:/thesis/data/converted/raw_B")
# a = load("D:/cache.joblib")
# breakpoint()
# breakpoint()

View File

@@ -962,4 +962,16 @@
@thesis{zotero-622,
type = {thesis}
}
}
@thesis{rytter1993,
title = {Vibrational {{Based Inspection}} of {{Civil Engineering Structures}}},
author = {Rytter, Anders},
date = {1993},
institution = {Aalborg University},
location = {Aalborg},
url = {https://vbn.aau.dk/en/publications/vibrational-based-inspection-of-civil-engineering-structures},
abstract = {The thesis has been written in relation to two different research projects. Firstly, an offshore test programme, Integrated Experimental/Numerical Analysis of the Dynamic behavior of offshore structures, which was performed at the department of Building Technology and Structural Engineering at the University of Aalborg from 1988 to 1991. Secondly, a research project, In-Field Vibration Based Inspection of Civil Engineering Structures, which has been performed as a pilot project by the Consulting Engineers Rambøll, Hannemann and Højlund in cooperation with the department of Building Technology and Structural Engineering at the University of Aalborg since the beginning of 1992. Both projects have been supported by the Danish Technical Research Council. Further, the first mentioned project was supported by the Danish Energy Agency. Their financial support is gratefully acknowledged.},
langid = {english},
keywords = {Beam,Bridges,Cracks,Damping,Offshore Platform,Piles,Structural Damage,VBI,Vibration Based Inspection}
}

View File

@@ -1,25 +1,68 @@
\chapter{PENDAHULUAN}
\section{Latar Belakang}
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc consequat lectus dolor, a commodo odio suscipit nec. Aliquam posuere elit eget tellus dapibus, auctor ornare mi porttitor. Donec auctor aliquet nisl, quis convallis ligula rutrum id. Duis tortor ipsum, scelerisque vestibulum viverra eu, maximus vel mi. Nullam volutpat nunc et varius tempor. Vivamus convallis mi eros, aliquam semper dui tincidunt a. Morbi nunc dui, accumsan ac arcu nec, condimentum efficitur mauris. Etiam sed mauris semper, volutpat justo eu, placerat mauris. Suspendisse at erat eu arcu gravida mattis et id nunc. Aliquam malesuada magna odio, ac dictum erat vestibulum a. Mauris vel nisi sit amet elit tempor bibendum sit amet a velit. Morbi dignissim facilisis placerat.\par
\begin{figure}
\centering
\includegraphics[width=0.5\linewidth]{frontmatter/img/slice1.jpg}
\caption{Enter Caption}
\label{fig:enter-label}
\end{figure}
\indent Monitor Kesehatan Struktur (\textit{Structural Health Monitoring} atau SHM) merupakan proses pemantauan kondisi suatu struktur secara berkelanjutan untuk mendeteksi kerusakan sedini mungkin. Salah satu komponen struktural yang umum digunakan dalam penyambungan adalah sambungan baut (\textit{bolt joint}), yang dikenal karena kemudahan dalam perakitan dan penggunaan ulang. Namun demikian, sambungan berulir ini rentan mengalami kelonggaran akibat beban kejut atau getaran terus-menerus \parencite{chen2017}. Kelonggaran baut yang tidak terdeteksi sejak dini dapat menyebabkan kerusakan serius pada struktur, sehingga identifikasi dini terhadap kerusakan sambungan baut menjadi krusial dalam bidang teknik sipil, mesin, dan kedirgantaraan.
Pellentesque vel accumsan lorem, id vulputate metus. Nulla mollis orci ante, et euismod erat venenatis eget. Proin tempus lobortis feugiat. Fusce vitae sem quis lacus iaculis dignissim ut eget turpis. Vivamus ut nisl in enim porttitor fringilla vel et mauris. Mauris quis porttitor magna. Pellentesque molestie viverra arcu at tincidunt. Maecenas non elit arcu.\par
\indent Deteksi kelonggaran baut telah dilakukan melalui berbagai metode. Kelompok pertama adalah inspeksi \textit{in-situ}, seperti inspeksi visual atau penggunaan alat mekanis seperti kunci torsi dan palu. Meskipun sederhana dan murah, metode ini sulit untuk mendeteksi kerusakan pada tahap awal \parencite{j.h.park2015}. Metode palu lebih efektif dibanding visual untuk mendeteksi awal kelonggaran, tetapi akurasinya dapat terganggu oleh kebisingan lingkungan, serta memakan waktu bila diaplikasikan pada struktur dengan banyak sambungan seperti jembatan \parencite{j.h.park2015,wang2013}.
Etiam feugiat enim sit amet tortor interdum lobortis. Curabitur elementum faucibus sapien. Morbi eget facilisis lorem. In sed suscipit metus. Etiam porttitor, libero sit amet sodales hendrerit, libero dolor hendrerit nulla, sed convallis risus leo posuere metus. Cras gravida ac elit viverra ultrices. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Maecenas dictum urna elit, nec eleifend nulla mattis sit amet. Pellentesque suscipit metus vitae leo suscipit, a vehicula quam pretium. Sed eu est ut risus convallis hendrerit a vulputate justo. Nulla sollicitudin quam ut risus euismod, quis consequat dui mattis. Mauris id eros varius, pellentesque quam quis, venenatis tellus. Nulla vitae condimentum nisl. Vestibulum suscipit scelerisque dui, non posuere purus finibus nec. Nulla ultrices felis quis vestibulum porta. Suspendisse potenti.\par
\indent Kelompok kedua menggunakan teknik berbasis penglihatan komputer seperti kamera dan pencitraan digital, termasuk deteksi rotasi kepala baut menggunakan CNN dan Faster R-CNN \parencite{zhang2020,zhao2019}. Meskipun teknik ini dapat mendeteksi kerusakan secara visual tanpa dipengaruhi oleh kebisingan akustik, tantangan tetap ada dalam hal penempatan kamera dan beban komputasi tinggi dari model deep learning, terutama dalam kondisi sempit seperti mesin kendaraan atau turbin.
Nam tempus tincidunt interdum. Pellentesque at ligula ac massa semper efficitur vitae non ante. Suspendisse potenti. Cras vitae interdum erat, nec facilisis urna. Nulla commodo porttitor tellus non posuere. Vestibulum tristique ut urna quis porttitor. Sed pellentesque lectus sit amet ultrices aliquam. Aliquam erat volutpat. Nam dictum eu erat a mollis. Donec eget nulla vel risus aliquet suscipit sed at libero.\par
\indent Kelompok ketiga dan yang menjadi fokus penelitian ini adalah teknik berbasis sensor, terutama pendekatan berbasis getaran (\textit{vibration-based}). Metode ini tidak hanya efektif dalam mengatasi keterbatasan teknik sebelumnya, tetapi juga mampu mendeteksi kelonggaran baut pada tahap awal secara andal dan akurat \parencite{nichols2004,razi2013}. Dalam penelitian ini, deteksi dilakukan melalui data akselerasi struktur yang diambil dari titik-titik sambungan dalam \textit{sistem grid} yang mewakili koneksi baut secara arah kolom.
\indent Pada penelitian sebelumnya oleh \textcite{abdeljaber2017}, deteksi kerusakan struktur menggunakan 1-D Convolutional Neural Network (1-D CNN) telah diterapkan secara efektif pada struktur grid dengan 30 titik sensor. Namun, keterbatasan tetap muncul dalam hal kebutuhan sumber daya komputasi yang tinggi ketika memproses data mentah berdimensi besar dari semua sensor secara simultan \parencite{yang2020, liu2022}. Beberapa studi menyarankan bahwa transformasi sinyal seperti STFT dapat digunakan sebagai alternatif ekstraksi fitur sebelum dilakukan klasifikasi \parencite{shahid2022}. Pendekatan ini tidak hanya mengurangi kompleksitas perhitungan tetapi juga dapat mempertahankan karakteristik penting dari sinyal yang tereduksi.
\indent Oleh karena itu, penelitian ini mengadopsi pendekatan pengurangan jumlah sensor menjadi hanya dua per jalur kolom (atas dan bawah), merepresentasikan sambungan vertikal seperti susunan baut, dengan tujuan menyederhanakan model tanpa kehilangan akurasi deteksi kerusakan. Data diproses melalui transformasi STFT sebelum diklasifikasikan menggunakan model algoritma pembelajaran mesin klasik. Dengan mengevaluasi berbagai pengklasifikasi dan validasi silang antar kolom, studi ini berkontribusi dalam menciptakan sistem SHM yang efisien, rendah biaya, dan mudah diimplementasikan.
\section{Rumusan Masalah}
Untuk memandu arah penelitian ini, beberapa permasalahan utama yang akan dibahas adalah sebagai berikut:
Maecenas hendrerit pharetra bibendum. Donec ut tortor ac augue aliquam ullamcorper nec id eros. Quisque consectetur elementum ipsum vitae posuere. Sed ultricies ipsum nibh, vitae volutpat neque bibendum at. Morbi dictum metus eu bibendum malesuada. Nam scelerisque purus erat, id dictum nisl pretium vitae. Curabitur finibus commodo dui ac molestie. In sed sem ac dui dapibus ullamcorper. Aenean molestie nulla eu lorem maximus hendrerit. Vivamus viverra velit dolor, in vehicula eros facilisis at. Vivamus in rhoncus sem.
\begin{enumerate}
\item Apakah sinyal getaran yang hanya diperoleh dari sensor pada bagian atas dan bawah suatu jalur kolom masih mampu merepresentasikan fitur-fitur penting yang diperlukan untuk mengklasifikasikan kerusakan struktur secara akurat?
\item Apakah penggabungan data dari beberapa jalur kolom dapat meningkatkan kemampuan generalisasi model, meskipun jumlah sensor pada tiap jalur dibatasi?
\item Apakah algoritma pemelajaran mesin klasik yang sederhana masih mampu menghasilkan model dengan kinerja yang cukup layak dibandingkan dengan model \textit{supervised} yang lebih kompleks ketika diterapkan pada skenario dengan input data sensor yang terbatas?
\end{enumerate}
% \section{Identifikasi Masalah}
% \begin{itemize}
% \item Kebanyakan kerangka kerja pada monitoring kesehatan struktur membutuhkan deretan sensor yang banyak; hal ini membutuhkan biaya yang tinggi dan kurang praktis untuk banyak pengaplikasian.
% \item Banyak model dengan performa tinggi bergantung pada teknik pemelajaran mendalam, sehingga dibutuhkan sumber daya komputasi yang tinggi dan memungkinkan kurangnya kemudahan dan keterjangkauan untuk diaplikasikan.
% \item Kurangnya pendekatan umum yang sederhana, yang menyeimbangkan penggunaan sensor dengan keandalan dalam lokalisasi kerusakan.
% \end{itemize}
\section{Lingkup Penelitian}
Studi ini berfokus pada dataset yang tersedia secara publik dan diperoleh dari Queen's University Grandstand Simulator (QUGS), sebuah kerangka besi skala laboratorium yang dipasangi tiga puluh titik sensor akselerometer dan \textit{white shaker noise}. Riset terdahulu telah mengaplikasikan pemelajaran mesin berbasis jaringan saraf pada data dari seluruh sensor yang terpasang penuh pada setiap titik \textit{joint} untuk mencapai akurasi yang tinggi. Akan tetapi, pada praktiknya, instrumentasi penuh seperti ini terkadang kurang efektif dari segi biaya dan kurang layak dalam skala besar.
\section{Tujuan Penelitian}
\begin{enumerate}
\item Mengembangkan alur sistem (\textit{pipeline}) pemantauan kesehatan struktur (Structural Health Monitoring/SHM) yang disederhanakan dengan hanya menggunakan sepasang sensor di ujung-ujung struktur.
% \item Memperlakukan setiap grup kolom sensor sebagai elemen balok satu dimensi yang disederhanakan, dan mengevaluasi apakah karakteristik kerusakan tetap terjaga dalam energi getaran yang ditransmisikan antara kedua ujungnya.
% \item Menyusun setiap grup kolom sebagai satu dataset terpisah dan melakukan lima pengujian berbeda, di mana masing-masing grup kolom berperan sebagai data validasi secara bergantian.
% \item Menyertakan data dari setiap grup kolom ke dalam data pelatihan untuk membentuk satu model umum yang dapat digunakan untuk seluruh grup kolom.
\item Mengeksplorasi kemungkinan generalisasi satu model terhadap berbagai jalur kolom hanya dengan memanfaatkan data dari sensor pada kedua ujung kolom.
\end{enumerate}
% Dalam merespon hal tersebut, penelitian ini memperkenalkan pendekatan baru yang menekankan efisiensi pada penanganan data dan interpretasi fisik. Data pada sensor-sensor yang terpasang pada struktur grid ini dikelompokkan menjadi beberapa grup kolom, dan hanya menyisakan sensor awal dan sensor paling akhir dari setiap grup sensor sebagai input pengklasifikasian. Terdapat hipotesis bahwa energi getaran bergerak di sepanjang jalur kolom terjaga secara cukup baik antara ujung-ujung sensor untuk memungkinkan algoritma pemelajaran mesin, seperti Support-Vector Machine (SVM), Bagged Trees, Random Forest, Decision Tree, KNN, LDA, dan XGBoost, mendeteksi dan mengklasifikasi secara akurat letak kerusakan.
\section{Manfaat Penelitian}
% \subsubsection{Dolor}
Penelitian ini memberikan beberapa manfaat yang diharapkan dapat berkontribusi dalam pengembangan sistem deteksi kerusakan struktur, antara lain:
\begin{enumerate}
\item Penelitian ini tidak berfokus pada pengembangan arsitektur model baru maupun penerapan \textit{transfer learning}, melainkan pada perancangan alur (\textit{pipeline}) klasifikasi yang sederhana dan mudah dipahami sebagai solusi tahap awal untuk pengembangan sistem monitor kesehatan struktur.
\item Dengan pemilihan titik sensor strategis yang terbatas (hanya di ujung atas dan bawah jalur kolom \textit{grid}) serta prapemrosesan berbasis transformasi STFT, penelitian ini menunjukkan bahwa efisiensi dapat dicapai tanpa mengorbankan akurasi secara signifikan.
\item Studi ini membuktikan bahwa algoritma pembelajaran mesin klasik seperti \gls{svm}, KNN, dan LDA masih mampu memberikan performa model yang kompetitif dalam klasifikasi kerusakan, apabila dipadukan dengan ekstraksi fitur yang tepat.
\item Hasil penelitian ini diharapkan dapat menjadi alternatif sistem SHM yang lebih terjangkau dan praktis untuk diterapkan pada struktur nyata, khususnya dalam kondisi keterbatasan sumber daya.
\item Rangkaian eksperimen dan pendekatan sistematis dalam penelitian ini dapat dijadikan tolok ukur atau \textit{baseline} untuk studi komparatif selanjutnya dan pengembangan model arsitektur yang lebih kompleks.
\end{enumerate}

Binary file not shown.

After

Width:  |  Height:  |  Size: 325 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 793 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 148 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 188 KiB

View File

@@ -0,0 +1,15 @@
% Define an abbreviation (acronym)
% Acronyms for the thesis
\newacronym{ml}{ML}{machine learning}
\newacronym{stft}{STFT}{short-time Fourier transform}
\newacronym{ai}{AI}{artificial intelligence}
\newacronym{dl}{DL}{deep learning}
\newacronym{nn}{NN}{neural network}
\newacronym{fft}{FFT}{fast Fourier transform}
\newacronym{svm}{SVM}{support vector machine}
\newacronym{cnn}{CNN}{convolutional neural network}
\newacronym{rnn}{RNN}{recurrent neural network}
\newacronym{vbi}{VBI}{vibration-based inspection}
\newacronym{shm}{SHM}{structural health monitoring}
\newacronym{fea}{FEA}{finite element analysis}
\newacronym{1d-cnn}{1-D CNN}{\textit{One-Dimensional Convolutional Neural Network}}

View File

@@ -1,46 +1,3 @@
% % A new command that enables us to enter bi-lingual (Slovene and English) terms
% % syntax: \addterm[options]{label}{Slovene}{Slovene first use}{English}{Slovene
% % description}
% \newcommand{\addterm}[6][]{
% \newglossaryentry{#2}{
% name={#3 (angl.\ #5)},
% first={#4 (\emph{#5})},
% text={#3},
% sort={#3},
% description={#6},
% #1 % pass additional options to \newglossaryentry
% }
% }
% % A new command that enables us to enter (English) acronyms with bi-lingual
% % (Slovene and English) long versions
% % syntax: \addacronym[options]{label}{abbreviation}{Slovene long}{Slovene first
% % use long}{English long}{Slovene description}
% \newcommand{\addacronym}[7][]{
% % Create the main glossary entry with \newacronym
% % \newacronym[key-val list]{label}{abbrv}{long}
% \newacronym[
% name={#4 (angl.\ #6,\ #3)},
% first={\emph{#5} (angl.\ \emph{#6},\ \emph{#3})},
% sort={#4},
% description={#7},
% #1 % pass additional options to \newglossaryentry
% ]
% {#2}{#3}{#4}
% % Create a cross-reference from the abbreviation to the main glossary entry by
% % creating an auxiliary glossary entry (note: we set the label of this entry
% % to '<original label>_auxiliary' to avoid clashes)
% \newglossaryentry{#2_auxiliary}{
% name={#3},
% sort={#3},
% description={\makefirstuc{#6}},
% see=[See:]{#2}
% }
% }
% % Change the text of the cross-reference links to the Slovene long version.
% \renewcommand*{\glsseeitemformat}[1]{\emph{\acrlong{#1}}.}
% Define the Indonesian term and link it to the English term
\newglossaryentry{jaringansaraf}{
name=Jaringan Saraf,
@@ -52,27 +9,78 @@
% }
% Define the English term and link it to its acronym
\newglossaryentry{neuralnetwork}{
name=Neural Network,
description={A computational model inspired by the human brain, see \gls{nn}}
}
% \newglossaryentry{neuralnetwork}{
% name=Neural Network,
% description={A computational model inspired by the human brain, see \gls{nn}}
% }
% \newacronym
% [description={statistical pattern recognition technique}]
% {svm}{SVM}{support vector machine}
% \newglossaryentry{machinelearning}{
% name=Machine Learning,
% description={A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}}
% \newglossaryentry{pemelajaranmesin}{
% name={pemelajaran mesin (angl.\ #5)},
% first={pemelajaran mesin (\emph{machine learning})},
% text={pemelajaran mesin},
% sort={ },
% description={#6},
% #1 % pass additional options to \newglossaryentry
% }
\longnewglossaryentry{machinelearning}{name={machine learning}}
{A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}
\newterm[see={machinelearning}]{pemelajaranmesin}
% \longnewglossaryentry{machinelearning}{name={machine learning}}
% {A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}
% \newterm[see={machinelearning}]{pemelajaranmesin}
% \newglossaryentry{pemelajaran mesin}{}
% \addterm{machinelearning}{pemelajaran mesin}{pemelajaran mesin}{machine learning}{A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}
\newacronym
[description={statistical pattern recognition technique}]
{svm}{SVM}{support vector machine}
\newglossaryentry{algoritma-genetika}{
name={Algoritma Genetika},
description={Kelas algoritma optimasi dan pencarian yang terinspirasi oleh proses evolusi biologis, seperti seleksi alam, mutasi, dan rekombinasi. Algoritma ini sering digunakan untuk menemukan solusi perkiraan untuk masalah yang kompleks dan sulit dipecahkan secara analitis.},
sort={Algoritma Genetika}
}
\newglossaryentry{deep-learning}{
name={\textit{deep learning}},
description={Bagian dari keluarga metode pembelajaran mesin yang lebih luas berdasarkan jaringan saraf tiruan dengan banyak lapisan (deep neural networks). Arsitektur ini memungkinkan model untuk belajar representasi data secara hierarkis, mulai dari fitur tingkat rendah hingga konsep abstrak tingkat tinggi.},
sort={Pembelajaran Mendalam}
}
\newglossaryentry{jaringan-saraf-tiruan}{
name={Jaringan Saraf Tiruan (Artificial Neural Network)},
description={Model komputasi yang terinspirasi oleh struktur dan fungsi jaringan saraf biologis di otak. JST terdiri dari unit pemrosesan yang saling terhubung (neuron) yang bekerja secara paralel untuk memproses informasi dan belajar dari data melalui penyesuaian bobot koneksi.},
sort={Jaringan Saraf Tiruan}
}
\newglossaryentry{pemrosesan-bahasa-alami}{
name={Pemrosesan Bahasa Alami (Natural Language Processing)},
description={Cabang ilmu komputer dan kecerdasan buatan yang berfokus pada interaksi antara komputer dan bahasa manusia. Tujuannya adalah untuk memungkinkan komputer memproses, memahami, menafsirkan, dan menghasilkan bahasa manusia dengan cara yang bermakna dan berguna.},
sort={Pemrosesan Bahasa Alami}
}
\newglossaryentry{pembelajaran-penguatan}{
name={Pembelajaran Penguatan (Reinforcement Learning)},
description={Area pembelajaran mesin yang berkaitan dengan bagaimana agen perangkat lunak harus mengambil tindakan dalam suatu lingkungan untuk memaksimalkan beberapa gagasan tentang imbalan kumulatif. Agen belajar melalui trial-and-error, menerima umpan balik berupa imbalan atau hukuman.},
sort={Pembelajaran Penguatan}
}
\newglossaryentry{visi-komputer}{
name={Visi Komputer (Computer Vision)},
description={Bidang interdisipliner yang membahas bagaimana komputer dapat dibuat untuk mendapatkan pemahaman tingkat tinggi dari gambar atau video digital. Dari perspektif rekayasa, ia berupaya mengotomatiskan tugas-tugas yang dapat dilakukan oleh sistem visual manusia.},
sort={Visi Komputer}
}
\newglossaryentry{model-generatif}{
name={Model Generatif},
description={Jenis model statistik dalam pembelajaran mesin yang bertujuan untuk mempelajari distribusi probabilitas dari data pelatihan. Setelah dilatih, model ini dapat menghasilkan sampel data baru yang mirip dengan data pelatihan, seperti membuat gambar, teks, atau suara baru.},
sort={Model Generatif}
}
\newglossaryentry{heuristik}{
name={Heuristik},
description={Teknik pemecahan masalah yang menggunakan pendekatan praktis atau jalan pintas yang tidak dijamin optimal atau sempurna, tetapi cukup untuk mencapai tujuan jangka pendek atau perkiraan solusi. Heuristik sering digunakan ketika pencarian solusi optimal terlalu mahal secara komputasi.},
sort={Heuristik}
}
\newglossaryentry{validasi-silang}{
name={Validasi Silang (Cross-Validation)},
description={Teknik statistik untuk mengevaluasi seberapa baik hasil analisis statistik (seperti model prediktif) akan generalisasi ke kumpulan data independen. Ini penting untuk menghindari overfitting dan mendapatkan estimasi kinerja model yang lebih andal pada data yang belum pernah dilihat.},
sort={Validasi Silang}
}
\newglossaryentry{bias-algoritmik}{
name={Bias Algoritmik},
description={Mengacu pada kesalahan sistematis atau hasil yang tidak adil yang dihasilkan oleh sistem kecerdasan buatan karena asumsi yang salah dalam proses pembelajaran mesin atau karena data pelatihan yang bias. Bias ini dapat mereplikasi atau bahkan memperkuat prasangka sosial yang ada.},
sort={Bias Algoritmik}
}

View File

@@ -0,0 +1,241 @@
% --- Glossary Definitions ---
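% Note: Descriptions are based on the provided Indonesian text. Entries not:signal through
% not:damage_file_set_case are still in Indonesian; the remaining entries are translated to English.
% Adjust the language as needed so the final glossary uses a single language.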
\newglossaryentry{not:signal}{
name={\ensuremath{S}},
description={vektor sinyal akselerometer berdimensi 1$\times$262144},
sort={s},
type=notation,
}
\newglossaryentry{not:sampling_freq}{
name={\ensuremath{f_s}},
description={frekuensi dengan nilai \textit{sampling} ($s$) di mana sinyal kontinu didigitalkan},
sort={fs},
type=notation,
}
\newglossaryentry{not:time_length}{
name={\ensuremath{t}},
description={panjang waktu data dalam detik},
sort={t},
type=notation,
}
\newglossaryentry{not:dataset_A}{
name={\ensuremath{\mathcal{A}}},
description={matriks dataset A},
sort={adataset},
type=notation,
}
\newglossaryentry{not:dataset_B}{
name={\ensuremath{\mathcal{B}}},
description={matriks dataset B},
sort={bdataset},
type=notation,
}
\newglossaryentry{not:damage_file}{
name={\ensuremath{\mathbf{D}}},
description={matriks akselerometer untuk setiap berkas dengan bentuk $262144\times30$},
sort={filedamage},
type=notation,
}
\newglossaryentry{not:joint_index}{
name={\ensuremath{n}},
description={indeks atau nomor kerusakan \textit{joint}},
sort={indexjoint},
type=notation,
}
\newglossaryentry{not:damage_file_set_case}{
name={\ensuremath{\mathbf{d}}},
description={set matriks kerusakan},
sort={damagefilesetcase},
type=notation,
}
\newglossaryentry{not:k}{
name={$k$},
description={Index for measurement nodes, an integer ranging from 0 to 29.},
sort={k},
type=notation,
}
\newglossaryentry{not:Fk}{
name={$F_{k}$},
description={Filename string for the raw time-domain signal from node $k$. The specific format mentioned is \texttt{zzzAD}$k$\texttt{.TXT}.},
sort={Fk},
type=notation,
}
\newglossaryentry{not:nkFk}{
name={$n_{k}^{F_{k}}$},
description={Represents the measurement \textit{node} with index $k$. The raw time-domain signal data from this node, $x_k$, has a length of $L=262144$ samples.},
sort={nkFk},
type=notation,
}
\newglossaryentry{not:i}{
name={$i$},
description={Index for ``damage-case'' folders, an integer ranging from 0 to 5.},
sort={i},
type=notation,
}
\newglossaryentry{not:di}{
name={$d_{i}$},
description={Set representing the $i$-th damage scenario, containing data from five consecutive nodes: $\bigl\{\,n_{5i}^{F_{5i}},\;n_{5i+1}^{F_{5i+1}},\;\dots,\;n_{5i+4}^{F_{5i+4}}\bigr\}$. Cardinality: $|d_i|=5$ nodes.},
sort={di},
type=notation,
}
\newglossaryentry{not:diTD}{
name={$d_{i}^{\mathrm{TD}}$},
description={Time-domain subset of nodes from damage case $d_i$, containing only the first and last nodes: $\bigl\{\,n_{5i}^{F_{5i}},\;n_{5i+4}^{F_{5i+4}}\bigr\}$. Cardinality: $|d_{i}^{\mathrm{TD}}| = 2$ nodes.},
sort={diTD},
type=notation,
}
\newglossaryentry{not:calT}{
name={$\mathcal{T}$},
description={Short-Time Fourier Transform (STFT) operator. It maps a raw time-domain signal $n_k^{F_k}$ (or $x_k$) from $\mathbb{R}^{L}$ (with $L=262144$) to a magnitude spectrogram matrix $\widetilde{n}_k^{F_k}$ in $\mathbb{R}^{513 \times 513}$.},
sort={Tcal},
type=notation,
}
\newglossaryentry{not:L}{
name={$L$},
description={Length of the raw time-domain signal, $L=262144$ samples.},
sort={L},
type=notation,
}
\newglossaryentry{not:Nw}{
name={$N_{w}$},
description={Length of the Hanning window used in the STFT, $N_{w}=1024$ samples.},
sort={Nw},
type=notation,
}
\newglossaryentry{not:Nh}{
name={$N_{h}$},
description={Hop size (or step size) used in the STFT, $N_{h}=512$ samples.},
sort={Nh},
type=notation,
}
\newglossaryentry{not:wn}{
name={$w[n]$},
description={Value of the Hanning window function at sample index $n$. The window spans $N_w$ samples.},
sort={wn},
type=notation,
}
\newglossaryentry{not:n_summation}{
name={$n$},
description={Sample index within the Hanning window and for the STFT summation, an integer ranging from $0$ to $N_w-1$.},
sort={n_summation},
type=notation,
}
\newglossaryentry{not:xkm}{
name={$x_k[m]$}, % Or x_k if it's treated as the whole signal vector
description={Represents the raw time-domain signal for node $k$. As a discrete signal, it consists of $L=262144$ samples. $x_k[m]$ would be the $m$-th sample.},
sort={xkm},
type=notation,
}
\newglossaryentry{not:Skpt}{
name={$S_k(p,t)$},
description={Complex-valued result of the STFT for node $k$ at frequency bin $p$ and time frame $t$. This is a scalar value for each $(p,t)$ pair.},
sort={Skpt},
type=notation,
}
\newglossaryentry{not:p}{
name={$p$},
description={Frequency bin index in the STFT or spectrogram, an integer ranging from $0$ to $512$.},
sort={p},
type=notation,
}
\newglossaryentry{not:t_stft}{ % Differentiating t for STFT time frame and t for feature vector time slice if necessary
name={$t$},
description={Time frame index in the STFT or spectrogram, an integer ranging from $0$ to $512$. Also used as the time slice index for extracting feature vectors $\mathbf{x}_{i,s,r,t}$ from spectrograms.},
sort={t_stft},
type=notation,
}
\newglossaryentry{not:ntildekFk}{ % New entry for the matrix
name={$\widetilde{n}_k^{F_k}$},
description={The magnitude spectrogram matrix for node $k$, obtained by applying the STFT operator $\mathcal{T}$ to the time-domain signal $n_k^{F_k}$. This matrix is an element of $\mathbb{R}^{513 \times 513}$.},
sort={ntildekFk},
type=notation,
}
\newglossaryentry{not:ntildekFkpt}{ % Modified entry for the element
name={$\widetilde{n}_k^{F_k}(p,t)$},
description={Scalar value representing the magnitude of the STFT for node $k$ at frequency bin $p$ and time frame $t$; specifically, $\widetilde{n}_k^{F_k}(p,t) = |S_k(p,t)|$. This is an element of the spectrogram matrix $\widetilde{n}_k^{F_k}$.},
sort={ntildekFkpt},
type=notation,
}
\newglossaryentry{not:R}{
name={$\mathbb{R}$},
description={The set of real numbers. Used to denote vector spaces like $\mathbb{R}^{N}$ (N-dimensional real vectors) or $\mathbb{R}^{M \times N}$ (M-by-N real matrices).},
sort={Rbb},
type=notation,
}
\newglossaryentry{not:diFD}{
name={$d_{i}^{\mathrm{FD}}$},
description={Frequency-domain subset for damage case $i$. It contains two spectrogram matrices: $\bigl\{\,\widetilde{n}_{5i}^{F_{5i}},\; \widetilde{n}_{5i+4}^{F_{5i+4}}\,\bigr\}$, where each spectrogram $\widetilde{n}$ is in $\mathbb{R}^{513 \times 513}$. Cardinality: $|d_{i}^{\mathrm{FD}}| = 2$ spectrograms.},
sort={diFD},
type=notation,
}
\newglossaryentry{not:r_repetition}{
name={$r$},
description={Repetition index within a single damage case, an integer ranging from $0$ to $4$.},
sort={r_repetition},
type=notation,
}
\newglossaryentry{not:xboldisr}{
name={$\mathbf{x}_{i,s,r,t}$},
description={Feature vector (a row or column, often referred to as a time slice) taken from the $r$-th spectrogram repetition, for damage case $i$ and sensor side $s$, at time slice $t$. This vector is an element of $\mathbb{R}^{513}$.},
sort={xisrt_bold},
type=notation,
}
\newglossaryentry{not:s_sensor}{
name={$s$},
description={Index representing the sensor side (e.g., identifying Sensor A or Sensor B).},
sort={s_sensor},
type=notation,
}
\newglossaryentry{not:yi}{
name={$y_{i}$},
description={Scalar label for the damage case $i$, defined as $y_i = i$. This is an integer value from 0 to 5.},
sort={yi},
type=notation,
}
\newglossaryentry{not:Lambda}{
name={$\Lambda(i,s,r,t)$},
description={Slicing function that concatenates a feature vector $\mathbf{x}_{i,s,r,t} \in \mathbb{R}^{513}$ with its corresponding damage case label $y_i \in \mathbb{R}$, resulting in a combined vector $\bigl[\,\mathbf{x}_{i,s,r,t}, \;y_{i}\bigr] \in \mathbb{R}^{514}$.},
sort={Lambda},
type=notation,
}
\newglossaryentry{not:calDs}{
name={$\mathcal{D}^{(s)}$},
description={The complete dataset for sensor side $s$. It is a collection of $15390$ data points, where each point is a vector in $\mathbb{R}^{514}$ (513 features + 1 label). Thus, the dataset can be viewed as a matrix of size $15390 \times 514$.},
sort={Dcal_s},
type=notation,
}
% --- End Glossary Definitions ---