refactor(stft): comment out unused imports and update SVM model loading for consistency

feat(model_selection): add timing for model training and validation processes
Merge pull request #100 from nuluh/feature/99-exp-alternative-undamage-case-data
2025-07-28 05:22:24 +07:00 · 2025-07-28 05:20:10 +07:00 · 2025-07-24 18:09:05 +07:00 · 2025-07-24 17:00:31 +07:00 · 2025-07-18 19:29:02 +07:00 · 2025-07-18 19:28:43 +07:00
14 changed files with 1630 additions and 1115 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,16 @@ data/**/*.csv
 .venv/
 *.pyc
 *.egg-info/
+
+# Latex
+*.aux
+*.log
+*.out
+*.toc
+*.bbl
+*.blg
+*.fdb_latexmk
+*.fls
+*.synctex.gz
+*.dvi
+
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,7 @@
 {
-  "python.analysis.extraPaths": ["./code/src/features"],
+  "python.analysis.extraPaths": [
+    "./code/src/features",
+    "${workspaceFolder}/code/src"
+  ],
  "jupyter.notebookFileRoot": "${workspaceFolder}/code"
 }
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
--- a/code/src/data_preprocessing.py
+++ b/code/src/data_preprocessing.py
@@ -0,0 +1,357 @@
+import pandas as pd
+import os
+import re
+import sys
+import numpy as np
+from colorama import Fore, Style, init
+from typing import TypedDict, Dict, List
+from joblib import load
+from pprint import pprint
+
+# class DamageFilesIndices(TypedDict):
+#     damage_index: int
+#     files: list[int]
+OriginalSingleDamageScenarioFilePath = str
+DamageScenarioGroupIndex = int
+OriginalSingleDamageScenario = pd.DataFrame
+SensorIndex = int
+VectorColumnIndex = List[SensorIndex]
+VectorColumnIndices = List[VectorColumnIndex]
+DamageScenarioGroup = List[OriginalSingleDamageScenario]
+GroupDataset = List[DamageScenarioGroup]
+
+
+class DamageFilesIndices(TypedDict):
+    damage_index: int
+    files: List[str]
+
+def complement_pairs(n, prefix, extension):
+    """
+    Yield the four complement tuples for the file <prefix><n>.<extension>
+    """
+    filename = f"{prefix}{n}.{extension}" # TODO: shouldnt be hardcoded
+    orig_a   = (n - 1) % 5 + 1                # 1 … 5
+    for a in range(1, 6):              # a = 1 … 5
+        if a != orig_a:                # skip original a
+            yield (filename, [a, a + 25]) # use yield instead of return to return a generator of tuples
+
+def generate_df_tuples(total_dfs, prefix, extension, first_col_start, last_col_offset, 
+                      group_size=5, special_groups=None, group=True):
+    """
+    Generate a structured list of tuples containing DataFrame references and column indices.
+    
+    Parameters:
+    -----------
+    total_dfs : int
+        Total number of DataFrames to include in the tuples (currently unused; 6 groups of 5 are hardcoded)
+    group_size : int, default 5
+        Number of DataFrames in each group (determines the pattern repeat; currently unused)
+    prefix : str
+        Prefix for the generated file names
+    first_col_start : int
+        Starting value for the first column index (1-indexed; currently unused)
+    last_col_offset : int
+        Offset to add to first_col_start to get the last column index (currently unused; 25 is hardcoded)
+    special_groups : list of dict, optional
+        List of special groups to insert, each dict should contain:
+        - 'df_name': The DataFrame name to use for all tuples in this group
+        - 'position': Where to insert this group (0 for beginning)
+        - 'size': Size of this group (default: same as group_size)
+    
+    Returns:
+    --------
+    list
+        List of tuples, where each tuple contains (df_name, [first_col, last_col])
+    """
+    result = []
+    if group:
+        # Group tuples into sublists of group_size
+        for g in range(6):                # TODO: shouldnt be hardcoded
+            group = []
+            for i in range(1, 6):         # TODO: shouldnt be hardcoded
+                n = g * 5 + i
+                bottom_end = i                           # 1, 2, 3, 4, 5
+                top_end = bottom_end + 25                # 26, 27, 28, 29, 30 # TODO: shouldnt be hardcoded
+                group.append((f"{prefix}{n}.{extension}", [bottom_end, top_end]))
+            result.append(group)
+
+    # Insert special_groups as a single group at the beginning of the result
+    if special_groups:
+        result.insert(0, special_groups)
+    
+    
+    return result
+
+
+    # file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
+    # df = pd.read_csv(file_path, sep="\t", skiprows=10)  # Read with explicit column names
+
+
+class DataProcessor:
+    def __init__(self, file_index, cache_path: str = None, base_path: str = None, include_time: bool = False):
+        self.file_index = file_index
+        self.base_path = base_path
+        self.include_time = include_time
+        if cache_path:
+            self.data = load(cache_path)
+        else:
+            self.data = self.load_data()
+
+    def load_data(self):
+        for idxs, group in enumerate(self.file_index):
+            for idx, tuple in enumerate(group):
+                file_path = os.path.join(self.base_path, tuple[0]) # ('zzzAD1.TXT')
+                if self.include_time:
+                    col_indices = [0] + tuple[1]  # [1, 26] + [0] -> [0, 1, 26]
+                else:
+                    col_indices = tuple[1] # [1, 26]
+                try:
+                    # Read the CSV file
+                    df = pd.read_csv(file_path, delim_whitespace=True, skiprows=10, header=0, memory_map=True)
+                    self.file_index[idxs][idx] = df.iloc[:, col_indices].copy()  # Extract the specified columns
+                    
+                    print(f"Processed {file_path}, extracted columns: {col_indices}")
+                    
+                except Exception as e:
+                    print(f"Error processing {file_path}: {str(e)}")
+    def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
+        """
+        Loads a single data file into a pandas DataFrame.
+
+        :param file_path: Path to the data file.
+        :return: DataFrame containing the numerical data.
+        """
+        df = pd.read_csv(file_path, delim_whitespace=True, skiprows=10, header=0, memory_map=True, nrows=1)
+        return df
+
+    def _load_all_data(self) -> GroupDataset:
+        """
+        Loads all data files based on the grouping dictionary and returns a nested list.
+
+        :return: A nested list of DataFrames where the outer index corresponds to group_idx - 1.
+        """
+        data = []
+        # Find the maximum group index to determine the list size
+        max_group_idx = len(self.file_index) if self.file_index else 0
+
+        # Handle case when file_index is empty
+        if max_group_idx == 0:
+            raise ValueError("No file index provided; file_index is empty.")
+
+        # Initialize empty lists
+        for _ in range(max_group_idx):
+            data.append([])
+
+        # Fill the list with data
+        for group_idx, file_list in self.file_index.items():
+            group_idx -= 1 # adjust due to undamage file
+            data[group_idx] = [self._load_dataframe(file) for file in file_list]
+        return data
+
+    def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
+        """
+        Returns the list of DataFrames for the given group index.
+
+        :param group_idx: Index of the group.
+        :return: List of DataFrames.
+        """
+        return self.data.get([group_idx, []])
+
+    def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
+        """
+        Returns the column names for the given group and file indices.
+
+        :param group_idx: Index of the group.
+        :param file_idx: Index of the file in the group.
+        :return: List of column names.
+        """
+        if group_idx in self.data and len(self.data[group_idx]) > file_idx:
+            return self.data[group_idx][file_idx].columns.tolist()
+        return []
+
+    def get_data_info(self):
+        """
+        Print information about the loaded data structure.
+        Adapted for when self.data is a List instead of a Dictionary.
+        """
+        if isinstance(self.data, list):
+            # For each sublist in self.data, get the type names of all elements
+            pprint(
+                [
+                    (
+                        [type(item).__name__ for item in sublist]
+                        if isinstance(sublist, list)
+                        else type(sublist).__name__
+                    )
+                    for sublist in self.data
+                ]
+            )
+        else:
+            pprint(
+                {
+                    key: [type(df).__name__ for df in value]
+                    for key, value in self.data.items()
+                }
+                if isinstance(self.data, dict)
+                else type(self.data).__name__
+            )
+
+    def _create_vector_column_index(self) -> VectorColumnIndices:
+        vector_col_idx: VectorColumnIndices = []
+        y = 0
+        for data_group in self.data:  # len(data_group[i]) = 5
+            for j in data_group:  # len(j[i]) =
+                c: VectorColumnIndex = []
+                x = 0
+                for _ in range(6):  # TODO: range(6) should be dynamic and parameterized
+                    c.append(x + y)
+                    x += 5
+                vector_col_idx.append(c)
+                y += 1
+            return vector_col_idx # TODO: refactor this so that it returns just from first data_group without using for loops through the self.data that seems unnecessary
+
+    def create_vector_column(self, overwrite=True) -> List[List[List[pd.DataFrame]]]:
+        """
+        Create a vector column from the loaded data.
+
+        :param overwrite: Overwrite the original data with vector column-based data.
+        """
+        idxs = self._create_vector_column_index()
+        for i, group in enumerate(self.data):
+            # add 1 to all indices to account for 'Time' being at position 0
+            for j, df in enumerate(group):
+                idx = [_ + 1 for _ in idxs[j]]
+                # slice out the desired columns, copy into a fresh DataFrame,
+                # then overwrite self.data[i][j] with it
+                self.data[i][j] = df.iloc[:, idx].copy()
+
+            # TODO: if !overwrite:
+
+    def create_limited_sensor_vector_column(self, overwrite=True):
+        """
+        Reduce each DataFrame to the 'Time' column plus the first and last columns of its vector column.
+
+        :param overwrite: Overwrite the original data with vector column-based data.
+        """
+        idx = self._create_vector_column_index()
+        # if overwrite:
+        for i in range(len(self.data)):  # damage(s)
+            for j in range(len(self.data[i])):  # col(s)
+                # Get the appropriate indices for slicing from idx
+                indices = idx[j]
+
+                # Get the current DataFrame
+                df = self.data[i][j]
+
+                # Keep the 'Time' column and select only specified 'Real' columns
+                # First, we add 1 to all indices to account for 'Time' being at position 0
+                real_indices = [index + 1 for index in indices]
+
+                # Create list with Time column index (0) and the adjusted Real indices
+                all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
+
+                # Apply the slicing
+                self.data[i][j] = df.iloc[:, all_indices]
+        # TODO: if !overwrite:
+
+    def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
+        """
+        Export the processed data to CSV files in the required folder structure.
+
+        :param output_dir: Directory to save the CSV files.
+        :param file_prefix: Prefix for the output filenames.
+        """
+        for group_idx, group in enumerate(self.file_index, start=0):
+            group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
+            os.makedirs(group_folder, exist_ok=True)
+
+            for test_idx, df in enumerate(group, start=1):
+                out1 = os.path.join(group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv")
+                cols_to_export = [0, 1] if self.include_time else [1]
+                df.iloc[:, cols_to_export].to_csv(out1, index=False)
+
+                out2 = os.path.join(group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv")
+                cols_to_export = [0, 2] if self.include_time else [2]
+                df.iloc[:, cols_to_export].to_csv(out2, index=False)
+
+# def create_damage_files(base_path, output_base, prefix):
+#     # Initialize colorama
+#     init(autoreset=True)
+
+#     # Generate column labels based on expected duplication in input files
+#     columns = ["Real"] + [
+#         f"Real.{i}" for i in range(1, 30)
+#     ]  # Explicitly setting column names
+
+#     sensor_end_map = {
+#         1: "Real.25",
+#         2: "Real.26",
+#         3: "Real.27",
+#         4: "Real.28",
+#         5: "Real.29",
+#     }
+
+#     # Define the damage scenarios and the corresponding original file indices
+#     damage_scenarios = {
+#         1: range(1, 6),  # Damage 1 files from zzzAD1.csv to zzzAD5.csv
+#         2: range(6, 11),  # Damage 2 files from zzzAD6.csv to zzzAD10.csv
+#         3: range(11, 16),  # Damage 3 files from zzzAD11.csv to zzzAD15.csvs
+#         4: range(16, 21),  # Damage 4 files from zzzAD16.csv to zzzAD20.csv
+#         5: range(21, 26),  # Damage 5 files from zzzAD21.csv to zzzAD25.csv
+#         6: range(26, 31),  # Damage 6 files from zzzAD26.csv to zzzAD30.csv
+#     }
+#     damage_pad = len(str(len(damage_scenarios)))
+#     test_pad = len(str(30))
+
+#     for damage, files in damage_scenarios.items():
+#         for i, file_index in enumerate(files, start=1):
+#             # Load original data file
+#             file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
+#             df = pd.read_csv(
+#                 file_path, sep="\t", skiprows=10
+#             )  # Read with explicit column names
+
+#             top_sensor = columns[i - 1]
+#             print(top_sensor, type(top_sensor))
+#             output_file_1 = os.path.join(
+#                 output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_01.csv"
+#             )
+#             print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT")
+#             print("Taking datetime column on index 0...")
+#             print(f"Taking `{top_sensor}`...")
+#             os.makedirs(os.path.dirname(output_file_1), exist_ok=True)
+#             df[["Time", top_sensor]].to_csv(output_file_1, index=False)
+#             print(Fore.GREEN + "Done")
+
+#             bottom_sensor = sensor_end_map[i]
+#             output_file_2 = os.path.join(
+#                 output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_02.csv"
+#             )
+#             print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT")
+#             print("Taking datetime column on index 0...")
+#             print(f"Taking `{bottom_sensor}`...")
+#             os.makedirs(os.path.dirname(output_file_2), exist_ok=True)
+#             df[["Time", bottom_sensor]].to_csv(output_file_2, index=False)
+#             print(Fore.GREEN + "Done")
+#             print("---")
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python convert.py <path_to_csv_files>")
+        sys.exit(1)
+
+    base_path = sys.argv[1]
+    output_base = sys.argv[2]
+    prefix = sys.argv[3]  # Define output directory
+
+    # Create output folders if they don't exist
+    # for i in range(1, 7):
+    #     os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
+
+    create_damage_files(base_path, output_base, prefix)
+    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
+
+
+if __name__ == "__main__":
+    main()
--- a/code/src/ml/inference.py
+++ b/code/src/ml/inference.py
@@ -0,0 +1,16 @@
+from src.ml.model_selection import inference_model
+from joblib import load
+
+x = 30
+file = f"D:/thesis/data/dataset_B/zzzBD{x}.TXT"
+sensor = 1
+model = {"SVM": f"D:/thesis/models/sensor{sensor}/SVM.joblib", 
+        "SVM with PCA": f"D:/thesis/models/sensor{sensor}/SVM with StandardScaler and PCA.joblib",
+        "XGBoost": f"D:/thesis/models/sensor{sensor}/XGBoost.joblib"}
+
+index = ((x-1) % 5) + 1
+inference_model(model["SVM"], file, column_question=index)
+print("---")
+inference_model(model["SVM with PCA"], file, column_question=index)
+print("---")
+inference_model(model["XGBoost"], file, column_question=index)
--- a/code/src/ml/model_selection.py
+++ b/code/src/ml/model_selection.py
@@ -1,13 +1,14 @@
 import numpy as np
 import pandas as pd
 import os
-from sklearn.model_selection import train_test_split as sklearn_split
-
+import matplotlib.pyplot as plt
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
+from joblib import load

 def create_ready_data(
    stft_data_path: str,
    stratify: np.ndarray = None,
-) -> tuple:
+) -> tuple[pd.DataFrame, np.ndarray]:
    """
    Create a stratified train-test split from STFT data.

@@ -21,13 +22,13 @@ def create_ready_data(
    Returns:
    --------
    tuple
-        (X_train, X_test, y_train, y_test) - Split datasets
+        (pd.DataFrame, np.ndarray) - Combined data and corresponding labels
    """
    ready_data = []
    for file in os.listdir(stft_data_path):
-        ready_data.append(pd.read_csv(os.path.join(stft_data_path, file)))
+        ready_data.append(pd.read_csv(os.path.join(stft_data_path, file), skiprows=1))

-    y_data = [i for i in range(len(ready_data))]
+    y_data = [i for i in range(len(ready_data))] # TODO: Should be replaced with actual desired labels

    # Combine all dataframes in ready_data into a single dataframe
    if ready_data:  # Check if the list is not empty
@@ -55,3 +56,216 @@ def create_ready_data(
        y = np.array([])

    return X, y
+
+
+def train_and_evaluate_model(
+    model, model_name, sensor_label, x_train, y_train, x_test, y_test, export=None
+):
+    """
+    Train a machine learning model, evaluate its performance, and optionally export it.
+
+    This function trains the provided model on the training data, evaluates its
+    performance on test data using accuracy score, and can save the trained model
+    to disk if an export path is provided.
+
+    Parameters
+    ----------
+    model : estimator object
+        The machine learning model to train.
+    model_name : str
+        Name of the model, used for the export filename and in the returned results.
+    sensor_label : str
+        Label identifying which sensor's data the model is being trained on.
+    x_train : array-like or pandas.DataFrame
+        The training input samples.
+    y_train : array-like
+        The target values for training.
+    x_test : array-like or pandas.DataFrame
+        The test input samples.
+    y_test : array-like
+        The target values for testing.
+    export : str, optional
+        Directory path where the trained model should be saved. If None, model won't be saved.
+
+    Returns
+    -------
+    dict
+        Dictionary containing:
+        - 'model': model_name (str)
+        - 'sensor': sensor_label (str)
+        - 'accuracy': accuracy percentage (float)
+
+    Example
+    -------
+    >>> from sklearn.svm import SVC
+    >>> from sklearn.model_selection import train_test_split
+    >>> X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)
+    >>> result = train_and_evaluate_model(
+    ...     SVC(),
+    ...     "SVM",
+    ...     "sensor1",
+    ...     X_train,
+    ...     y_train,
+    ...     X_test,
+    ...     y_test,
+    ...     export="models/sensor1"
+    ... )
+    >>> print(f"Model accuracy: {result['accuracy']:.2f}%")
+    """
+    from sklearn.metrics import accuracy_score
+
+    result = {"model": model_name, "sensor": sensor_label, "success": False}
+
+    try:
+        import time
+        start_time = time.time()
+
+        # Train the model
+        model.fit(x_train, y_train)
+
+        result["elapsed_time_training"] = time.time() - start_time
+        try:
+            # Predict on the test set (validation)
+            start_time = time.time()
+    
+            y_pred = model.predict(x_test)
+
+            result["elapsed_time_validation"] = time.time() - start_time
+            result["y_pred"] = y_pred  # Keep the predictions so callers can reuse them (e.g. for a confusion matrix)
+        except Exception as e:
+            result["error"] = f"Prediction error: {str(e)}"
+            return result
+
+        # Calculate accuracy
+        try:
+            accuracy = accuracy_score(y_test, y_pred) * 100
+            result["accuracy"] = accuracy
+        except Exception as e:
+            result["error"] = f"Accuracy calculation error: {str(e)}"
+            return result
+
+        # Export model if requested
+        if export:
+            try:
+                import joblib
+
+                full_path = os.path.join(export, f"{model_name}.joblib")
+                os.makedirs(os.path.dirname(full_path), exist_ok=True)
+                joblib.dump(model, full_path)
+                print(f"Model saved to {full_path}")
+            except Exception as e:
+                print(f"Warning: Failed to export model to {export}: {str(e)}")
+                result["export_error"] = str(e)
+                # Continue despite export error
+
+        result["success"] = True
+        return result
+
+    except Exception as e:
+        result["error"] = f"Training error: {str(e)}"
+        return result
+def plot_confusion_matrix(results_sensor, y_test, title):
+    """
+    Plot confusion matrices for each model in results_sensor.
+
+    Parameters:
+    -----------
+    results_sensor : list
+        List of dictionaries containing model results ('model', 'sensor' and 'y_pred' keys are read).
+    y_test : array-like
+        True labels for the test samples.
+    title : str
+        Title placed on each confusion-matrix plot.
+
+    Returns:
+    --------
+    None
+    This function will plot a confusion matrix for each model in results_sensor.
+
+    Example
+    -------
+    >>> y_test = np.random.randint(0, 2, size=100)  # Example true labels
+    >>> results_sensor = [
+    ...     {'model': 'SVM', 'sensor': 'sensor1', 'y_pred': y_test},
+    ...     {'model': 'XGBoost', 'sensor': 'sensor1', 'y_pred': y_test}
+    ... ]
+    >>> # Each model is loaded from D:/thesis/models/<sensor>/<model>.joblib
+    >>> plot_confusion_matrix(results_sensor, y_test, "Sensor 1")
+    """
+    # Iterate through each model result and plot confusion matrix
+    for i in results_sensor:
+        model = load(f"D:/thesis/models/{i['sensor']}/{i['model']}.joblib")
+        cm = confusion_matrix(y_test, i['y_pred']) # -> ndarray
+
+        # get the class labels
+        labels = model.classes_
+        # Plot
+        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
+        disp.plot(cmap=plt.cm.Blues)  # You can change colormap
+        plt.title(f"{title}")
+
+def calculate_label_percentages(labels):
+    """
+    Calculate and print the percentage distribution of unique labels in a numpy array.
+
+    Parameters:
+        labels (np.array): Input array of labels.
+
+    Returns:
+        None
+    """
+    # Count occurrences of each unique label
+    unique, counts = np.unique(labels, return_counts=True)
+
+    # Calculate percentages
+    percentages = (counts / len(labels)) * 100
+
+    # Build and print the result string
+    result = "\n".join([f"Label {label}: {percentage:.2f}%" for label, percentage in zip(unique, percentages)])
+    return print(result)
+
+def inference_model(
+    models, raw_file, column_question: int = None
+):
+    """
+    Perform inference using a trained machine learning model on a raw vibration data file with questioned column grid.
+
+    Parameters
+    ----------
+    models : str
+        Path to an exported (joblib) model to load for inference.
+    raw_file : str
+        Path to the raw whitespace-delimited vibration data file.
+    column_question : int, optional
+        Positional index of the column in raw_file to run inference on.
+
+    Returns
+    -------
+    None
+        The percentage distribution of the predicted labels is printed.
+
+    Example
+    -------
+    >>> from sklearn.svm import SVC
+    >>> model = {"SVM": "models/sensor1/SVM.joblib", "SVM with PCA": "models/sensor1/SVM_with_PCA.joblib"}
+    >>> inference_model(model["SVM"], "zzzAD1.TXT", column_question=1)
+    """
+    df = pd.read_csv(raw_file, delim_whitespace=True, skiprows=10, header=0, memory_map=True)
+    col_idx = []
+    for i in range(1,6):
+        idx = [i, i+5, i+10, i+15, i+20, i+25]
+        col_idx.append(idx)
+    vibration_data = df.iloc[:, column_question].values
+    # Perform STFT
+    from scipy.signal import stft, hann
+    freq, times, Zxx = stft(
+                            vibration_data, 
+                            fs=1024, 
+                            window=hann(1024), 
+                            nperseg=1024, 
+                            noverlap=1024-512
+                            )
+    data = pd.DataFrame(np.abs(Zxx).T, columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, 1024/2, Zxx.shape[1])])
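+    data = data.rename(columns={"Freq_0.00": "00"}) # To match the model input format; NOTE(review): "00" presumably mirrors a training-CSV header partly overwritten by the 'sep=,' line written in process_stft.py — confirm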
+    model = load(models)  # Load the model from the provided path
+    return calculate_label_percentages(model.predict(data.iloc[:21,:]))
--- a/code/src/process_stft.py
+++ b/code/src/process_stft.py
@@ -25,13 +25,10 @@ window = hann(window_size)
 Fs = 1024

 # Number of damage cases (adjust as needed)
-num_damage_cases = 6  # Change to 30 if you have 30 damage cases
-
-# Number of test runs per damage case
-num_test_runs = 5
+num_damage_cases = 0  # Change to 30 if you have 30 damage cases

 # Function to perform STFT and return magnitude
-def compute_stft(vibration_data):
+def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size):
    frequencies, times, Zxx = stft(
        vibration_data, 
        fs=Fs, 
@@ -42,9 +39,13 @@ def compute_stft(vibration_data):
    stft_magnitude = np.abs(Zxx)
    return stft_magnitude.T  # Transpose to have frequencies as columns

-def process_damage_case(damage_num):
+def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop_size, output_dirs=output_dirs):
    damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
-    
+    if damage_num == 0:
+        # Number of test runs per damage case
+        num_test_runs = 120
+    else:
+        num_test_runs = 5
    # Check if the damage folder exists
    if not os.path.isdir(damage_folder):
        print(f"Folder {damage_folder} does not exist. Skipping...")
@@ -79,20 +80,29 @@ def process_damage_case(damage_num):
                print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
                continue
            
-            # Extract vibration data (assuming the second column is sensor data)
            vibration_data = df.iloc[:, 1].values
            
            # Perform STFT
-            stft_magnitude = compute_stft(vibration_data)
+            stft_magnitude = compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size)
            
            # Convert STFT result to DataFrame
            df_stft = pd.DataFrame(
                stft_magnitude, 
                columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])]
            )
+            # only include 21 sample vectors for the first 45 test runs, else 22 — NOTE(review): the slices below keep 22 rows for test_num <= 45 and 21 otherwise; confirm which is intended
+            if damage_num == 0:
+                print(f"Processing damage_num = 0, test_num = {test_num}")
+                if test_num <= 45:
+                    df_stft = df_stft.iloc[:22, :]
+                    print(f"Reduced df_stft shape (21 samples): {df_stft.shape}")
+                else:
+                    df_stft = df_stft.iloc[:21, :]
+                    print(f"Reduced df_stft shape (22 samples): {df_stft.shape}")
            
            # Append to the aggregated list
            aggregated_stft.append(df_stft)
+            print(sum(df.shape[0] for df in aggregated_stft))
        
        # Concatenate all STFT DataFrames vertically
        if aggregated_stft:
@@ -105,6 +115,8 @@ def process_damage_case(damage_num):
            )
            
            # Save the aggregated STFT to CSV
+            with open(output_file, 'w') as file:
+                file.write('sep=,\n')
                df_aggregated.to_csv(output_file, index=False)
            print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}")
        else:
@@ -112,4 +124,4 @@ def process_damage_case(damage_num):

 if __name__ == "__main__":  # Added main guard for multiprocessing
    with multiprocessing.Pool() as pool:
-        pool.map(process_damage_case, range(1, num_damage_cases + 1))
+        pool.map(process_damage_case, range(0, num_damage_cases + 1))
--- a/data/QUGS/convert.py
+++ b/data/QUGS/convert.py
@@ -1,360 +0,0 @@
-import pandas as pd
-import os
-import re
-import sys
-import numpy as np
-from colorama import Fore, Style, init
-from typing import TypedDict, Dict, List
-from joblib import load
-from pprint import pprint
-
-# class DamageFilesIndices(TypedDict):
-#     damage_index: int
-#     files: list[int]
-OriginalSingleDamageScenarioFilePath = str
-DamageScenarioGroupIndex = int
-OriginalSingleDamageScenario = pd.DataFrame
-SensorIndex = int
-VectorColumnIndex = List[SensorIndex]
-VectorColumnIndices = List[VectorColumnIndex]
-DamageScenarioGroup = List[OriginalSingleDamageScenario]
-GroupDataset = List[DamageScenarioGroup]
-
-
-class DamageFilesIndices(TypedDict):
-    damage_index: int
-    files: List[str]
-
-
-def generate_damage_files_index(**kwargs) -> DamageFilesIndices:
-    prefix: str = kwargs.get("prefix", "zzzAD")
-    extension: str = kwargs.get("extension", ".TXT")
-    num_damage: int = kwargs.get("num_damage")
-    file_index_start: int = kwargs.get("file_index_start")
-    col: int = kwargs.get("col")
-    base_path: str = kwargs.get("base_path")
-
-    damage_scenarios = {}
-    a = file_index_start
-    b = col + 1
-    for i in range(1, num_damage + 1):
-        damage_scenarios[i] = range(a, b)
-        a += col
-        b += col
-
-    # return damage_scenarios
-
-    x = {}
-    for damage, files in damage_scenarios.items():
-        x[damage] = []  # Initialize each key with an empty list
-        for i, file_index in enumerate(files, start=1):
-            if base_path:
-                x[damage].append(
-                    os.path.normpath(
-                        os.path.join(base_path, f"{prefix}{file_index}{extension}")
-                    )
-                )
-                # if not os.path.exists(file_path):
-                #     print(Fore.RED + f"File {file_path} does not exist.")
-                #     continue
-            else:
-                x[damage].append(f"{prefix}{file_index}{extension}")
-    return x
-
-    # file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
-    # df = pd.read_csv( file_path, sep="\t", skiprows=10)  # Read with explicit column names
-
-
-class DataProcessor:
-    def __init__(self, file_index: DamageFilesIndices, cache_path: str = None):
-        self.file_index = file_index
-        if cache_path:
-            self.data = load(cache_path)
-        else:
-            self.data = self._load_all_data()
-
-    def _extract_column_names(self, file_path: str) -> List[str]:
-        """
-        Extracts column names from the header of the given file.
-        Assumes the 11th line (index 10) contains column names.
-
-        :param file_path: Path to the data file.
-        :return: List of column names.
-        """
-        with open(file_path, "r") as f:
-            header_lines = [next(f) for _ in range(12)]
-
-        # Extract column names from the 11th line (index 10)
-        channel_line = header_lines[10].strip()
-        tokens = re.findall(r'"([^"]+)"', channel_line)
-        if not channel_line.startswith('"'):
-            first_token = channel_line.split()[0]
-            tokens = [first_token] + tokens
-
-        return tokens  # Prepend 'Time' column if applicable
-
-    def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
-        """
-        Loads a single data file into a pandas DataFrame.
-
-        :param file_path: Path to the data file.
-        :return: DataFrame containing the numerical data.
-        """
-        col_names = self._extract_column_names(file_path)
-        df = pd.read_csv(
-            file_path, delim_whitespace=True, skiprows=11, header=None, memory_map=True
-        )
-        df.columns = col_names
-        return df
-
-    def _load_all_data(self) -> GroupDataset:
-        """
-        Loads all data files based on the grouping dictionary and returns a nested list.
-
-        :return: A nested list of DataFrames where the outer index corresponds to group_idx - 1.
-        """
-        data = []
-        # Find the maximum group index to determine the list size
-        max_group_idx = max(self.file_index.keys()) if self.file_index else 0
-
-        # Initialize empty lists
-        for _ in range(max_group_idx):
-            data.append([])
-
-        # Fill the list with data
-        for group_idx, file_list in self.file_index.items():
-            # Adjust index to be 0-based
-            list_idx = group_idx - 1
-            data[list_idx] = [self._load_dataframe(file) for file in file_list]
-
-        return data
-
-    def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
-        """
-        Returns the list of DataFrames for the given group index.
-
-        :param group_idx: Index of the group.
-        :return: List of DataFrames.
-        """
-        return self.data.get([group_idx, []])
-
-    def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
-        """
-        Returns the column names for the given group and file indices.
-
-        :param group_idx: Index of the group.
-        :param file_idx: Index of the file in the group.
-        :return: List of column names.
-        """
-        if group_idx in self.data and len(self.data[group_idx]) > file_idx:
-            return self.data[group_idx][file_idx].columns.tolist()
-        return []
-
-    def get_data_info(self):
-        """
-        Print information about the loaded data structure.
-        Adapted for when self.data is a List instead of a Dictionary.
-        """
-        if isinstance(self.data, list):
-            # For each sublist in self.data, get the type names of all elements
-            pprint(
-                [
-                    (
-                        [type(item).__name__ for item in sublist]
-                        if isinstance(sublist, list)
-                        else type(sublist).__name__
-                    )
-                    for sublist in self.data
-                ]
-            )
-        else:
-            pprint(
-                {
-                    key: [type(df).__name__ for df in value]
-                    for key, value in self.data.items()
-                }
-                if isinstance(self.data, dict)
-                else type(self.data).__name__
-            )
-
-    def _create_vector_column_index(self) -> VectorColumnIndices:
-        vector_col_idx: VectorColumnIndices = []
-        y = 0
-        for data_group in self.data:  # len(data_group[i]) = 5
-            for j in data_group:  # len(j[i]) =
-                c: VectorColumnIndex = []  # column vector c_{j}
-                x = 0
-                for _ in range(6):  # TODO: range(6) should be dynamic and parameterized
-                    c.append(x + y)
-                    x += 5
-                vector_col_idx.append(c)
-                y += 1
-            return vector_col_idx
-
-    def create_vector_column(self, overwrite=True) -> List[List[List[pd.DataFrame]]]:
-        """
-        Create a vector column from the loaded data.
-
-        :param overwrite: Overwrite the original data with vector column-based data.
-        """
-        idx = self._create_vector_column_index()
-        # if overwrite:
-        for i in range(len(self.data)):
-            for j in range(len(self.data[i])):
-                # Get the appropriate indices for slicing from idx
-                indices = idx[j]
-
-                # Get the current DataFrame
-                df = self.data[i][j]
-
-                # Keep the 'Time' column and select only specified 'Real' columns
-                # First, we add 1 to all indices to account for 'Time' being at position 0
-                real_indices = [index + 1 for index in indices]
-
-                # Create list with Time column index (0) and the adjusted Real indices
-                all_indices = [0] + real_indices
-
-                # Apply the slicing
-                self.data[i][j] = df.iloc[:, all_indices]
-        # TODO: if !overwrite:
-
-    def create_limited_sensor_vector_column(self, overwrite=True):
-        """
-        Create a vector column from the loaded data.
-
-        :param overwrite: Overwrite the original data with vector column-based data.
-        """
-        idx = self._create_vector_column_index()
-        # if overwrite:
-        for i in range(len(self.data)):  # damage(s)
-            for j in range(len(self.data[i])):  # col(s)
-                # Get the appropriate indices for slicing from idx
-                indices = idx[j]
-
-                # Get the current DataFrame
-                df = self.data[i][j]
-
-                # Keep the 'Time' column and select only specified 'Real' columns
-                # First, we add 1 to all indices to account for 'Time' being at position 0
-                real_indices = [index + 1 for index in indices]
-
-                # Create list with Time column index (0) and the adjusted Real indices
-                all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
-
-                # Apply the slicing
-                self.data[i][j] = df.iloc[:, all_indices]
-        # TODO: if !overwrite:
-
-    def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
-        """
-        Export the processed data to CSV files in the required folder structure.
-
-        :param output_dir: Directory to save the CSV files.
-        :param file_prefix: Prefix for the output filenames.
-        """
-        for group_idx, group in enumerate(self.data, start=1):
-            group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
-            os.makedirs(group_folder, exist_ok=True)
-            for test_idx, df in enumerate(group, start=1):
-                # Ensure columns are named uniquely if duplicated
-                df = df.copy()
-                df.columns = ["Time", "Real_0", "Real_1"]  # Rename
-
-                # Export first Real column
-                out1 = os.path.join(
-                    group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv"
-                )
-                df[["Time", "Real_0"]].rename(columns={"Real_0": "Real"}).to_csv(
-                    out1, index=False
-                )
-
-                # Export last Real column
-                out2 = os.path.join(
-                    group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv"
-                )
-                df[["Time", "Real_1"]].rename(columns={"Real_1": "Real"}).to_csv(
-                    out2, index=False
-                )
-
-
-def create_damage_files(base_path, output_base, prefix):
-    # Initialize colorama
-    init(autoreset=True)
-
-    # Generate column labels based on expected duplication in input files
-    columns = ["Real"] + [
-        f"Real.{i}" for i in range(1, 30)
-    ]  # Explicitly setting column names
-
-    sensor_end_map = {
-        1: "Real.25",
-        2: "Real.26",
-        3: "Real.27",
-        4: "Real.28",
-        5: "Real.29",
-    }
-
-    # Define the damage scenarios and the corresponding original file indices
-    damage_scenarios = {
-        1: range(1, 6),  # Damage 1 files from zzzAD1.csv to zzzAD5.csv
-        2: range(6, 11),  # Damage 2 files from zzzAD6.csv to zzzAD10.csv
-        3: range(11, 16),  # Damage 3 files from zzzAD11.csv to zzzAD15.csvs
-        4: range(16, 21),  # Damage 4 files from zzzAD16.csv to zzzAD20.csv
-        5: range(21, 26),  # Damage 5 files from zzzAD21.csv to zzzAD25.csv
-        6: range(26, 31),  # Damage 6 files from zzzAD26.csv to zzzAD30.csv
-    }
-    damage_pad = len(str(len(damage_scenarios)))
-    test_pad = len(str(30))
-
-    for damage, files in damage_scenarios.items():
-        for i, file_index in enumerate(files, start=1):
-            # Load original data file
-            file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
-            df = pd.read_csv(
-                file_path, sep="\t", skiprows=10
-            )  # Read with explicit column names
-
-            top_sensor = columns[i - 1]
-            print(top_sensor, type(top_sensor))
-            output_file_1 = os.path.join(
-                output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_01.csv"
-            )
-            print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT")
-            print("Taking datetime column on index 0...")
-            print(f"Taking `{top_sensor}`...")
-            os.makedirs(os.path.dirname(output_file_1), exist_ok=True)
-            df[["Time", top_sensor]].to_csv(output_file_1, index=False)
-            print(Fore.GREEN + "Done")
-
-            bottom_sensor = sensor_end_map[i]
-            output_file_2 = os.path.join(
-                output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_02.csv"
-            )
-            print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT")
-            print("Taking datetime column on index 0...")
-            print(f"Taking `{bottom_sensor}`...")
-            os.makedirs(os.path.dirname(output_file_2), exist_ok=True)
-            df[["Time", bottom_sensor]].to_csv(output_file_2, index=False)
-            print(Fore.GREEN + "Done")
-            print("---")
-
-
-def main():
-    if len(sys.argv) < 2:
-        print("Usage: python convert.py <path_to_csv_files>")
-        sys.exit(1)
-
-    base_path = sys.argv[1]
-    output_base = sys.argv[2]
-    prefix = sys.argv[3]  # Define output directory
-
-    # Create output folders if they don't exist
-    # for i in range(1, 7):
-    #     os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
-
-    create_damage_files(base_path, output_base, prefix)
-    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
-
-
-if __name__ == "__main__":
-    main()
--- a/data/QUGS/test.py
+++ b/data/QUGS/test.py
@@ -1,25 +1,52 @@
-from convert import *
+from data_preprocessing import *
 from joblib import dump, load

+# b = generate_damage_files_index(
+#     num_damage=6,
+#     file_index_start=1,
+#     col=5,
+#     base_path="D:/thesis/data/dataset_B",
+#     prefix="zzzBD",
+#     # undamage_file="zzzBU.TXT"
+# )
+# Example: Generate tuples with a special group of df0 at the beginning
+special_groups_A = [
+    {'df_name': 'zzzAU.TXT', 'position': 0, 'size': 5}  # Add at beginning
+]
+
+special_groups_B = [
+    {'df_name': 'zzzBU.TXT', 'position': 0, 'size': 5}  # Add at beginning
+]
+
+# Generate the tuples with the special group
+a_complement = [(comp)
+                for n in range(1, 31)
+                for comp in complement_pairs(n)]
+a = generate_df_tuples(special_groups=a_complement, prefix="zzzAD")
+
+# b_complement = [(comp)
+#                 for n in range(1, 31)
+#                 for comp in complement_pairs(n)]
+# b = generate_df_tuples(special_groups=b_complement, prefix="zzzBD")
+
+
 # a = generate_damage_files_index(
-#     num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
+#     num_damage=6,
+#     file_index_start=1,
+#     col=5,
+#     base_path="D:/thesis/data/dataset_A",
+#     prefix="zzzAD",
+#     # undamage_file="zzzBU.TXT"
 # )

-b = generate_damage_files_index(
-    num_damage=6,
-    file_index_start=1,
-    col=5,
-    base_path="D:/thesis/data/dataset_B",
-    prefix="zzzBD",
-)
-# data_A = DataProcessor(file_index=a)
-# # data.create_vector_column(overwrite=True)
-# data_A.create_limited_sensor_vector_column(overwrite=True)
-# data_A.export_to_csv("D:/thesis/data/converted/raw")
+data_A = DataProcessor(file_index=a, base_path="D:/thesis/data/dataset_A", include_time=True)
+# data_A.create_vector_column(overwrite=True)
+# # data_A.create_limited_sensor_vector_column(overwrite=True)
+data_A.export_to_csv("D:/thesis/data/converted/raw")

-data_B = DataProcessor(file_index=b)
-# data.create_vector_column(overwrite=True)
-data_B.create_limited_sensor_vector_column(overwrite=True)
-data_B.export_to_csv("D:/thesis/data/converted/raw_B")
+# data_B = DataProcessor(file_index=b, base_path="D:/thesis/data/dataset_B", include_time=True)
+# data_B.create_vector_column(overwrite=True)
+# # data_B.create_limited_sensor_vector_column(overwrite=True)
+# data_B.export_to_csv("D:/thesis/data/converted/raw_B")
 # a = load("D:/cache.joblib")
 # breakpoint()
--- a/latex/chapters/id/03_methodology/steps/index.tex
+++ b/latex/chapters/id/03_methodology/steps/index.tex
@@ -3,7 +3,7 @@ Alur keseluruhan penelitian ini dilakukan melalui tahapan-tahapan sebagai beriku

 \begin{figure}[H]
    \centering
-    \includegraphics[width=0.3\linewidth]{chapters/id/flow.png}
+    \includegraphics[width=0.3\linewidth]{chapters/img/flow.png}
    \caption{Diagram alir tahapan penelitian}
    \label{fig:flowchart}
 \end{figure}
--- a/latex/figures/A4
+++ b/latex/figures/A4
--- a/latex/main.tex
+++ b/latex/main.tex
@@ -1,14 +1,18 @@
 \documentclass[draftmark]{thesis}

-% Title Information
-\setthesisinfo
-  {Prediksi Lokasi Kerusakan dengan Machine Learning}
-  {Rifqi Damar Panuluh}
-  {20210110224}
-  {PROGRAM STUDI TEKNIK SIPIL}
-  {FAKULTAS TEKNIK}
-  {UNIVERSITAS MUHAMMADIYAH YOGYAKARTA}
-  {2025}
+% Metadata
+\title{Prediksi Lokasi Kerusakan dengan Machine Learning}
+\author{Rifqi Damar Panuluh}
+\date{\today}
+\authorid{20210110224}
+\firstadvisor{Ir. Muhammad Ibnu Syamsi, Ph.D.}
+\secondadvisor{}
+\headdepartement{Puji Harsanto, S.T., M.T., Ph.D.}
+\headdepartementid{19740607201404123064}
+\faculty{Fakultas Teknik}
+\program{Program Studi Teknik Sipil}
+\university{Universitas Muhammadiyah Yogyakarta}
+\yearofsubmission{2025}

 % Input preamble
 \input{preamble/packages}
@@ -16,13 +20,13 @@
 \input{preamble/macros}

 \begin{document}
-\input{frontmatter/maketitle}
-\input{frontmatter/maketitle_secondary}
+% \input{frontmatter/maketitle}
+% \input{frontmatter/maketitle_secondary}
 \frontmatter
 % \input{frontmatter/approval}\clearpage
 % \input{frontmatter/originality}\clearpage
 % \input{frontmatter/acknowledgement}\clearpage
-\tableofcontents
+% \tableofcontents
 \clearpage
 \mainmatter
 \pagestyle{fancyplain}
--- a/latex/metadata.tex
+++ b/latex/metadata.tex
@@ -1,11 +0,0 @@
-\newcommand{\studentname}{Rifqi Damar Panuluh}
-\newcommand{\studentid}{20210110224}
-\newcommand{\thesistitle}{Prediksi Lokasi Kerusakan dengan Machine Learning}
-\newcommand{\firstadvisor}{Ir. Muhammad Ibnu Syamsi, Ph.D.}
-\newcommand{\secondadvisor}{}
-\newcommand{\headdepartement}{Puji Harsanto, S.T. M.T., Ph.D.}
-\newcommand{\headdepartementid}{19740607201404123064}
-\newcommand{\faculty}{Fakultas Teknik}
-\newcommand{\program}{Teknik Sipil}
-\newcommand{\university}{Universitas Muhammadiyah Yogyakarta}
-\newcommand{\yearofsubmission}{2025}
--- a/latex/thesis.cls
+++ b/latex/thesis.cls
@@ -1,7 +1,7 @@
 \NeedsTeXFormat{LaTeX2e}
 \ProvidesClass{thesis}[2025/05/10 Bachelor Thesis Class]

-\newif\if@draftmark
+\newif\if@draftmark \@draftmarkfalse
 \@draftmarkfalse

 \DeclareOption{draftmark}{\@draftmarktrue}
@@ -12,6 +12,7 @@
 \RequirePackage{polyglossia}
 \RequirePackage{fontspec}
 \RequirePackage{titlesec}
+\RequirePackage{titling}
 \RequirePackage{fancyhdr}
 \RequirePackage{geometry}
 \RequirePackage{setspace}
@@ -24,7 +25,8 @@
 \RequirePackage{svg}           % Allows including SVG images directly
 \RequirePackage{indentfirst}   % Makes first paragraph after headings indented
 \RequirePackage{float}         % Provides [H] option to force figure/table placement
-\RequirePackage[style=apa, backend=biber, language=indonesian]{biblatex}
+\RequirePackage[style=apa, backend=biber]{biblatex}
+\RequirePackage[acronym, nogroupskip, toc]{glossaries}
 % Polyglossia set language
 \setdefaultlanguage[variant=indonesian]{malay}  % Proper Indonesian language setup
 \setotherlanguage{english}             % Enables English as secondary language
@@ -36,17 +38,18 @@
 % Conditionally load the watermark package and settings
 \if@draftmark
  \RequirePackage{draftwatermark}
-  \SetWatermarkText{nuluh/thesis (wip) draft: \today}
+  \SetWatermarkText{nuluh/thesis (wip) [draft: \today]}
  \SetWatermarkColor[gray]{0.8}                    % Opacity: 0.8 = 20% transparent  
  \SetWatermarkFontSize{1.5cm}
  \SetWatermarkAngle{90}
  \SetWatermarkHorCenter{1.5cm}
+  \RequirePackage[left]{lineno}
+  \linenumbers
 \fi

 % Page layout
-\geometry{left=3cm, top=3cm, right=3cm, bottom=3cm}
+\geometry{left=4cm, top=3cm, right=3cm, bottom=3cm}
 \setlength{\parskip}{0.5em}
-\setlength{\parindent}{0pt}
 \onehalfspacing

 % Fonts
@@ -55,17 +58,45 @@
 \setsansfont{Arial}
 \setmonofont{Courier New}

-
-\newcommand{\setthesisinfo}[7]{%
-  \renewcommand{\thesistitle}{#1}%
-  \renewcommand{\studentname}{#2}%
-  \renewcommand{\studentid}{#3}%
-  \renewcommand{\program}{#4}%
-  \renewcommand{\faculty}{#5}%
-  \renewcommand{\university}{#6}%
-  \renewcommand{\yearofsubmission}{#7}%
+\makeatletter
+% Extracting the Year from \today
+\newcommand{\theyear}{%
+  \expandafter\@car\expandafter\@gobble\the\year\@nil
 }

+% Declare internal macros as initially empty
+\newcommand{\@authorid}{}
+\newcommand{\@firstadvisor}{}
+\newcommand{\@secondadvisor}{}
+\newcommand{\@headdepartement}{}
+\newcommand{\@headdepartementid}{}
+\newcommand{\@faculty}{}
+\newcommand{\@program}{}
+\newcommand{\@university}{}
+\newcommand{\@yearofsubmission}{}
+
+% Define user commands to set these values.
+\newcommand{\authorid}[1]{\gdef\@authorid{#1}}
+\newcommand{\firstadvisor}[1]{\gdef\@firstadvisor{#1}}
+\newcommand{\secondadvisor}[1]{\gdef\@secondadvisor{#1}}
+\newcommand{\headdepartement}[1]{\gdef\@headdepartement{#1}}
+\newcommand{\headdepartementid}[1]{\gdef\@headdepartementid{#1}}
+\newcommand{\faculty}[1]{\gdef\@faculty{#1}}
+\newcommand{\program}[1]{\gdef\@program{#1}}
+\newcommand{\university}[1]{\gdef\@university{#1}}
+\newcommand{\yearofsubmission}[1]{\gdef\@yearofsubmission{#1}}
+
+% Now expose robust “the‑” getters to access the values
+\newcommand{\theauthorid}{\@authorid}
+\newcommand{\thefirstadvisor}{\@firstadvisor}
+\newcommand{\thesecondadvisor}{\@secondadvisor}
+\newcommand{\theheaddepartement}{\@headdepartement}
+\newcommand{\theheaddepartementid}{\@headdepartementid}
+\newcommand{\thefaculty}{\@faculty}
+\newcommand{\theprogram}{\@program}
+\newcommand{\theuniversity}{\@university}
+\newcommand{\theyearofsubmission}{\@yearofsubmission}
+\makeatother
 % % Header and footer
 \fancypagestyle{fancy}{%
    \fancyhf{}
@@ -107,8 +138,6 @@
 \renewcommand{\cftchappresnum}{BAB~}
 \renewcommand{\cftchapaftersnum}{\quad}

-% \titlespacing*{\chapter}{0pt}{-10pt}{20pt}
-
 % Chapter & Section format
 \renewcommand{\cftchapfont}{\normalsize\MakeUppercase}
 % \renewcommand{\cftsecfont}{}
@@ -130,11 +159,15 @@
 \setlength{\cftsubsecnumwidth}{2.5em}
 \setlength{\cftfignumwidth}{5em}
 \setlength{\cfttabnumwidth}{4em}
-\renewcommand \cftchapdotsep{1}           % Denser dots (closer together) https://tex.stackexchange.com/a/273764
-\renewcommand \cftsecdotsep{1}            % Apply to sections too
-\renewcommand \cftsubsecdotsep{1}         % Apply to subsections too
+\renewcommand \cftchapdotsep{1} % https://tex.stackexchange.com/a/273764
+\renewcommand \cftsecdotsep{1} % https://tex.stackexchange.com/a/273764
+\renewcommand \cftsubsecdotsep{1} % https://tex.stackexchange.com/a/273764
+\renewcommand \cftfigdotsep{1.5} % https://tex.stackexchange.com/a/273764
+\renewcommand \cfttabdotsep{1.5} % https://tex.stackexchange.com/a/273764
 \renewcommand{\cftchapleader}{\normalfont\cftdotfill{\cftsecdotsep}}
 \renewcommand{\cftchappagefont}{\normalfont}
+
+% Add prefix in the LoF and LoT entries
 \renewcommand{\cftfigpresnum}{\figurename~}
 \renewcommand{\cfttabpresnum}{\tablename~}

@@ -159,6 +192,147 @@
 % \renewcommand{\cfttoctitlefont}{\bfseries\MakeUppercase}
 % \renewcommand{\cftaftertoctitle}{\vskip 2em}

+% Defines a new glossary called “notation”
+\newglossary[nlg]{notation}{not}{ntn}{Notation}
+
+% Define the header for the location column
+\providecommand*{\locationname}{Location}
+
+% Define the new glossary style called 'mylistalt' for main glossaries
+\makeatletter
+\newglossarystyle{mylistalt}{%
+  % start the list, initializing glossaries internals
+  \renewenvironment{theglossary}%
+    {\glslistinit\begin{enumerate}}%
+    {\end{enumerate}}%
+  % suppress all headers/groupskips
+  \renewcommand*{\glossaryheader}{}%
+  \renewcommand*{\glsgroupheading}[1]{}%
+  \renewcommand*{\glsgroupskip}{}%
+  % main entries: let \item produce "1." etc., then break
+  \renewcommand*{\glossentry}[2]{%
+    \item \glstarget{##1}{\glossentryname{##1}}%
+    \mbox{}\\
+    \glossentrydesc{##1}\space 
+    [##2] % appears on page x
+  }%
+  % sub-entries as separate paragraphs, still aligned
+  \renewcommand*{\subglossentry}[3]{%
+    \par
+    \glssubentryitem{##2}%
+    \glstarget{##2}{\strut}\space
+    \glossentrydesc{##2}\space ##3%
+  }%
+}
+
+
+% Define the new glossary style 'altlong3customheader' for notation
+\newglossarystyle{altlong3customheader}{%
+  % The glossary will be a longtable environment with three columns:
+  % 1. Symbol (left-aligned)
+  % 2. Description (paragraph, width \glsdescwidth)
+  % 3. Location (paragraph, width \glspagelistwidth)
+  \renewenvironment{theglossary}%
+    {\begin{longtable}{lp{\glsdescwidth}p{\glspagelistwidth}}}%
+    {\end{longtable}}%
+  % Define the table header row
+  \renewcommand*{\symbolname}{Simbol}
+  \renewcommand*{\descriptionname}{Keterangan}
+  \renewcommand*{\locationname}{Halaman}
+  \renewcommand*{\glossaryheader}{%
+    \bfseries\symbolname & \bfseries\descriptionname & \bfseries\locationname \tabularnewline\endhead}%
+  % Suppress group headings (e.g., A, B, C...)
+  \renewcommand*{\glsgroupheading}[1]{}%
+  % Define how a main glossary entry is displayed
+  % ##1 is the entry label
+  % ##2 is the location list (page numbers)
+  \renewcommand{\glossentry}[2]{%
+    \glsentryitem{##1}% Inserts entry number if entrycounter option is used
+    \glstarget{##1}{\glossentryname{##1}} & % Column 1: Symbol (with hyperlink target)
+    \glossentrydesc{##1}\glspostdescription & % Column 2: Description (with post-description punctuation)
+    ##2\tabularnewline % Column 3: Location list
+  }%
+  % Define how a sub-entry is displayed
+  % ##1 is the sub-entry level (e.g., 1 for first sub-level)
+  % ##2 is the entry label
+  % ##3 is the location list
+  \renewcommand{\subglossentry}[3]{%
+    & % Column 1 (Symbol) is left blank for sub-entries to create an indented look
+    \glssubentryitem{##2}% Inserts sub-entry number if subentrycounter is used
+    \glstarget{##2}{\strut}\glossentrydesc{##2}\glspostdescription & % Column 2: Description (target on strut for hyperlink)
+    ##3\tabularnewline % Column 3: Location list
+  }%
+  % Define the skip between letter groups (if group headings were enabled)
+  % For 3 columns, we need 2 ampersands for a full blank row if not using \multicolumn
+  \ifglsnogroupskip
+    \renewcommand*{\glsgroupskip}{}%
+  \else
+    \renewcommand*{\glsgroupskip}{& & \tabularnewline}%
+  \fi
+}
+
+% Define a new style 'supercol' based on 'super' for acronyms glossaries
+\newglossarystyle{supercol}{%
+  \setglossarystyle{super}% inherit everything from the original
+  % override just the main-entry format:
+  \renewcommand*{\glossentry}[2]{%
+    \glsentryitem{##1}%
+    \glstarget{##1}{\glossentryname{##1}}\space  % <-- added colon here
+    &: \glossentrydesc{##1}\glspostdescription\space ##2\tabularnewline
+  }%
+  % likewise for sub‐entries, if you want a colon there too:
+  \renewcommand*{\subglossentry}[3]{%
+    &: 
+    \glssubentryitem{##2}%
+    \glstarget{##2}{\strut}\glossentryname{##2}\space % <-- and here
+    \glossentrydesc{##2}\glspostdescription\space ##3\tabularnewline
+  }%
+}
+\makeatother
+
+% A new command that enables us to enter bi-lingual (Bahasa Indonesia and English) terms
+% syntax: \addterm[options]{label}{Bahasa Indonesia}{Bahasa Indonesia first use}{English}{Bahasa Indonesia
+% description}
+\newcommand{\addterm}[6][]{
+  \newglossaryentry{#2}{
+    name={#3 (angl.\ #5)},
+    first={#4 (\emph{#5})},
+    text={#3},
+    sort={#3},
+    description={#6},
+    #1 % pass additional options to \newglossaryentry
+  }
+}
+
+% A new command that enables us to enter (English) acronyms with bi-lingual
+% (Bahasa Indonesia and English) long versions
+% syntax: \addacronym[options]{label}{abbreviation}{Bahasa Indonesia long}{Bahasa Indonesia first
+% use long}{English long}{Bahasa Indonesia description}
+\newcommand{\addacronym}[7][]{
+  % Create the main glossary entry with \newacronym
+  % \newacronym[key-val list]{label}{abbrv}{long}
+  \newacronym[
+    name={#4 (angl.\ #6,\ #3)},
+    first={\emph{#5} (angl.\ \emph{#6},\ \emph{#3})},
+    sort={#4},
+    description={#7},
+    #1 % pass additional options to \newglossaryentry
+    ]
+    {#2}{#3}{#4}
+  % Create a cross-reference from the abbreviation to the main glossary entry by
+  % creating an auxiliary glossary entry (note: we set the label of this entry
+  % to '<original label>_auxiliary' to avoid clashes)
+  \newglossaryentry{#2_auxiliary}{
+    name={#3},
+    sort={#3},
+    description={\makefirstuc{#6}},
+    see=[See:]{#2}
+  }
+}
+
+% Change the text of the cross-reference links to the Bahasa Indonesia long version.
+\renewcommand*{\glsseeitemformat}[1]{\emph{\acrlong{#1}}.}
+
 % % Apply a custom fancyhdr layout only on the first page of each \chapter, and use no header/footer elsewhere
 % % \let\oldchapter\chapter
 % % \renewcommand{\chapter}{%
Author	SHA1	Message	Date
nuluh	3e2b153d11	refactor(stft): comment out unused imports and update SVM model loading for consistency	2025-07-28 05:22:24 +07:00
nuluh	3cbef17b0c	feat(model_selection): add timing for model training and validation processes	2025-07-28 05:20:10 +07:00
Rifqi D. Panuluh	80d4a66925	Merge pull request #100 from nuluh/feature/99-exp-alternative-undamage-case-data [EXP] Alterntive Undamage Case Data	2025-07-24 18:09:05 +07:00
nuluh	9b018efc15	refactor(notebooks): update STFT notebook to improve clarity and structure of sensor evaluation sections	2025-07-24 17:00:31 +07:00
nuluh	2fbdeac1eb	refactor(test): update import statement to use data_preprocessing module	2025-07-18 19:29:02 +07:00
nuluh	086032c250	refactor(notebooks): clean up to be more readable notebooks	2025-07-18 19:28:43 +07:00
nuluh	f6c71739df	refactor(ml): clean up model_selection.py by removing unused code and improving function structure	2025-07-18 19:27:46 +07:00
Rifqi D. Panuluh	2dc915949b	chore(.gitignore): add additional LaTeX file types to ignore list	2025-07-17 14:05:59 +00:00
nuluh	18824e05c0	refactor(ml): update inference calls to use new model structure and improve clarity	2025-07-17 00:18:01 +07:00
nuluh	2504157b29	feat(src): replace `convert.py` to `src/data_preprocessing.py` and fix some functions `prefix` parameter	2025-07-02 03:25:18 +07:00
nuluh	5ba628b678	refactor(src): make `compute_stft` and `process_damage_case` to be pure function that explicitly need STFT arguments to be passed	2025-07-01 14:32:52 +07:00
nuluh	a93adc8af3	feat(notebooks): minimize stft.ipynb notebooks and add STFT data preview plot. - Consolidated import statements for pandas and matplotlib. - Updated STFT plotting for Sensor 1 and Sensor 2 datasets with improved visualization using pcolormesh. - Enhanced subplot organization for better clarity in visual representation. - Added titles and adjusted layout for all plots.	2025-06-30 01:36:44 +07:00
nuluh	c2df42cc2b	feat(ml): add XGBoost model to inference options and update commented inference calls	2025-06-27 10:35:27 +07:00
nuluh	465ed121f9	feat(notebooks): training model with new alternative undamaged (label 0) data	2025-06-27 10:34:23 +07:00
nuluh	d6975b4817	feat(src): update damage base path and adjust test run logic for damage case processing for undamage case new method	2025-06-27 10:33:54 +07:00
nuluh	79070921d7	feat(data): add complement_pairs function to generate complement tuples for implementing alternative undamage case method	2025-06-27 10:33:36 +07:00
nuluh	e8eb07a91b	refactor(data): improve variable naming in generate_df_tuples function for clarity	2025-06-26 10:53:10 +07:00
nuluh	c98c6a091b	refactor(data): update generate_df_tuples function for improved code readability	2025-06-26 10:51:29 +07:00
nuluh	9921d7663b	feat(src): add inference script for model evaluation	2025-06-24 14:08:38 +07:00
nuluh	459fbcc17a	refactor(notebooks): visualization for sensor analysis and streamline data processing	2025-06-24 14:08:02 +07:00
nuluh	5041ee3feb	feat(src): add confusion matrix plotting and label percentage calculation	2025-06-24 14:06:56 +07:00
nuluh	114ab849b9	feat(src): Add confusion matrix plotting function for model evaluation	2025-06-24 00:27:15 +07:00
nuluh	6196523ea0	feat(notebooks): Add confusion matrix plotting loop for Sensor 1 models	2025-06-21 01:10:03 +07:00
Rifqi D. Panuluh	46b66e0a90	Merge pull request #98 from nuluh/feat/53-feat-include-undamaged-node-classification Closes #53	2025-06-18 09:06:04 +07:00
nuluh	18892c1188	WIP(notebooks): Add SVM with StandardScaler and PCA to sensor model definitions	2025-06-18 08:31:55 +07:00
nuluh	d0b603ba9f	fix(data): Update DataProcessor instantiation for new data preprocessing implementation	2025-06-18 08:30:12 +07:00
nuluh	a7d8f1ef56	fix(data): Fix pool mapping to include undamaged case and add csv header separator line for Excel compatibility	2025-06-18 08:25:01 +07:00
nuluh	1164627bac	fix(data): Fix export_to_csv to adapt new added undamaged scenario and add new parameter `include_time` to include 'Time' data	2025-06-18 01:54:12 +07:00
nuluh	58a672a680	fix(data): Fix generate_df_tuples function output bug when special_groups args is passed	2025-06-17 13:20:27 +07:00
nuluh	24c1484300	feat(data): Enhance DataProcessor to support dynamic base path and improve data loading with error handling and memory efficiency	2025-06-16 17:35:27 +07:00
nuluh	60ff4e0fa9	feat(data): Propose new damage file index generation to improve structure and flexibility in DataFrame handling	2025-06-16 03:13:07 +07:00
nuluh	3e652accfb	refactor(data): remove unnecessary variable declaration in DataProcessor for loading dataframes	2025-06-14 04:02:42 +07:00
nuluh	66a09e0ddf	feat(data): Enhance damage file index generation with undamaged file handling and improved error management (WIP)	2025-06-14 04:02:42 +07:00
nuluh	195f8143f0	refactor(data): remove redundant column extraction method and simplify dataframe loading	2025-06-14 00:57:54 +07:00
nuluh	e7332252a6	Merge branch 'feat/90-feat-preserve-trained-model' into dev	2025-06-12 03:38:15 +07:00
nuluh	4b0819f94e	feat(notebooks): Enhance STFT notebook and model selection functionality - Updated paths in the STFT notebook to reflect new data files. - Improved plotting aesthetics for combined plots and added grid lines. - Introduced a 3D spectrogram visualization for better data representation. - Refactored model training function to include error handling and model export functionality. - Adjusted model training calls to include export paths for saved models. Closes #90 - Added additional markdown cells for better documentation and clarity in the notebook.	2025-06-12 03:35:21 +07:00
nuluh	7613c08ebd	feat(figures): add data preprocessing illustration diagram	2025-06-10 17:21:49 +07:00
nuluh	ad6cda4270	fix(notebooks): update sensor data paths and improve plotting aesthetics	2025-06-10 17:20:13 +07:00
nuluh	ebaa263781	chore(convert): comment out create_damage_files obsolete function	2025-06-09 18:59:51 +07:00
nuluh	f5dada1b9c	fix(latex): fix image path for flowchart in methodology section	2025-06-04 15:59:13 +07:00
nuluh	37c9a0765a	fix(documentclass): remove language option from biblatex package	2025-06-04 15:53:57 +07:00
nuluh	8656289a1c	chore(documentclass): comment out table of contents for temporary removal	2025-06-04 15:53:35 +07:00
nuluh	15fe8339ec	feat(documentclass): add new glossary for notation	2025-06-04 15:31:00 +07:00
nuluh	44210ef372	chore(latex): comment out maketitle inputs for temporary	2025-06-04 11:27:56 +07:00
nuluh	9192d4c81c	chore(documentclass): remove commented-out code for chapter formatting and header layout	2025-06-03 21:37:32 +07:00
nuluh	0373743ca7	fix(documentclass): enhance dot separation in ToC and add prefixes for figures and tables	2025-06-03 21:34:05 +07:00
nuluh	49d6395e6f	fix(documentclass): add missing \RequirePackage{titling} for maketitle formatting	2025-06-03 21:16:34 +07:00
nuluh	bf9cca2d90	feat(documentclass): redefine metadata information to main.tex by consolidating internal commands inside thesis.cls and remove metadata.tex Closes #96	2025-06-03 21:13:28 +07:00
nuluh	08420296e6	fix(documentclass): add missing \makeatother command to properly close the @ symbol	2025-06-03 20:59:11 +07:00
nuluh	1540213eec	feat(documentclass): add commands for bilingual terms and acronyms with custom glossary entries	2025-06-03 20:58:18 +07:00
nuluh	6fd4b7465e	feat(documentclass): add new glossary style 'supercol' for enhanced acronym formatting Closes #85	2025-06-03 20:55:26 +07:00
nuluh	85a0aebf36	feat(documentclass): add custom glossary style 'altlong3customheader' for notation with three-column layout Closes #95	2025-06-03 20:54:45 +07:00
nuluh	8d1edfdbf7	feat(glossaries): add glossary support with custom style for main glossaries entry and location header Closes #84	2025-06-03 20:52:54 +07:00
nuluh	ff862d9467	fix(documentclass): adjust page layout by increasing left margin to 4cm	2025-06-03 20:39:03 +07:00
nuluh	dfb64db1d8	feat(documentclass): add draft watermark and optional line numbering with 'draftmark' option	2025-06-03 20:37:29 +07:00
Rifqi D. Panuluh	3e3de577ba	Merge pull request #94 from nuluh/latex/91-bug-expose-maketitle Maketitle Replaced with \input for Flexibility when integrated with latexdiff-latexpand Workflow	2025-06-03 20:16:30 +07:00