refactor(stft): comment out unused imports and update SVM model loading for consistency

feat(model_selection): add timing for model training and validation processes
Merge pull request #100 from nuluh/feature/99-exp-alternative-undamage-case-data
2025-07-28 05:22:24 +07:00 · 2025-07-28 05:20:10 +07:00 · 2025-07-24 18:09:05 +07:00 · 2025-07-24 17:00:31 +07:00 · 2025-07-18 19:29:02 +07:00 · 2025-07-18 19:28:43 +07:00
20 changed files with 1762 additions and 1412 deletions
--- a/.github/workflows/latex-lint.yml
+++ b/.github/workflows/latex-lint.yml
@@ -1,52 +0,0 @@
-name: LaTeX Lint
-
-on:
-  push:
-    branches:
-      - main
-      - dev
-    paths:
-      - 'latex/**/*.tex'
-      - 'latex/main.tex'
-  workflow_dispatch:
-  
-jobs:
-  lint:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Install chktex
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y chktex
-
-      - name: Run chktex inside latex/
-        working-directory: latex
-        run: |
-          TEX_FILES=$(find . -type f -name "*.tex")
-          if [ -z "$TEX_FILES" ]; then
-            echo "No .tex files found in latex/. Skipping lint."
-            exit 0
-          fi
-          
-          echo "🔍 Linting .tex files with chktex..."
-          FAIL=0
-          
-          for f in $TEX_FILES; do
-            echo "▶ Checking $f"
-            # Run chktex and show output; capture error status
-            if ! chktex "$f"; then
-              echo "::warning file=$f::ChkTeX found issues in $f"
-              FAIL=1
-            fi
-          done
-          
-          if [ $FAIL -ne 0 ]; then
-            echo "::error::❌ Lint errors or warnings were found in one or more .tex files above."
-            exit 1
-          else
-            echo "✅ All files passed chktex lint."
-          fi
--- a/.github/workflows/latexdiff.yml
+++ b/.github/workflows/latexdiff.yml
@@ -1,102 +0,0 @@
-name: LaTeX Diff
-
-on:
-  workflow_dispatch:
-    inputs:
-      base_branch:
-        description: 'Base branch (older version)'
-        required: true
-      compare_branch:
-        description: 'Compare branch (new version)'
-        required: true
-
-jobs:
-  latexdiff:
-    runs-on: ubuntu-latest
-    container:
-      image: ghcr.io/xu-cheng/texlive-full:latest
-      options: --user root
-
-    steps:
-      - name: Install latexpand (Perl script)
-        run: |
-          tlmgr init-usertree
-          tlmgr install latexpand
-        
-      - name: Checkout base branch
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.inputs.base_branch }}
-          path: base
-
-      - name: Checkout compare branch
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.inputs.compare_branch }}
-          path: compare
-
-
-      - name: Create output folder
-        run: mkdir -p diff_output
-
-      - name: Flatten base/main.tex (with latexpand)
-        run: |
-          cd base/latex
-          echo "📂 Listing files in base/latex:"
-          ls -R
-          echo "🔄 Flattening with latexpand..."
-          latexpand --verbose --keep-comments --output=../../diff_output/base_flat.tex main.tex
-          echo "✅ Preview of base_flat.tex:"
-          head -n 50 ../../diff_output/base_flat.tex
-
-
-      - name: Flatten compare/main.tex (with latexpand)
-        run: |
-          cd compare/latex
-          echo "📂 Listing files in compare/latex:"
-          ls -R
-          echo "🔄 Flattening with latexpand..."
-          latexpand --verbose --keep-comments --output=../../diff_output/compare_flat.tex main.tex
-          echo "✅ Preview of compare_flat.tex:"
-          head -n 50 ../../diff_output/compare_flat.tex
-
-      - name: Generate diff.tex using latexdiff
-        run: |
-          latexdiff diff_output/base_flat.tex diff_output/compare_flat.tex > diff_output/diff.tex
-            
-      - name: Copy thesis.cls to diff_output
-        run: cp compare/latex/thesis.cls diff_output/
-
-      - name: Copy chapters/img into diff_output
-        run: |
-          # Create the same chapters/img path inside diff_output
-          mkdir -p diff_output/chapters/img
-          # Copy all images from compare branch into diff_output
-          cp -R compare/latex/chapters/img/* diff_output/chapters/img/
-          
-      - name: Copy .bib files into diff_output
-        run: |
-          mkdir -p diff_output
-          cp compare/latex/*.bib diff_output/
-        
-      - name: Override “\input{preamble/fonts}” in diff.tex
-        run: |
-          sed -i "/\\input{preamble\/fonts}/c % — replaced by CI: use TeX Gyre fonts instead of Times New Roman\/Arial\n\\\setmainfont{TeX Gyre Termes}\n\\\setsansfont{TeX Gyre Heros}\n\\\setmonofont{TeX Gyre Cursor}" diff_output/diff.tex
-          
-      - name: Print preview of diff.tex (after font override)
-        run: |
-          echo "📄 Preview of diff_output/diff.tex after font override:"
-          head -n 50 diff_output/diff.tex
-        
-      - name: Compile diff.tex to PDF
-        working-directory: diff_output
-        continue-on-error: true
-        run: |
-          xelatex -interaction=nonstopmode diff.tex
-          xelatex -interaction=nonstopmode diff.tex
-
-      - name: Upload diff output files
-        uses: actions/upload-artifact@v4
-        with:
-          name: latex-diff-output
-          path: diff_output/
--- a/.github/workflows/latexmk.yml
+++ b/.github/workflows/latexmk.yml
@@ -1,29 +0,0 @@
-name: Render XeLaTeX on PR to dev
-
-on:
-  pull_request:
-    branches:
-      - dev
-
-jobs:
-  build-pdf:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Compile XeLaTeX
-        uses: dante-ev/latex-action@2021-A
-        with:
-          root_file: main.tex
-          working_directory: latex
-          compiler: xelatex
-          args: -interaction=nonstopmode -halt-on-error -file-line-error
-          extra_system_packages: "fonts-freefont-otf"
-
-      - name: Upload compiled PDF
-        uses: actions/upload-artifact@v4
-        with:
-          name: compiled-pdf
-          path: latex/main.pdf
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,16 @@ data/**/*.csv
 .venv/
 *.pyc
 *.egg-info/
+
+# Latex
+*.aux
+*.log
+*.out
+*.toc
+*.bbl
+*.blg
+*.fdb_latexmk
+*.fls
+*.synctex.gz
+*.dvi
+
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,7 @@
 {
-  "python.analysis.extraPaths": ["./code/src/features"],
+  "python.analysis.extraPaths": [
+    "./code/src/features",
+    "${workspaceFolder}/code/src"
+  ],
  "jupyter.notebookFileRoot": "${workspaceFolder}/code"
 }
--- a/README.md
+++ b/README.md
@@ -4,14 +4,20 @@ This repository contains the work related to my thesis, which focuses on damage

 **Note:** This repository does not contain the secondary data used in the analysis. The code is designed to work with data from the [QUGS (Qatar University Grandstand Simulator)](https://www.structuralvibration.com/benchmark/qugs/) dataset, which is not included here.

+The repository is private and access is restricted only to those who have been given explicit permission by the owner. Access is provided solely for the purpose of brief review or seeking technical guidance.
+
 ## Restrictions

 - **No Derivative Works or Cloning:** Any form of copying, cloning, or creating derivative works based on this repository is strictly prohibited.
+- **Limited Access:** Use beyond brief review or collaboration is not allowed without prior permission from the owner.
+
+---
+
+All contents of this repository, including the thesis idea, code, and associated data, are copyrighted © 2024 by Rifqi Panuluh. Unauthorized use or duplication is prohibited.

 [LICENSE](https://github.com/nuluh/thesis?tab=License-1-ov-file#readme)

 ## How to Run `stft.ipynb`

 1. run `pip install -e .` in root project first
-
 2. run the notebook
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
--- a/code/src/data_preprocessing.py
+++ b/code/src/data_preprocessing.py
@@ -0,0 +1,357 @@
+import pandas as pd
+import os
+import re
+import sys
+import numpy as np
+from colorama import Fore, Style, init
+from typing import TypedDict, Dict, List
+from joblib import load
+from pprint import pprint
+
+# class DamageFilesIndices(TypedDict):
+#     damage_index: int
+#     files: list[int]
+OriginalSingleDamageScenarioFilePath = str
+DamageScenarioGroupIndex = int
+OriginalSingleDamageScenario = pd.DataFrame
+SensorIndex = int
+VectorColumnIndex = List[SensorIndex]
+VectorColumnIndices = List[VectorColumnIndex]
+DamageScenarioGroup = List[OriginalSingleDamageScenario]
+GroupDataset = List[DamageScenarioGroup]
+
+
+class DamageFilesIndices(TypedDict):
+    damage_index: int
+    files: List[str]
+
+def complement_pairs(n, prefix, extension):
+    """
+    Lazily yield the complement (file name, column pair) tuples for file number ``n``.
+
+    The file name is ``f"{prefix}{n}.{extension}"``. The file's own position
+    inside its block of five is ``(n - 1) % 5 + 1``; every other position
+    ``a`` in 1..5 is yielded as ``(file_name, [a, a + 25])``, giving four
+    tuples in ascending order of ``a``.
+    """
+    # TODO: the block size (5) and the column offset (25) should not be hardcoded
+    source_file = f"{prefix}{n}.{extension}"
+    own_position = (n - 1) % 5 + 1
+    # A generator is kept on purpose so callers receive the tuples lazily
+    yield from (
+        (source_file, [position, position + 25])
+        for position in range(1, 6)
+        if position != own_position
+    )
+
+def generate_df_tuples(total_dfs, prefix, extension, first_col_start, last_col_offset, 
+                      group_size=5, special_groups=None, group=True):
+    """
+    Build the nested list of (file name, [first_col, last_col]) tuples.
+
+    Parameters:
+    -----------
+    total_dfs : int
+        Accepted but currently ignored; the layout is fixed at 6 groups of 5 files.
+    prefix : str
+        Text placed before the running file number in each file name.
+    extension : str
+        File extension appended after the running file number.
+    first_col_start : int
+        Accepted but currently ignored; the first column index always runs 1..5.
+    last_col_offset : int
+        Accepted but currently ignored; the last column index is always first + 25.
+    group_size : int, default 5
+        Accepted but currently ignored.
+    special_groups : list, optional
+        When truthy, inserted as ONE extra group at position 0 of the result.
+    group : bool, default True
+        When falsy, no regular groups are generated (only ``special_groups``, if any).
+
+    Returns:
+    --------
+    list
+        List of groups; each regular group is a list of five tuples
+        ``(f"{prefix}{n}.{extension}", [i, i + 25])`` with ``n = g * 5 + i``.
+    """
+    grouped = []
+    if group:
+        # TODO: 6 groups, 5 files per group and the +25 offset shouldnt be hardcoded
+        grouped = [
+            [
+                (f"{prefix}{block * 5 + slot}.{extension}", [slot, slot + 25])
+                for slot in range(1, 6)
+            ]
+            for block in range(6)
+        ]
+
+    # The special group (e.g. an extra scenario) always goes to the very front
+    if special_groups:
+        grouped.insert(0, special_groups)
+
+    return grouped
+
+
+    # file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
+    # df = pd.read_csv(file_path, sep="\t", skiprows=10)  # Read with explicit column names
+
+
+class DataProcessor:
+    """
+    Load raw sensor text files described by a nested file index and reshape them.
+
+    ``file_index`` is a list of groups; each group is a list of
+    ``(file_name, [column_indices])`` tuples. Loading replaces every tuple in
+    place with the DataFrame slice read from that file, so when no cache is
+    used ``self.data`` and ``self.file_index`` are the same nested list.
+    """
+
+    def __init__(self, file_index, cache_path: str = None, base_path: str = None, include_time: bool = False):
+        """
+        :param file_index: Nested list of (file_name, [column_indices]) tuples.
+        :param cache_path: Optional joblib file; when given, data is loaded from it instead of the raw files.
+        :param base_path: Directory that the file names in ``file_index`` are relative to.
+        :param include_time: When True, column 0 is kept in front of the requested columns.
+        """
+        self.file_index = file_index
+        self.base_path = base_path
+        self.include_time = include_time
+        if cache_path:
+            # NOTE(review): joblib.load unpickles its input; only point this at trusted cache files
+            self.data = load(cache_path)
+        else:
+            self.data = self.load_data()
+
+    def load_data(self):
+        """
+        Read every file listed in ``self.file_index`` and replace its tuple with the selected columns.
+
+        Files that fail to load are reported on stdout and their tuple is left
+        untouched (best effort).
+
+        :return: The (mutated) ``self.file_index`` nested list, now holding DataFrames.
+        """
+        for group_pos, group in enumerate(self.file_index):
+            for entry_pos, entry in enumerate(group):
+                file_path = os.path.join(self.base_path, entry[0]) # ('zzzAD1.TXT')
+                if self.include_time:
+                    col_indices = [0] + entry[1]  # [0] + [1, 26] -> [0, 1, 26]
+                else:
+                    col_indices = entry[1] # [1, 26]
+                try:
+                    # sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True
+                    df = pd.read_csv(file_path, sep=r"\s+", skiprows=10, header=0, memory_map=True)
+                    self.file_index[group_pos][entry_pos] = df.iloc[:, col_indices].copy()  # Extract the specified columns
+
+                    print(f"Processed {file_path}, extracted columns: {col_indices}")
+
+                except Exception as e:
+                    print(f"Error processing {file_path}: {str(e)}")
+        # Returning the list is what lets __init__ store it in self.data
+        return self.file_index
+
+    def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
+        """
+        Loads a single data file into a pandas DataFrame.
+
+        :param file_path: Path to the data file.
+        :return: DataFrame holding the first data row only (``nrows=1``).
+        """
+        # NOTE(review): nrows=1 reads a single row; confirm this is intended and not a debugging leftover
+        df = pd.read_csv(file_path, sep=r"\s+", skiprows=10, header=0, memory_map=True, nrows=1)
+        return df
+
+    def _load_all_data(self) -> GroupDataset:
+        """
+        Loads all data files based on a grouping dictionary and returns a nested list.
+
+        Unlike ``load_data`` this expects ``self.file_index`` to be a mapping
+        of 1-based group index to a list of file paths (it calls ``.items()``).
+        It is not called by any other method of this class.
+
+        :return: A nested list of DataFrames where the outer index corresponds to group_idx - 1.
+        :raises ValueError: If ``self.file_index`` is empty.
+        """
+        data = []
+        # Find the maximum group index to determine the list size
+        max_group_idx = len(self.file_index) if self.file_index else 0
+
+        # Handle case when file_index is empty
+        if max_group_idx == 0:
+            raise ValueError("No file index provided; file_index is empty.")
+
+        # Initialize empty lists
+        for _ in range(max_group_idx):
+            data.append([])
+
+        # Fill the list with data
+        for group_idx, file_list in self.file_index.items():
+            group_idx -= 1 # adjust due to undamage file
+            data[group_idx] = [self._load_dataframe(file) for file in file_list]
+        return data
+
+    def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
+        """
+        Returns the list of DataFrames for the given group index.
+
+        Works for both list-shaped and dict-shaped ``self.data``.
+
+        :param group_idx: Index (list) or key (dict) of the group.
+        :return: List of DataFrames, or an empty list when the group does not exist.
+        """
+        if isinstance(self.data, dict):
+            return self.data.get(group_idx, [])
+        if isinstance(self.data, list) and -len(self.data) <= group_idx < len(self.data):
+            return self.data[group_idx]
+        return []
+
+    def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
+        """
+        Returns the column names for the given group and file indices.
+
+        :param group_idx: Index of the group.
+        :param file_idx: Index of the file in the group.
+        :return: List of column names, or an empty list when either index is out of range.
+        """
+        group = self.get_group_data(group_idx)
+        if -len(group) <= file_idx < len(group):
+            return group[file_idx].columns.tolist()
+        return []
+
+    def get_data_info(self):
+        """
+        Print information about the loaded data structure.
+        Adapted for when self.data is a List instead of a Dictionary.
+        """
+        if isinstance(self.data, list):
+            # For each sublist in self.data, get the type names of all elements
+            pprint(
+                [
+                    (
+                        [type(item).__name__ for item in sublist]
+                        if isinstance(sublist, list)
+                        else type(sublist).__name__
+                    )
+                    for sublist in self.data
+                ]
+            )
+        else:
+            pprint(
+                {
+                    key: [type(df).__name__ for df in value]
+                    for key, value in self.data.items()
+                }
+                if isinstance(self.data, dict)
+                else type(self.data).__name__
+            )
+
+    def _create_vector_column_index(self) -> VectorColumnIndices:
+        """
+        Build the column index lists used to slice each file of a group.
+
+        For file position ``p`` in the first group the list is
+        ``[p, p + 5, p + 10, p + 15, p + 20, p + 25]``.
+
+        :return: One index list per file of the first group; empty when no data is loaded.
+        """
+        if not self.data:
+            return []
+        # Only the first group is needed: the pattern depends on file position alone
+        files_in_group = len(self.data[0])
+        # TODO: the 6 entries and the step of 5 should be dynamic and parameterized
+        return [
+            [file_pos + step * 5 for step in range(6)]
+            for file_pos in range(files_in_group)
+        ]
+
+    def create_vector_column(self, overwrite=True) -> GroupDataset:
+        """
+        Create a vector column from the loaded data.
+
+        Every DataFrame in ``self.data`` is replaced by a copy holding only
+        the columns from ``_create_vector_column_index`` shifted by one.
+
+        :param overwrite: Currently unused; the data is always overwritten in place.
+        :return: ``self.data`` after the replacement.
+        """
+        idxs = self._create_vector_column_index()
+        for i, group in enumerate(self.data):
+            for j, df in enumerate(group):
+                # add 1 to all indices to account for 'Time' being at position 0
+                shifted = [col + 1 for col in idxs[j]]
+                # slice out the desired columns, copy into a fresh DataFrame,
+                # then overwrite self.data[i][j] with it
+                self.data[i][j] = df.iloc[:, shifted].copy()
+
+            # TODO: if !overwrite:
+        return self.data
+
+    def create_limited_sensor_vector_column(self, overwrite=True):
+        """
+        Reduce every DataFrame to column 0 plus the first and last vector-column sensors.
+
+        :param overwrite: Currently unused; the data is always overwritten in place.
+        """
+        idx = self._create_vector_column_index()
+        # if overwrite:
+        for i in range(len(self.data)):  # damage(s)
+            for j in range(len(self.data[i])):  # col(s)
+                # Get the appropriate indices for slicing from idx
+                indices = idx[j]
+
+                # Get the current DataFrame
+                df = self.data[i][j]
+
+                # Keep the 'Time' column and select only the specified 'Real' columns.
+                # First, add 1 to all indices to account for 'Time' being at position 0
+                real_indices = [index + 1 for index in indices]
+
+                # Create list with Time column index (0) and the adjusted Real indices
+                all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
+
+                # Apply the slicing
+                self.data[i][j] = df.iloc[:, all_indices]
+        # TODO: if !overwrite:
+
+    def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
+        """
+        Export the processed data to CSV files in the required folder structure.
+
+        Each DataFrame produces two files, ``..._01.csv`` and ``..._02.csv``;
+        which columns go into each depends on ``include_time``.
+
+        :param output_dir: Directory to save the CSV files.
+        :param file_prefix: Prefix for the output filenames.
+        """
+        # Iterate the loaded data (not the file index) so cache-loaded data exports too
+        for group_idx, group in enumerate(self.data, start=0):
+            group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
+            os.makedirs(group_folder, exist_ok=True)
+
+            for test_idx, df in enumerate(group, start=1):
+                out1 = os.path.join(group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv")
+                cols_to_export = [0, 1] if self.include_time else [1]
+                df.iloc[:, cols_to_export].to_csv(out1, index=False)
+
+                out2 = os.path.join(group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv")
+                cols_to_export = [0, 2] if self.include_time else [2]
+                df.iloc[:, cols_to_export].to_csv(out2, index=False)
+
+# def create_damage_files(base_path, output_base, prefix):
+#     # Initialize colorama
+#     init(autoreset=True)
+
+#     # Generate column labels based on expected duplication in input files
+#     columns = ["Real"] + [
+#         f"Real.{i}" for i in range(1, 30)
+#     ]  # Explicitly setting column names
+
+#     sensor_end_map = {
+#         1: "Real.25",
+#         2: "Real.26",
+#         3: "Real.27",
+#         4: "Real.28",
+#         5: "Real.29",
+#     }
+
+#     # Define the damage scenarios and the corresponding original file indices
+#     damage_scenarios = {
+#         1: range(1, 6),  # Damage 1 files from zzzAD1.csv to zzzAD5.csv
+#         2: range(6, 11),  # Damage 2 files from zzzAD6.csv to zzzAD10.csv
+#         3: range(11, 16),  # Damage 3 files from zzzAD11.csv to zzzAD15.csvs
+#         4: range(16, 21),  # Damage 4 files from zzzAD16.csv to zzzAD20.csv
+#         5: range(21, 26),  # Damage 5 files from zzzAD21.csv to zzzAD25.csv
+#         6: range(26, 31),  # Damage 6 files from zzzAD26.csv to zzzAD30.csv
+#     }
+#     damage_pad = len(str(len(damage_scenarios)))
+#     test_pad = len(str(30))
+
+#     for damage, files in damage_scenarios.items():
+#         for i, file_index in enumerate(files, start=1):
+#             # Load original data file
+#             file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
+#             df = pd.read_csv(
+#                 file_path, sep="\t", skiprows=10
+#             )  # Read with explicit column names
+
+#             top_sensor = columns[i - 1]
+#             print(top_sensor, type(top_sensor))
+#             output_file_1 = os.path.join(
+#                 output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_01.csv"
+#             )
+#             print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT")
+#             print("Taking datetime column on index 0...")
+#             print(f"Taking `{top_sensor}`...")
+#             os.makedirs(os.path.dirname(output_file_1), exist_ok=True)
+#             df[["Time", top_sensor]].to_csv(output_file_1, index=False)
+#             print(Fore.GREEN + "Done")
+
+#             bottom_sensor = sensor_end_map[i]
+#             output_file_2 = os.path.join(
+#                 output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_02.csv"
+#             )
+#             print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT")
+#             print("Taking datetime column on index 0...")
+#             print(f"Taking `{bottom_sensor}`...")
+#             os.makedirs(os.path.dirname(output_file_2), exist_ok=True)
+#             df[["Time", bottom_sensor]].to_csv(output_file_2, index=False)
+#             print(Fore.GREEN + "Done")
+#             print("---")
+
+
+def main():
+    """
+    Command-line entry point: ``<path_to_csv_files> <output_dir> <prefix>``.
+
+    Exits with status 1 when fewer than three arguments are given, or when
+    the ``create_damage_files`` helper is not defined in this module (it
+    currently exists only as commented-out code).
+    """
+    # Three positional arguments are read below, so argv needs at least 4 entries
+    if len(sys.argv) < 4:
+        script_name = os.path.basename(sys.argv[0]) if sys.argv else "data_preprocessing.py"
+        print(f"Usage: python {script_name} <path_to_csv_files> <output_dir> <prefix>")
+        sys.exit(1)
+
+    base_path = sys.argv[1]
+    output_base = sys.argv[2]  # Define output directory
+    prefix = sys.argv[3]
+
+    # Create output folders if they don't exist
+    # for i in range(1, 7):
+    #     os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
+
+    # Look the helper up at call time so a missing definition gives a clear message, not a NameError
+    exporter = globals().get("create_damage_files")
+    if exporter is None:
+        print("Error: create_damage_files is not defined in this module.")
+        sys.exit(1)
+
+    exporter(base_path, output_base, prefix)
+    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
+
+
+if __name__ == "__main__":
+    main()
--- a/code/src/ml/inference.py
+++ b/code/src/ml/inference.py
@@ -0,0 +1,16 @@
+from src.ml.model_selection import inference_model
+from joblib import load
+
+# Ad-hoc inference run: score one raw file with each exported model of one sensor.
+# NOTE(review): paths are absolute Windows paths and this runs at import time.
+x = 30
+file = f"D:/thesis/data/dataset_B/zzzBD{x}.TXT"
+sensor = 1
+model = {
+    "SVM": f"D:/thesis/models/sensor{sensor}/SVM.joblib",
+    "SVM with PCA": f"D:/thesis/models/sensor{sensor}/SVM with StandardScaler and PCA.joblib",
+    "XGBoost": f"D:/thesis/models/sensor{sensor}/XGBoost.joblib",
+}
+
+# Position of file x inside its block of five, used as the column to question
+index = ((x-1) % 5) + 1
+for position, model_name in enumerate(("SVM", "SVM with PCA", "XGBoost")):
+    if position:
+        # separator between consecutive model reports
+        print("---")
+    inference_model(model[model_name], file, column_question=index)
--- a/code/src/ml/model_selection.py
+++ b/code/src/ml/model_selection.py
@@ -1,13 +1,14 @@
 import numpy as np
 import pandas as pd
 import os
-from sklearn.model_selection import train_test_split as sklearn_split
-
+import matplotlib.pyplot as plt
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
+from joblib import load

 def create_ready_data(
    stft_data_path: str,
    stratify: np.ndarray = None,
-) -> tuple:
+) -> tuple[pd.DataFrame, np.ndarray]:
    """
    Create a stratified train-test split from STFT data.

@@ -21,13 +22,13 @@ def create_ready_data(
    Returns:
    --------
    tuple
-        (X_train, X_test, y_train, y_test) - Split datasets
+        (pd.DataFrame, np.ndarray) - Combined data and corresponding labels
    """
    ready_data = []
    for file in os.listdir(stft_data_path):
-        ready_data.append(pd.read_csv(os.path.join(stft_data_path, file)))
+        ready_data.append(pd.read_csv(os.path.join(stft_data_path, file), skiprows=1))

-    y_data = [i for i in range(len(ready_data))]
+    y_data = [i for i in range(len(ready_data))] # TODO: Should be replaced with actual desired labels

    # Combine all dataframes in ready_data into a single dataframe
    if ready_data:  # Check if the list is not empty
@@ -55,3 +56,216 @@ def create_ready_data(
        y = np.array([])

    return X, y
+
+
+def train_and_evaluate_model(
+    model, model_name, sensor_label, x_train, y_train, x_test, y_test, export=None
+):
+    """
+    Train a machine learning model, evaluate its performance, and optionally export it.
+
+    This function fits the provided model on the training data, times the
+    fit and the prediction, scores the predictions with accuracy, and can
+    save the trained model to disk if an export path is provided. Failures
+    are reported through the returned dictionary instead of being raised.
+
+    Parameters
+    ----------
+    model : estimator object
+        The machine learning model to train; must offer ``fit`` and ``predict``.
+    model_name : str
+        Name of the model, used for the export filename and in the returned results.
+    sensor_label : str
+        Label identifying which sensor's data the model is being trained on.
+    x_train : array-like or pandas.DataFrame
+        The training input samples.
+    y_train : array-like
+        The target values for training.
+    x_test : array-like or pandas.DataFrame
+        The test input samples.
+    y_test : array-like
+        The target values for testing.
+    export : str, optional
+        Directory path where the trained model should be saved. If None, model won't be saved.
+
+    Returns
+    -------
+    dict
+        Always contains:
+        - 'model': model_name
+        - 'sensor': sensor_label
+        - 'success': True only when training, prediction and scoring all succeeded
+        Added as each stage completes:
+        - 'elapsed_time_training': seconds spent in ``fit`` (float)
+        - 'elapsed_time_validation': seconds spent in ``predict`` (float)
+        - 'y_pred': predictions exactly as returned by ``model.predict``
+        - 'accuracy': accuracy percentage (float)
+        On failure:
+        - 'error': message of the stage that failed (training, prediction or accuracy)
+        - 'export_error': message when saving failed (does not clear 'success')
+
+    Example
+    -------
+    >>> from sklearn.svm import SVC
+    >>> from sklearn.model_selection import train_test_split
+    >>> X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)
+    >>> result = train_and_evaluate_model(
+    ...     SVC(),
+    ...     "SVM",
+    ...     "sensor1",
+    ...     X_train,
+    ...     y_train,
+    ...     X_test,
+    ...     y_test,
+    ...     export="models/sensor1"
+    ... )
+    >>> print(f"Model accuracy: {result['accuracy']:.2f}%")
+    """
+    import time
+    from sklearn.metrics import accuracy_score
+
+    result = {"model": model_name, "sensor": sensor_label, "success": False}
+
+    # Train the model; perf_counter is monotonic, so the duration cannot be
+    # skewed by system clock adjustments
+    try:
+        started = time.perf_counter()
+        model.fit(x_train, y_train)
+        result["elapsed_time_training"] = time.perf_counter() - started
+    except Exception as e:
+        result["error"] = f"Training error: {str(e)}"
+        return result
+
+    # Predict on the test set (validation)
+    try:
+        started = time.perf_counter()
+        y_pred = model.predict(x_test)
+        result["elapsed_time_validation"] = time.perf_counter() - started
+        result["y_pred"] = y_pred  # stored as returned by the model
+    except Exception as e:
+        result["error"] = f"Prediction error: {str(e)}"
+        return result
+
+    # Calculate accuracy
+    try:
+        result["accuracy"] = accuracy_score(y_test, y_pred) * 100
+    except Exception as e:
+        result["error"] = f"Accuracy calculation error: {str(e)}"
+        return result
+
+    # Export model if requested
+    if export:
+        try:
+            import joblib
+
+            full_path = os.path.join(export, f"{model_name}.joblib")
+            os.makedirs(os.path.dirname(full_path), exist_ok=True)
+            joblib.dump(model, full_path)
+            print(f"Model saved to {full_path}")
+        except Exception as e:
+            print(f"Warning: Failed to export model to {export}: {str(e)}")
+            result["export_error"] = str(e)
+            # Continue despite export error
+
+    result["success"] = True
+    return result
+
+def plot_confusion_matrix(results_sensor, y_test, title, models_dir="D:/thesis/models"):
+    """
+    Plot one confusion matrix per model result in ``results_sensor``.
+
+    Parameters:
+    -----------
+    results_sensor : list
+        List of result dictionaries; each must contain 'sensor', 'model'
+        and 'y_pred' (e.g. as returned by ``train_and_evaluate_model``).
+    y_test : array-like
+        True labels for the test samples.
+    title : str
+        Title placed on every plot.
+    models_dir : str, default "D:/thesis/models"
+        Root directory holding ``<sensor>/<model>.joblib``; each saved model
+        is loaded only to read its ``classes_`` for the axis labels.
+
+    Returns:
+    --------
+    None
+    This function creates a confusion-matrix figure for each entry of results_sensor.
+
+    Example
+    -------
+    >>> results_sensor1 = [
+    ...     {'model': 'SVM', 'sensor': 'sensor1', 'y_pred': y_pred},
+    ... ]
+    >>> plot_confusion_matrix(results_sensor1, y_test, "Sensor 1")
+    """
+    # Iterate through each model result and plot confusion matrix
+    for entry in results_sensor:
+        model = load(f"{models_dir}/{entry['sensor']}/{entry['model']}.joblib")
+
+        # get the class labels
+        labels = model.classes_
+        # Pin the matrix to the model's classes so its size always matches
+        # display_labels, even when a class is absent from y_test and y_pred
+        cm = confusion_matrix(y_test, entry['y_pred'], labels=labels) # -> ndarray
+
+        # Plot
+        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
+        disp.plot(cmap=plt.cm.Blues)  # You can change colormap
+        plt.title(f"{title}")
+
+def calculate_label_percentages(labels):
+    """
+    Print the share of each unique label in ``labels`` as a percentage.
+
+    One line per label is printed in the form ``Label <label>: <pct>%``
+    with two decimals, ordered as ``np.unique`` returns the labels.
+
+    Parameters:
+        labels (np.array): Input array of labels.
+
+    Returns:
+        None
+    """
+    # Distinct labels together with how often each one occurs
+    values, tallies = np.unique(labels, return_counts=True)
+
+    # Share of each label in percent
+    shares = (tallies / len(labels)) * 100
+
+    # One report line per label, printed as a single block
+    report_lines = []
+    for value, share in zip(values, shares):
+        report_lines.append(f"Label {value}: {share:.2f}%")
+    print("\n".join(report_lines))
+    return None
+
+
+def inference_model(
+    models, raw_file, column_question: int = None
+):
+    """
+    Run a saved model on one column of a raw vibration file and print the predicted-label distribution.
+
+    The selected column is transformed with an STFT (fs=1024, Hann window of
+    1024 samples, 512-sample overlap); the magnitudes of the first 21 time
+    segments are passed to the model and the predictions are summarised by
+    ``calculate_label_percentages``.
+
+    Parameters
+    ----------
+    models : str
+        Path to ONE exported model file (joblib), e.g. ``model["SVM"]``.
+    raw_file : str
+        Path to the whitespace-delimited raw data file (the first 10 lines are skipped).
+    column_question : int
+        Positional index of the column to analyse.
+        NOTE(review): the default of None is not a usable column index - confirm callers always pass it.
+
+    Returns
+    -------
+    None
+        The label percentages are printed, not returned.
+
+    Example
+    -------
+    >>> model = {"SVM": "models/sensor1/SVM.joblib", "SVM with PCA": "models/sensor1/SVM_with_PCA.joblib"}
+    >>> inference_model(model["SVM"], "zzzAD1.TXT", column_question=1)
+    """
+    # `hann` lives in scipy.signal.windows; the scipy.signal.hann alias is gone in newer SciPy
+    from scipy.signal import stft
+    from scipy.signal.windows import hann
+
+    # sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True
+    df = pd.read_csv(raw_file, sep=r"\s+", skiprows=10, header=0, memory_map=True)
+    vibration_data = df.iloc[:, column_question].values
+    # Perform STFT
+    freq, times, Zxx = stft(
+                            vibration_data, 
+                            fs=1024, 
+                            window=hann(1024), 
+                            nperseg=1024, 
+                            noverlap=1024-512
+                            )
+    # Zxx is (frequency bins, time segments). After the transpose each column
+    # is a frequency bin, so the number of labels must come from Zxx.shape[0]
+    freq_labels = [f"Freq_{value:.2f}" for value in np.linspace(0, 1024/2, Zxx.shape[0])]
+    data = pd.DataFrame(np.abs(Zxx).T, columns=freq_labels)
+    data = data.rename(columns={"Freq_0.00": "00"}) # To match the model input format
+    model = load(models)  # Load the model from the provided path
+    # NOTE(review): only the first 21 time segments are scored - confirm this matches the training data
+    return calculate_label_percentages(model.predict(data.iloc[:21,:]))
--- a/code/src/process_stft.py
+++ b/code/src/process_stft.py
@@ -25,13 +25,10 @@ window = hann(window_size)
 Fs = 1024

 # Number of damage cases (adjust as needed)
-num_damage_cases = 6  # Change to 30 if you have 30 damage cases
-
-# Number of test runs per damage case
-num_test_runs = 5
+num_damage_cases = 0  # Change to 30 if you have 30 damage cases

 # Function to perform STFT and return magnitude
-def compute_stft(vibration_data):
+def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size):
    frequencies, times, Zxx = stft(
        vibration_data, 
        fs=Fs, 
@@ -42,9 +39,13 @@ def compute_stft(vibration_data):
    stft_magnitude = np.abs(Zxx)
    return stft_magnitude.T  # Transpose to have frequencies as columns

-def process_damage_case(damage_num):
+def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop_size, output_dirs=output_dirs):
    damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
-    
+    if damage_num == 0:
+        # Number of test runs per damage case
+        num_test_runs = 120
+    else:
+        num_test_runs = 5
    # Check if the damage folder exists
    if not os.path.isdir(damage_folder):
        print(f"Folder {damage_folder} does not exist. Skipping...")
@@ -79,20 +80,29 @@ def process_damage_case(damage_num):
                print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
                continue
            
-            # Extract vibration data (assuming the second column is sensor data)
            vibration_data = df.iloc[:, 1].values
            
            # Perform STFT
-            stft_magnitude = compute_stft(vibration_data)
+            stft_magnitude = compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size)
            
            # Convert STFT result to DataFrame
            df_stft = pd.DataFrame(
                stft_magnitude, 
                columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])]
            )
+            # only inlcude 21 samples vector features for first 45 num_test_runs else include 22 samples vector features
+            if damage_num == 0:
+                print(f"Processing damage_num = 0, test_num = {test_num}")
+                if test_num <= 45:
+                    df_stft = df_stft.iloc[:22, :]
+                    print(f"Reduced df_stft shape (21 samples): {df_stft.shape}")
+                else:
+                    df_stft = df_stft.iloc[:21, :]
+                    print(f"Reduced df_stft shape (22 samples): {df_stft.shape}")
            
            # Append to the aggregated list
            aggregated_stft.append(df_stft)
+            print(sum(df.shape[0] for df in aggregated_stft))
        
        # Concatenate all STFT DataFrames vertically
        if aggregated_stft:
@@ -105,11 +115,13 @@ def process_damage_case(damage_num):
            )
            
            # Save the aggregated STFT to CSV
-            df_aggregated.to_csv(output_file, index=False)
+            with open(output_file, 'w') as file:
+                file.write('sep=,\n')
+                df_aggregated.to_csv(output_file, index=False)
            print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}")
        else:
            print(f"No STFT data aggregated for Sensor {sensor_num}, Damage {damage_num}.")

 if __name__ == "__main__":  # Added main guard for multiprocessing
    with multiprocessing.Pool() as pool:
-        pool.map(process_damage_case, range(1, num_damage_cases + 1))
+        pool.map(process_damage_case, range(0, num_damage_cases + 1))
--- a/data/QUGS/convert.py
+++ b/data/QUGS/convert.py
@@ -1,360 +0,0 @@
-import pandas as pd
-import os
-import re
-import sys
-import numpy as np
-from colorama import Fore, Style, init
-from typing import TypedDict, Dict, List
-from joblib import load
-from pprint import pprint
-
-# class DamageFilesIndices(TypedDict):
-#     damage_index: int
-#     files: list[int]
-OriginalSingleDamageScenarioFilePath = str
-DamageScenarioGroupIndex = int
-OriginalSingleDamageScenario = pd.DataFrame
-SensorIndex = int
-VectorColumnIndex = List[SensorIndex]
-VectorColumnIndices = List[VectorColumnIndex]
-DamageScenarioGroup = List[OriginalSingleDamageScenario]
-GroupDataset = List[DamageScenarioGroup]
-
-
-class DamageFilesIndices(TypedDict):
-    damage_index: int
-    files: List[str]
-
-
-def generate_damage_files_index(**kwargs) -> DamageFilesIndices:
-    prefix: str = kwargs.get("prefix", "zzzAD")
-    extension: str = kwargs.get("extension", ".TXT")
-    num_damage: int = kwargs.get("num_damage")
-    file_index_start: int = kwargs.get("file_index_start")
-    col: int = kwargs.get("col")
-    base_path: str = kwargs.get("base_path")
-
-    damage_scenarios = {}
-    a = file_index_start
-    b = col + 1
-    for i in range(1, num_damage + 1):
-        damage_scenarios[i] = range(a, b)
-        a += col
-        b += col
-
-    # return damage_scenarios
-
-    x = {}
-    for damage, files in damage_scenarios.items():
-        x[damage] = []  # Initialize each key with an empty list
-        for i, file_index in enumerate(files, start=1):
-            if base_path:
-                x[damage].append(
-                    os.path.normpath(
-                        os.path.join(base_path, f"{prefix}{file_index}{extension}")
-                    )
-                )
-                # if not os.path.exists(file_path):
-                #     print(Fore.RED + f"File {file_path} does not exist.")
-                #     continue
-            else:
-                x[damage].append(f"{prefix}{file_index}{extension}")
-    return x
-
-    # file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
-    # df = pd.read_csv( file_path, sep="\t", skiprows=10)  # Read with explicit column names
-
-
-class DataProcessor:
-    def __init__(self, file_index: DamageFilesIndices, cache_path: str = None):
-        self.file_index = file_index
-        if cache_path:
-            self.data = load(cache_path)
-        else:
-            self.data = self._load_all_data()
-
-    def _extract_column_names(self, file_path: str) -> List[str]:
-        """
-        Extracts column names from the header of the given file.
-        Assumes the 6th line contains column names.
-
-        :param file_path: Path to the data file.
-        :return: List of column names.
-        """
-        with open(file_path, "r") as f:
-            header_lines = [next(f) for _ in range(12)]
-
-        # Extract column names from the 6th line
-        channel_line = header_lines[10].strip()
-        tokens = re.findall(r'"([^"]+)"', channel_line)
-        if not channel_line.startswith('"'):
-            first_token = channel_line.split()[0]
-            tokens = [first_token] + tokens
-
-        return tokens  # Prepend 'Time' column if applicable
-
-    def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
-        """
-        Loads a single data file into a pandas DataFrame.
-
-        :param file_path: Path to the data file.
-        :return: DataFrame containing the numerical data.
-        """
-        col_names = self._extract_column_names(file_path)
-        df = pd.read_csv(
-            file_path, delim_whitespace=True, skiprows=11, header=None, memory_map=True
-        )
-        df.columns = col_names
-        return df
-
-    def _load_all_data(self) -> GroupDataset:
-        """
-        Loads all data files based on the grouping dictionary and returns a nested list.
-
-        :return: A nested list of DataFrames where the outer index corresponds to group_idx - 1.
-        """
-        data = []
-        # Find the maximum group index to determine the list size
-        max_group_idx = max(self.file_index.keys()) if self.file_index else 0
-
-        # Initialize empty lists
-        for _ in range(max_group_idx):
-            data.append([])
-
-        # Fill the list with data
-        for group_idx, file_list in self.file_index.items():
-            # Adjust index to be 0-based
-            list_idx = group_idx - 1
-            data[list_idx] = [self._load_dataframe(file) for file in file_list]
-
-        return data
-
-    def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
-        """
-        Returns the list of DataFrames for the given group index.
-
-        :param group_idx: Index of the group.
-        :return: List of DataFrames.
-        """
-        return self.data.get([group_idx, []])
-
-    def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
-        """
-        Returns the column names for the given group and file indices.
-
-        :param group_idx: Index of the group.
-        :param file_idx: Index of the file in the group.
-        :return: List of column names.
-        """
-        if group_idx in self.data and len(self.data[group_idx]) > file_idx:
-            return self.data[group_idx][file_idx].columns.tolist()
-        return []
-
-    def get_data_info(self):
-        """
-        Print information about the loaded data structure.
-        Adapted for when self.data is a List instead of a Dictionary.
-        """
-        if isinstance(self.data, list):
-            # For each sublist in self.data, get the type names of all elements
-            pprint(
-                [
-                    (
-                        [type(item).__name__ for item in sublist]
-                        if isinstance(sublist, list)
-                        else type(sublist).__name__
-                    )
-                    for sublist in self.data
-                ]
-            )
-        else:
-            pprint(
-                {
-                    key: [type(df).__name__ for df in value]
-                    for key, value in self.data.items()
-                }
-                if isinstance(self.data, dict)
-                else type(self.data).__name__
-            )
-
-    def _create_vector_column_index(self) -> VectorColumnIndices:
-        vector_col_idx: VectorColumnIndices = []
-        y = 0
-        for data_group in self.data:  # len(data_group[i]) = 5
-            for j in data_group:  # len(j[i]) =
-                c: VectorColumnIndex = []  # column vector c_{j}
-                x = 0
-                for _ in range(6):  # TODO: range(6) should be dynamic and parameterized
-                    c.append(x + y)
-                    x += 5
-                vector_col_idx.append(c)
-                y += 1
-            return vector_col_idx
-
-    def create_vector_column(self, overwrite=True) -> List[List[List[pd.DataFrame]]]:
-        """
-        Create a vector column from the loaded data.
-
-        :param overwrite: Overwrite the original data with vector column-based data.
-        """
-        idx = self._create_vector_column_index()
-        # if overwrite:
-        for i in range(len(self.data)):
-            for j in range(len(self.data[i])):
-                # Get the appropriate indices for slicing from idx
-                indices = idx[j]
-
-                # Get the current DataFrame
-                df = self.data[i][j]
-
-                # Keep the 'Time' column and select only specified 'Real' columns
-                # First, we add 1 to all indices to account for 'Time' being at position 0
-                real_indices = [index + 1 for index in indices]
-
-                # Create list with Time column index (0) and the adjusted Real indices
-                all_indices = [0] + real_indices
-
-                # Apply the slicing
-                self.data[i][j] = df.iloc[:, all_indices]
-        # TODO: if !overwrite:
-
-    def create_limited_sensor_vector_column(self, overwrite=True):
-        """
-        Create a vector column from the loaded data.
-
-        :param overwrite: Overwrite the original data with vector column-based data.
-        """
-        idx = self._create_vector_column_index()
-        # if overwrite:
-        for i in range(len(self.data)):  # damage(s)
-            for j in range(len(self.data[i])):  # col(s)
-                # Get the appropriate indices for slicing from idx
-                indices = idx[j]
-
-                # Get the current DataFrame
-                df = self.data[i][j]
-
-                # Keep the 'Time' column and select only specifid 'Real' colmns
-                # First, we add 1 to all indices to acount for 'Time' being at positiion 0
-                real_indices = [index + 1 for index in indices]
-
-                # Create list with Time column index (0) and the adjustedd Real indices
-                all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
-
-                # Apply the slicing
-                self.data[i][j] = df.iloc[:, all_indices]
-        # TODO: if !overwrite:
-
-    def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
-        """
-        Export the processed data to CSV files in the required folder structure.
-
-        :param output_dir: Directory to save the CSV files.
-        :param file_prefix: Prefix for the output filenames.
-        """
-        for group_idx, group in enumerate(self.data, start=1):
-            group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
-            os.makedirs(group_folder, exist_ok=True)
-            for test_idx, df in enumerate(group, start=1):
-                # Ensure columns are named uniquely if duplicated
-                df = df.copy()
-                df.columns = ["Time", "Real_0", "Real_1"]  # Rename
-
-                # Export first Real column
-                out1 = os.path.join(
-                    group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv"
-                )
-                df[["Time", "Real_0"]].rename(columns={"Real_0": "Real"}).to_csv(
-                    out1, index=False
-                )
-
-                # Export last Real column
-                out2 = os.path.join(
-                    group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv"
-                )
-                df[["Time", "Real_1"]].rename(columns={"Real_1": "Real"}).to_csv(
-                    out2, index=False
-                )
-
-
-def create_damage_files(base_path, output_base, prefix):
-    # Initialize colorama
-    init(autoreset=True)
-
-    # Generate column labels based on expected duplication in input files
-    columns = ["Real"] + [
-        f"Real.{i}" for i in range(1, 30)
-    ]  # Explicitly setting column names
-
-    sensor_end_map = {
-        1: "Real.25",
-        2: "Real.26",
-        3: "Real.27",
-        4: "Real.28",
-        5: "Real.29",
-    }
-
-    # Define the damage scenarios and the corresponding original file indices
-    damage_scenarios = {
-        1: range(1, 6),  # Damage 1 files from zzzAD1.csv to zzzAD5.csv
-        2: range(6, 11),  # Damage 2 files from zzzAD6.csv to zzzAD10.csv
-        3: range(11, 16),  # Damage 3 files from zzzAD11.csv to zzzAD15.csvs
-        4: range(16, 21),  # Damage 4 files from zzzAD16.csv to zzzAD20.csv
-        5: range(21, 26),  # Damage 5 files from zzzAD21.csv to zzzAD25.csv
-        6: range(26, 31),  # Damage 6 files from zzzAD26.csv to zzzAD30.csv
-    }
-    damage_pad = len(str(len(damage_scenarios)))
-    test_pad = len(str(30))
-
-    for damage, files in damage_scenarios.items():
-        for i, file_index in enumerate(files, start=1):
-            # Load original data file
-            file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
-            df = pd.read_csv(
-                file_path, sep="\t", skiprows=10
-            )  # Read with explicit column names
-
-            top_sensor = columns[i - 1]
-            print(top_sensor, type(top_sensor))
-            output_file_1 = os.path.join(
-                output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_01.csv"
-            )
-            print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT")
-            print("Taking datetime column on index 0...")
-            print(f"Taking `{top_sensor}`...")
-            os.makedirs(os.path.dirname(output_file_1), exist_ok=True)
-            df[["Time", top_sensor]].to_csv(output_file_1, index=False)
-            print(Fore.GREEN + "Done")
-
-            bottom_sensor = sensor_end_map[i]
-            output_file_2 = os.path.join(
-                output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_02.csv"
-            )
-            print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT")
-            print("Taking datetime column on index 0...")
-            print(f"Taking `{bottom_sensor}`...")
-            os.makedirs(os.path.dirname(output_file_2), exist_ok=True)
-            df[["Time", bottom_sensor]].to_csv(output_file_2, index=False)
-            print(Fore.GREEN + "Done")
-            print("---")
-
-
-def main():
-    if len(sys.argv) < 2:
-        print("Usage: python convert.py <path_to_csv_files>")
-        sys.exit(1)
-
-    base_path = sys.argv[1]
-    output_base = sys.argv[2]
-    prefix = sys.argv[3]  # Define output directory
-
-    # Create output folders if they don't exist
-    # for i in range(1, 7):
-    #     os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
-
-    create_damage_files(base_path, output_base, prefix)
-    print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
-
-
-if __name__ == "__main__":
-    main()
--- a/data/QUGS/test.py
+++ b/data/QUGS/test.py
@@ -1,25 +1,52 @@
-from convert import *
+from data_preprocessing import *
 from joblib import dump, load

+# b = generate_damage_files_index(
+#     num_damage=6,
+#     file_index_start=1,
+#     col=5,
+#     base_path="D:/thesis/data/dataset_B",
+#     prefix="zzzBD",
+#     # undamage_file="zzzBU.TXT"
+# )
+# Example: Generate tuples with a special group of df0 at the beginning
+special_groups_A = [
+    {'df_name': 'zzzAU.TXT', 'position': 0, 'size': 5}  # Add at beginning
+]
+
+special_groups_B = [
+    {'df_name': 'zzzBU.TXT', 'position': 0, 'size': 5}  # Add at beginning
+]
+
+# Generate the tuples with the special group
+a_complement = [(comp)
+                for n in range(1, 31)
+                for comp in complement_pairs(n)]
+a = generate_df_tuples(special_groups=a_complement, prefix="zzzAD")
+
+# b_complement = [(comp)
+#                 for n in range(1, 31)
+#                 for comp in complement_pairs(n)]
+# b = generate_df_tuples(special_groups=b_complement, prefix="zzzBD")
+
+
 # a = generate_damage_files_index(
-#     num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
+#     num_damage=6,
+#     file_index_start=1,
+#     col=5,
+#     base_path="D:/thesis/data/dataset_A",
+#     prefix="zzzAD",
+#     # undamage_file="zzzBU.TXT"
 # )

-b = generate_damage_files_index(
-    num_damage=6,
-    file_index_start=1,
-    col=5,
-    base_path="D:/thesis/data/dataset_B",
-    prefix="zzzBD",
-)
-# data_A = DataProcessor(file_index=a)
-# # data.create_vector_column(overwrite=True)
-# data_A.create_limited_sensor_vector_column(overwrite=True)
-# data_A.export_to_csv("D:/thesis/data/converted/raw")
+data_A = DataProcessor(file_index=a, base_path="D:/thesis/data/dataset_A", include_time=True)
+# data_A.create_vector_column(overwrite=True)
+# # data_A.create_limited_sensor_vector_column(overwrite=True)
+data_A.export_to_csv("D:/thesis/data/converted/raw")

-data_B = DataProcessor(file_index=b)
-# data.create_vector_column(overwrite=True)
-data_B.create_limited_sensor_vector_column(overwrite=True)
-data_B.export_to_csv("D:/thesis/data/converted/raw_B")
+# data_B = DataProcessor(file_index=b, base_path="D:/thesis/data/dataset_B", include_time=True)
+# data_B.create_vector_column(overwrite=True)
+# # data_B.create_limited_sensor_vector_column(overwrite=True)
+# data_B.export_to_csv("D:/thesis/data/converted/raw_B")
 # a = load("D:/cache.joblib")
 # breakpoint()
--- a/latex/chapters/id/03_methodology/steps/index.tex
+++ b/latex/chapters/id/03_methodology/steps/index.tex
@@ -3,7 +3,7 @@ Alur keseluruhan penelitian ini dilakukan melalui tahapan-tahapan sebagai beriku

 \begin{figure}[H]
    \centering
-    \includegraphics[width=0.3\linewidth]{chapters/id/flow.png}
+    \includegraphics[width=0.3\linewidth]{chapters/img/flow.png}
    \caption{Diagram alir tahapan penelitian}
    \label{fig:flowchart}
 \end{figure}
--- a/latex/figures/A4
+++ b/latex/figures/A4
--- a/latex/frontmatter/acknowledgement.tex
+++ b/latex/frontmatter/acknowledgement.tex
--- a/latex/frontmatter/glossaries.tex
+++ b/latex/frontmatter/glossaries.tex
@@ -0,0 +1,78 @@
+% % A new command that enables us to enter bi-lingual (Slovene and English) terms
+% % syntax: \addterm[options]{label}{Slovene}{Slovene first use}{English}{Slovene
+% % description}
+% \newcommand{\addterm}[6][]{
+%   \newglossaryentry{#2}{
+%     name={#3 (angl.\ #5)},
+%     first={#4 (\emph{#5})},
+%     text={#3},
+%     sort={#3},
+%     description={#6},
+%     #1 % pass additional options to \newglossaryentry
+%   }
+% }
+
+% % A new command that enables us to enter (English) acronyms with bi-lingual
+% % (Slovene and English) long versions
+% % syntax: \addacronym[options]{label}{abbreviation}{Slovene long}{Slovene first
+% % use long}{English long}{Slovene description}
+% \newcommand{\addacronym}[7][]{
+%   % Create the main glossary entry with \newacronym
+%   % \newacronym[key-val list]{label}{abbrv}{long}
+%   \newacronym[
+%     name={#4 (angl.\ #6,\ #3)},
+%     first={\emph{#5} (angl.\ \emph{#6},\ \emph{#3})},
+%     sort={#4},
+%     description={#7},
+%     #1 % pass additional options to \newglossaryentry
+%     ]
+%     {#2}{#3}{#4}
+%   % Create a cross-reference from the abbreviation to the main glossary entry by
+%   % creating an auxiliary glossary entry (note: we set the label of this entry
+%   % to '<original label>_auxiliary' to avoid clashes)
+%   \newglossaryentry{#2_auxiliary}{
+%     name={#3},
+%     sort={#3},
+%     description={\makefirstuc{#6}},
+%     see=[See:]{#2}
+%   }
+% }
+
+% % Change the text of the cross-reference links to the Slovene long version.
+% \renewcommand*{\glsseeitemformat}[1]{\emph{\acrlong{#1}}.}
+
+% Define the Indonesian term and link it to the English term
+\newglossaryentry{jaringansaraf}{
+  name=Jaringan Saraf,
+  description={The Indonesian term for \gls{nn}}
+}
+% \newglossaryentry{pemelajaranmesin}{
+%   name=Pemelajaran Mesin,
+%   description={Lihat \gls{machinelearning}}
+% }
+
+% Define the English term and link it to its acronym
+\newglossaryentry{neuralnetwork}{
+  name=Neural Network,
+  description={A computational model inspired by the human brain, see \gls{nn}}
+}
+
+% \newglossaryentry{machinelearning}{
+%   name=Machine Learning,
+%   description={A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}}
+% \newglossaryentry{pemelajaranmesin}{
+%     name={pemelajaran mesin (angl.\ #5)},
+%     first={pemelajaran mesin (\emph{machine learning})},
+%     text={pemelajaran mesin},
+%     sort={ },
+%     description={#6},
+%     #1 % pass additional options to \newglossaryentry
+% }
+\longnewglossaryentry{machinelearning}{name={machine learning}}
+{A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}
+\newterm[see={machinelearning}]{pemelajaranmesin}
+% \newglossaryentry{pemelajaran mesin}{}
+% \addterm{machinelearning}{pemelajaran mesin}{pemelajaran mesin}{machine learning}{A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}
+\newacronym
+ [description={statistical pattern recognition technique}]
+ {svm}{SVM}{support vector machine}
--- a/latex/main.tex
+++ b/latex/main.tex
@@ -1,14 +1,18 @@
 \documentclass[draftmark]{thesis}

-% Title Information
-\setthesisinfo
-  {Prediksi Lokasi Kerusakan dengan Machine Learning}
-  {Rifqi Damar Panuluh}
-  {20210110224}
-  {PROGRAM STUDI TEKNIK SIPIL}
-  {FAKULTAS TEKNIK}
-  {UNIVERSITAS MUHAMMADIYAH YOGYAKARTA}
-  {2025}
+% Metadata
+\title{Prediksi Lokasi Kerusakan dengan Machine Learning}
+\author{Rifqi Damar Panuluh}
+\date{\today}
+\authorid{20210110224}
+\firstadvisor{Ir. Muhammad Ibnu Syamsi, Ph.D.}
+\secondadvisor{}
+\headdepartement{Puji Harsanto, S.T., M.T., Ph.D.}
+\headdepartementid{19740607201404123064}
+\faculty{Fakultas Teknik}
+\program{Program Studi Teknik Sipil}
+\university{Universitas Muhammadiyah Yogyakarta}
+\yearofsubmission{2025}

 % Input preamble
 \input{preamble/packages}
@@ -16,22 +20,19 @@
 \input{preamble/macros}

 \begin{document}
-
-\maketitle
+% \input{frontmatter/maketitle}
+% \input{frontmatter/maketitle_secondary}
 \frontmatter
-\input{frontmatter/approval}\clearpage
-\input{frontmatter/originality}\clearpage
-\input{frontmatter/acknowledgement}\clearpage
-\tableofcontents
+% \input{frontmatter/approval}\clearpage
+% \input{frontmatter/originality}\clearpage
+% \input{frontmatter/acknowledgement}\clearpage
+% \tableofcontents
 \clearpage
 \mainmatter
 \pagestyle{fancyplain}
-% Include content
-\include{content/abstract}
-\include{content/introduction}
 \include{chapters/01_introduction}
-\include{content/chapter2}
-\include{content/conclusion}
+\include{chapters/id/02_literature_review/index}
+\include{chapters/id/03_methodology/index}

 % Bibliography
 % \bibliographystyle{IEEEtran}
--- a/latex/metadata.tex
+++ b/latex/metadata.tex
@@ -1,11 +0,0 @@
-\newcommand{\studentname}{Rifqi Damar Panuluh}
-\newcommand{\studentid}{20210110224}
-\newcommand{\thesistitle}{Prediksi Lokasi Kerusakan dengan Machine Learning}
-\newcommand{\firstadvisor}{Ir. Muhammad Ibnu Syamsi, Ph.D.}
-\newcommand{\secondadvisor}{}
-\newcommand{\headdepartement}{Puji Harsanto, S.T. M.T., Ph.D.}
-\newcommand{\headdepartementid}{19740607201404123064}
-\newcommand{\faculty}{Fakultas Teknik}
-\newcommand{\program}{Teknik Sipil}
-\newcommand{\university}{Universitas Muhammadiyah Yogyakarta}
-\newcommand{\yearofsubmission}{2025}
--- a/latex/thesis.cls
+++ b/latex/thesis.cls
@@ -1,7 +1,7 @@
 \NeedsTeXFormat{LaTeX2e}
 \ProvidesClass{thesis}[2025/05/10 Bachelor Thesis Class]

-\newif\if@draftmark
+\newif\if@draftmark \@draftmarkfalse
 \@draftmarkfalse

 \DeclareOption{draftmark}{\@draftmarktrue}
@@ -12,6 +12,7 @@
 \RequirePackage{polyglossia}
 \RequirePackage{fontspec}
 \RequirePackage{titlesec}
+\RequirePackage{titling}
 \RequirePackage{fancyhdr}
 \RequirePackage{geometry}
 \RequirePackage{setspace}
@@ -24,30 +25,31 @@
 \RequirePackage{svg}           % Allows including SVG images directly
 \RequirePackage{indentfirst}   % Makes first paragraph after headings indented
 \RequirePackage{float}         % Provides [H] option to force figure/table placement
-
+\RequirePackage[style=apa, backend=biber]{biblatex}
+\RequirePackage[acronym, nogroupskip, toc]{glossaries}
 % Polyglossia set language
-+ \setdefaultlanguage[variant=indonesian]{malay}  % Proper Indonesian language setup
-+ \setotherlanguage{english}             % Enables English as secondary language
-
-+ \DefineBibliographyStrings{english}{%  % Customizes bibliography text
-+   andothers={dkk\adddot},              % Changes "et al." to "dkk."
-+   pages={hlm\adddot},                  % Changes "pp." to "hlm."
-+ }
+\setdefaultlanguage[variant=indonesian]{malay}  % Proper Indonesian language setup
+\setotherlanguage{english}             % Enables English as secondary language
+\DefineBibliographyStrings{english}{%  % Customizes bibliography text
+  andothers={dkk\adddot},              % Changes "et al." to "dkk."
+  pages={hlm\adddot},                  % Changes "pp." to "hlm."
+}

 % Conditionally load the watermark package and settings
 \if@draftmark
  \RequirePackage{draftwatermark}
-  \SetWatermarkText{nuluh/thesis (wip) draft: \today}
+  \SetWatermarkText{nuluh/thesis (wip) [draft: \today]}
  \SetWatermarkColor[gray]{0.8}                    % Opacity: 0.8 = 20% transparent  
  \SetWatermarkFontSize{1.5cm}
  \SetWatermarkAngle{90}
  \SetWatermarkHorCenter{1.5cm}
+  \RequirePackage[left]{lineno}
+  \linenumbers
 \fi

 % Page layout
-\geometry{left=3cm, top=3cm, right=3cm, bottom=3cm}
+\geometry{left=4cm, top=3cm, right=3cm, bottom=3cm}
 \setlength{\parskip}{0.5em}
-\setlength{\parindent}{0pt}
 \onehalfspacing

 % Fonts
@@ -56,19 +58,45 @@
 \setsansfont{Arial}
 \setmonofont{Courier New}

-% Metadata commands
-\input{metadata}
-
-\newcommand{\setthesisinfo}[7]{%
-  \renewcommand{\thesistitle}{#1}%
-  \renewcommand{\studentname}{#2}%
-  \renewcommand{\studentid}{#3}%
-  \renewcommand{\program}{#4}%
-  \renewcommand{\faculty}{#5}%
-  \renewcommand{\university}{#6}%
-  \renewcommand{\yearofsubmission}{#7}%
+\makeatletter
+% Extracting the Year from \today
+\newcommand{\theyear}{%
+  \expandafter\@car\expandafter\@gobble\the\year\@nil
 }

+% Declare internal macros as initially empty
+\newcommand{\@authorid}{}
+\newcommand{\@firstadvisor}{}
+\newcommand{\@secondadvisor}{}
+\newcommand{\@headdepartement}{}
+\newcommand{\@headdepartementid}{}
+\newcommand{\@faculty}{}
+\newcommand{\@program}{}
+\newcommand{\@university}{}
+\newcommand{\@yearofsubmission}{}
+
+% Define user commands to set these values.
+\newcommand{\authorid}[1]{\gdef\@authorid{#1}}
+\newcommand{\firstadvisor}[1]{\gdef\@firstadvisor{#1}}
+\newcommand{\secondadvisor}[1]{\gdef\@secondadvisor{#1}}
+\newcommand{\headdepartement}[1]{\gdef\@headdepartement{#1}}
+\newcommand{\headdepartementid}[1]{\gdef\@headdepartementid{#1}}
+\newcommand{\faculty}[1]{\gdef\@faculty{#1}}
+\newcommand{\program}[1]{\gdef\@program{#1}}
+\newcommand{\university}[1]{\gdef\@university{#1}}
+\newcommand{\yearofsubmission}[1]{\gdef\@yearofsubmission{#1}}
+
+% Now expose robust “the‑” getters to access the values
+\newcommand{\theauthorid}{\@authorid}
+\newcommand{\thefirstadvisor}{\@firstadvisor}
+\newcommand{\thesecondadvisor}{\@secondadvisor}
+\newcommand{\theheaddepartement}{\@headdepartement}
+\newcommand{\theheaddepartementid}{\@headdepartementid}
+\newcommand{\thefaculty}{\@faculty}
+\newcommand{\theprogram}{\@program}
+\newcommand{\theuniversity}{\@university}
+\newcommand{\theyearofsubmission}{\@yearofsubmission}
+\makeatother
 % % Header and footer
 \fancypagestyle{fancy}{%
    \fancyhf{}
@@ -110,11 +138,6 @@
 \renewcommand{\cftchappresnum}{BAB~}
 \renewcommand{\cftchapaftersnum}{\quad}

-% \titlespacing*{\chapter}{0pt}{-10pt}{20pt}
-
-% Redefine \maketitle
-\renewcommand{\maketitle}{\input{frontmatter/maketitle}}
-
 % Chapter & Section format
 \renewcommand{\cftchapfont}{\normalsize\MakeUppercase}
 % \renewcommand{\cftsecfont}{}
@@ -136,11 +159,15 @@
 \setlength{\cftsubsecnumwidth}{2.5em}
 \setlength{\cftfignumwidth}{5em}
 \setlength{\cfttabnumwidth}{4em}
-\renewcommand \cftchapdotsep{1}           % Denser dots (closer together) https://tex.stackexchange.com/a/273764
-\renewcommand \cftsecdotsep{1}            % Apply to sections too
-\renewcommand \cftsubsecdotsep{1}         % Apply to subsections too
+\renewcommand \cftchapdotsep{1} % https://tex.stackexchange.com/a/273764
+\renewcommand \cftsecdotsep{1} % https://tex.stackexchange.com/a/273764
+\renewcommand \cftsubsecdotsep{1} % https://tex.stackexchange.com/a/273764
+\renewcommand \cftfigdotsep{1.5} % https://tex.stackexchange.com/a/273764
+\renewcommand \cfttabdotsep{1.5} % https://tex.stackexchange.com/a/273764
 \renewcommand{\cftchapleader}{\normalfont\cftdotfill{\cftsecdotsep}}
 \renewcommand{\cftchappagefont}{\normalfont}
+
+% Add prefix in the LoF and LoT entries
 \renewcommand{\cftfigpresnum}{\figurename~}
 \renewcommand{\cfttabpresnum}{\tablename~}

@@ -165,6 +192,147 @@
 % \renewcommand{\cfttoctitlefont}{\bfseries\MakeUppercase}
 % \renewcommand{\cftaftertoctitle}{\vskip 2em}

+% Defines a new glossary called “notation”
+\newglossary[nlg]{notation}{not}{ntn}{Notation}
+
+% Define the header for the location column
+\providecommand*{\locationname}{Location}
+
+% Define the new glossary style called 'mylistalt' for main glossaries
+\makeatletter
+\newglossarystyle{mylistalt}{% enumerate-based style: entry name on its own line, description and locations below
+  % theglossary becomes an enumerate list; \glslistinit is invoked before the list opens
+  \renewenvironment{theglossary}%
+    {\glslistinit\begin{enumerate}}%
+    {\end{enumerate}}%
+  % no header row, no group headings, no vertical skip between groups
+  \renewcommand*{\glossaryheader}{}%
+  \renewcommand*{\glsgroupheading}[1]{}%
+  \renewcommand*{\glsgroupskip}{}%
+  % main entries (##1 = label, ##2 = location list): \item supplies the number, then a forced line break
+  \renewcommand*{\glossentry}[2]{%
+    \item \glstarget{##1}{\glossentryname{##1}}%
+    \mbox{}\\
+    \glossentrydesc{##1}\space 
+    [##2] % location list printed in square brackets after the description
+  }%
+  % sub-entries (##1 = level, ##2 = label, ##3 = location list): new paragraph inside the current item, name not repeated
+  \renewcommand*{\subglossentry}[3]{%
+    \par
+    \glssubentryitem{##2}%
+    \glstarget{##2}{\strut}\space
+    \glossentrydesc{##2}\space ##3%
+  }%
+}
+
+
+% Define the new glossary style 'altlong3customheader' for notation
+\newglossarystyle{altlong3customheader}{%
+  % The glossary will be a longtable environment with three columns:
+  % 1. Symbol (left-aligned)
+  % 2. Description (paragraph, width \glsdescwidth)
+  % 3. Location (paragraph, width \glspagelistwidth)
+  \renewenvironment{theglossary}%
+    {\begin{longtable}{lp{\glsdescwidth}p{\glspagelistwidth}}}%
+    {\end{longtable}}%
+  % Column titles used by the header row (line ends protected with % like the rest of the style)
+  \renewcommand*{\symbolname}{Simbol}%
+  \renewcommand*{\descriptionname}{Keterangan}%
+  \renewcommand*{\locationname}{Halaman}%
+  \renewcommand*{\glossaryheader}{%
+    \bfseries\symbolname & \bfseries\descriptionname & \bfseries\locationname \tabularnewline\endhead}%
+  % Suppress group headings (e.g., A, B, C...)
+  \renewcommand*{\glsgroupheading}[1]{}%
+  % Define how a main glossary entry is displayed
+  % ##1 is the entry label
+  % ##2 is the location list (page numbers)
+  \renewcommand{\glossentry}[2]{%
+    \glsentryitem{##1}% Inserts entry number if entrycounter option is used
+    \glstarget{##1}{\glossentryname{##1}} & % Column 1: Symbol (with hyperlink target)
+    \glossentrydesc{##1}\glspostdescription & % Column 2: Description (with post-description punctuation)
+    ##2\tabularnewline % Column 3: Location list
+  }%
+  % Define how a sub-entry is displayed
+  % ##1 is the sub-entry level (e.g., 1 for first sub-level)
+  % ##2 is the entry label
+  % ##3 is the location list
+  \renewcommand{\subglossentry}[3]{%
+    & % Column 1 (Symbol) is left blank for sub-entries to create an indented look
+    \glssubentryitem{##2}% Inserts sub-entry number if subentrycounter is used
+    \glstarget{##2}{\strut}\glossentrydesc{##2}\glspostdescription & % Column 2: Description (target on strut for hyperlink)
+    ##3\tabularnewline % Column 3: Location list
+  }%
+  % Define the skip between letter groups (if group headings were enabled)
+  % For 3 columns, we need 2 ampersands for a full blank row if not using \multicolumn
+  \ifglsnogroupskip
+    \renewcommand*{\glsgroupskip}{}%
+  \else
+    \renewcommand*{\glsgroupskip}{& & \tabularnewline}%
+  \fi
+}
+
+% Define a new style 'supercol' based on 'super' for acronyms glossaries
+\newglossarystyle{supercol}{%
+  \setglossarystyle{super}% start from the 'super' style, then override the entry formats below
+  % main entries (##1 = label, ##2 = location list): name | ": " description, locations
+  \renewcommand*{\glossentry}[2]{%
+    \glsentryitem{##1}%
+    \glstarget{##1}{\glossentryname{##1}}\space  % column 1: entry name (the colon is emitted on the next line)
+    &: \glossentrydesc{##1}\glspostdescription\space ##2\tabularnewline
+  }%
+  % sub-entries (##1 = level, ##2 = label, ##3 = location list): column 1 empty, colon opens column 2
+  \renewcommand*{\subglossentry}[3]{%
+    &: 
+    \glssubentryitem{##2}%
+    \glstarget{##2}{\strut}\glossentryname{##2}\space % sub-entry name, printed after the colon in column 2
+    \glossentrydesc{##2}\glspostdescription\space ##3\tabularnewline
+  }%
+}
+\makeatother
+
+% A new command that enables us to enter bi-lingual (Bahasa Indonesia and English) terms
+% syntax: \addterm[options]{label}{Bahasa Indonesia}{Bahasa Indonesia first use}{English}{Bahasa Indonesia
+% description}
+\newcommand{\addterm}[6][]{% #1 = extra key=value options (optional), #2 = entry label; line ends protected against stray spaces
+  \newglossaryentry{#2}{%
+    name={#3 (angl.\ #5)}, % NOTE(review): "angl." looks carried over from a non-Indonesian template; confirm wording
+    first={#4 (\emph{#5})},
+    text={#3},
+    sort={#3},
+    description={#6},
+    #1 % pass additional options to \newglossaryentry
+  }%
+}
+
+% A new command that enables us to enter (English) acronyms with bi-lingual
+% (Bahasa Indonesia and English) long versions
+% syntax: \addacronym[options]{label}{abbreviation}{Bahasa Indonesia long}{Bahasa Indonesia first
+% use long}{English long}{Bahasa Indonesia description}
+\newcommand{\addacronym}[7][]{% #1 = extra key=value options (optional), #2 = entry label; line ends protected against stray spaces
+  % Create the main glossary entry with \newacronym
+  % \newacronym[key-val list]{label}{abbrv}{long}
+  \newacronym[%
+    name={#4 (angl.\ #6,\ #3)}, % NOTE(review): "angl." looks carried over from a non-Indonesian template; confirm wording
+    first={\emph{#5} (angl.\ \emph{#6},\ \emph{#3})},
+    sort={#4},
+    description={#7},
+    #1 % pass additional options to \newacronym
+    ]%
+    {#2}{#3}{#4}%
+  % Create a cross-reference from the abbreviation to the main glossary entry by
+  % creating an auxiliary glossary entry (note: we set the label of this entry
+  % to '<original label>_auxiliary' to avoid clashes)
+  \newglossaryentry{#2_auxiliary}{%
+    name={#3},
+    sort={#3},
+    description={\makefirstuc{#6}},
+    see=[See:]{#2}
+  }%
+}
+
+% Change the text of the cross-reference links to the Bahasa Indonesia long version.
+\renewcommand*{\glsseeitemformat}[1]{\emph{\acrlong{#1}}.}
+
 % % Apply a custom fancyhdr layout only on the first page of each \chapter, and use no header/footer elsewhere
 % % \let\oldchapter\chapter
 % % \renewcommand{\chapter}{%
Author	SHA1	Message	Date
nuluh	3e2b153d11	refactor(stft): comment out unused imports and update SVM model loading for consistency	2025-07-28 05:22:24 +07:00
nuluh	3cbef17b0c	feat(model_selection): add timing for model training and validation processes	2025-07-28 05:20:10 +07:00
Rifqi D. Panuluh	80d4a66925	Merge pull request #100 from nuluh/feature/99-exp-alternative-undamage-case-data [EXP] Alterntive Undamage Case Data	2025-07-24 18:09:05 +07:00
nuluh	9b018efc15	refactor(notebooks): update STFT notebook to improve clarity and structure of sensor evaluation sections	2025-07-24 17:00:31 +07:00
nuluh	2fbdeac1eb	refactor(test): update import statement to use data_preprocessing module	2025-07-18 19:29:02 +07:00
nuluh	086032c250	refactor(notebooks): clean up to be more readable notebooks	2025-07-18 19:28:43 +07:00
nuluh	f6c71739df	refactor(ml): clean up model_selection.py by removing unused code and improving function structure	2025-07-18 19:27:46 +07:00
Rifqi D. Panuluh	2dc915949b	chore(.gitignore): add additional LaTeX file types to ignore list	2025-07-17 14:05:59 +00:00
nuluh	18824e05c0	refactor(ml): update inference calls to use new model structure and improve clarity	2025-07-17 00:18:01 +07:00
nuluh	2504157b29	feat(src): replace `convert.py` to `src/data_preprocessing.py` and fix some functions `prefix` parameter	2025-07-02 03:25:18 +07:00
nuluh	5ba628b678	refactor(src): make `compute_stft` and `process_damage_case` to be pure function that explicitly need STFT arguments to be passed	2025-07-01 14:32:52 +07:00
nuluh	a93adc8af3	feat(notebooks): minimize stft.ipynb notebooks and add STFT data preview plot. - Consolidated import statements for pandas and matplotlib. - Updated STFT plotting for Sensor 1 and Sensor 2 datasets with improved visualization using pcolormesh. - Enhanced subplot organization for better clarity in visual representation. - Added titles and adjusted layout for all plots.	2025-06-30 01:36:44 +07:00
nuluh	c2df42cc2b	feat(ml): add XGBoost model to inference options and update commented inference calls	2025-06-27 10:35:27 +07:00
nuluh	465ed121f9	feat(notebooks): training model with new alternative undamaged (label 0) data	2025-06-27 10:34:23 +07:00
nuluh	d6975b4817	feat(src): update damage base path and adjust test run logic for damage case processing for undamage case new method	2025-06-27 10:33:54 +07:00
nuluh	79070921d7	feat(data): add complement_pairs function to generate complement tuples for implementing alternative undamage case method	2025-06-27 10:33:36 +07:00
nuluh	e8eb07a91b	refactor(data): improve variable naming in generate_df_tuples function for clarity	2025-06-26 10:53:10 +07:00
nuluh	c98c6a091b	refactor(data): update generate_df_tuples function for improved readibility code	2025-06-26 10:51:29 +07:00
nuluh	9921d7663b	feat(src): add inference script for model evaluation	2025-06-24 14:08:38 +07:00
nuluh	459fbcc17a	refactor(notebooks): visualization for sensor analysis and streamline data processing	2025-06-24 14:08:02 +07:00
nuluh	5041ee3feb	feat(src): add confusion matrix plotting and label percentage calculation	2025-06-24 14:06:56 +07:00
nuluh	114ab849b9	feat(src): Add confusion matrix plotting function for model evaluation	2025-06-24 00:27:15 +07:00
nuluh	6196523ea0	feat(notebooks): Add confusion matrix plotting loop for Sensor 1 models	2025-06-21 01:10:03 +07:00
Rifqi D. Panuluh	46b66e0a90	Merge pull request #98 from nuluh/feat/53-feat-include-undamaged-node-classification Closes #53	2025-06-18 09:06:04 +07:00
nuluh	18892c1188	WIP(notebooks): Add SVM with StandardScaler and PCA to sensor model definitions	2025-06-18 08:31:55 +07:00
nuluh	d0b603ba9f	fix(data): Update DataProcessor instantiation for new data preprocessing implementation	2025-06-18 08:30:12 +07:00
nuluh	a7d8f1ef56	fix(data): Fix pool mapping to include undamaged case and add csv header separator line for Excel compatibility	2025-06-18 08:25:01 +07:00
nuluh	1164627bac	fix(data): Fix export_to_csv to adapt new added undamaged scenario and add new parameter `include_time` to include 'Time' data	2025-06-18 01:54:12 +07:00
nuluh	58a672a680	fix(data): Fix generate_df_tuples function output bug when special_groups args is passed	2025-06-17 13:20:27 +07:00
nuluh	24c1484300	feat(data): Enhance DataProcessor to support dynamic base path and improve data loading with error handling and memory efficiency	2025-06-16 17:35:27 +07:00
nuluh	60ff4e0fa9	feat(data): Propose new damage file index generation to improve structure and flexibility in DataFrame handling	2025-06-16 03:13:07 +07:00
nuluh	3e652accfb	refactor(data): remove unnecessary variable declaration in DataProcessor for loading dataframes	2025-06-14 04:02:42 +07:00
nuluh	66a09e0ddf	feat(data): Enhance damage file index generation with undamaged file handling and improved error management (WIP)	2025-06-14 04:02:42 +07:00
nuluh	195f8143f0	refactor(data): remove redundant column extraction method and simplify dataframe loading	2025-06-14 00:57:54 +07:00
nuluh	e7332252a6	Merge branch 'feat/90-feat-preserve-trained-model' into dev	2025-06-12 03:38:15 +07:00
nuluh	4b0819f94e	feat(notebooks): Enhance STFT notebook and model selection functionality - Updated paths in the STFT notebook to reflect new data files. - Improved plotting aesthetics for combined plots and added grid lines. - Introduced a 3D spectrogram visualization for better data representation. - Refactored model training function to include error handling and model export functionality. - Adjusted model training calls to include export paths for saved models. Closes #90 - Added additional markdown cells for better documentation and clarity in the notebook.	2025-06-12 03:35:21 +07:00
nuluh	7613c08ebd	feat(figures): add data preprocessing illustration diagram	2025-06-10 17:21:49 +07:00
nuluh	ad6cda4270	fix(notebooks): update sensor data paths and improve plotting aesthetics	2025-06-10 17:20:13 +07:00
nuluh	ebaa263781	chore(convert): comment out create_damage_files obsolete function	2025-06-09 18:59:51 +07:00
nuluh	f5dada1b9c	fix(latex): fix image path for flowchart in methodology section	2025-06-04 15:59:13 +07:00
nuluh	37c9a0765a	fix(documentclass): remove language option from biblatex package	2025-06-04 15:53:57 +07:00
nuluh	8656289a1c	chore(documentclass): comment out table of contents for temporary removal	2025-06-04 15:53:35 +07:00
nuluh	15fe8339ec	feat(documentclass): add new glossary for notation	2025-06-04 15:31:00 +07:00
nuluh	44210ef372	chore(latex): comment out maketitle inputs for temporary	2025-06-04 11:27:56 +07:00
nuluh	9192d4c81c	chore(documentclass): remove commented-out code for chapter formatting and header layout	2025-06-03 21:37:32 +07:00
nuluh	0373743ca7	fix(documentclass): enhance dot separation in ToC and add prefixes for figures and tables	2025-06-03 21:34:05 +07:00
nuluh	49d6395e6f	fix(documentclass): add missing \RequirePackage{titling} for maketitle formatting	2025-06-03 21:16:34 +07:00
nuluh	bf9cca2d90	feat(documentclass): redefine metadata information to main.tex by consdolidate internal command inside thesis.cls and remove metadata.tex Closes #96	2025-06-03 21:13:28 +07:00
nuluh	08420296e6	fix(documentclass): add missing \makeatother command to properly close the @ symbol	2025-06-03 20:59:11 +07:00
nuluh	1540213eec	feat(documentclass): add commands for bilingual terms and acronyms with custom glossary entries	2025-06-03 20:58:18 +07:00
nuluh	6fd4b7465e	feat(documentclass): add new glossary style 'supercol' for enhanced acronym formatting Closes #85	2025-06-03 20:55:26 +07:00
nuluh	85a0aebf36	feat(documentclass): add custom glossary style 'altlong3customheader' for notation with three-column layout Closes #95	2025-06-03 20:54:45 +07:00
nuluh	8d1edfdbf7	feat(glossaries): add glossary support with custom style for main glossaries entry and location header Closes 84	2025-06-03 20:52:54 +07:00
nuluh	ff862d9467	fix(documentclass): adjust page layout by increasing left margin to 4cm	2025-06-03 20:39:03 +07:00
nuluh	dfb64db1d8	feat(documentclass): add draft watermark and optional line numbering with 'draftmark' option	2025-06-03 20:37:29 +07:00
Rifqi D. Panuluh	3e3de577ba	Merge pull request #94 from nuluh/latex/91-bug-expose-maketitle Maketitle Replaced with \input for Flexibility when integrated with latexdiff-latexpand Workflow	2025-06-03 20:16:30 +07:00