Compare commits

..

25 Commits

Author SHA1 Message Date
panuluh
bb9dee10de Revise README to simplify content and remove private visibility
Removed sections on access restrictions and copyright details.
2026-01-29 12:14:35 +07:00
Rifqi D. Panuluh
93d720b676 Update latexdiff.yml 2025-06-04 15:11:21 +07:00
Rifqi D. Panuluh
52dccce7e2 Update latexdiff.yml 2025-06-04 15:00:49 +07:00
Rifqi D. Panuluh
bf0de65fb7 Update latexdiff.yml 2025-06-04 14:21:39 +07:00
Rifqi D. Panuluh
7ca70fbdc3 Update latexdiff.yml 2025-06-04 13:41:33 +07:00
Rifqi D. Panuluh
b35944ee3e Update latexdiff.yml 2025-06-04 13:34:44 +07:00
Rifqi D. Panuluh
8f51963d0f Merge pull request #93 from nuluh/revert-92-latex/91-bug-expose-maketitle
Revert "Expose `maketitle` by just using `\input`"
2025-06-03 20:12:49 +07:00
Rifqi D. Panuluh
5c513e4629 Revert "Expose maketitle by just using \input" 2025-06-03 20:12:11 +07:00
Rifqi D. Panuluh
38ece73768 Merge pull request #92 from nuluh/latex/91-bug-expose-maketitle 2025-06-03 20:09:13 +07:00
Rifqi D. Panuluh
e5b9806462 Update latexdiff.yml 2025-06-03 18:09:18 +07:00
Rifqi D. Panuluh
8dbb448b32 Update latexdiff.yml 2025-06-03 18:00:43 +07:00
Rifqi D. Panuluh
033d949325 Update latexdiff.yml 2025-06-03 17:29:50 +07:00
Rifqi D. Panuluh
643c0ebce1 Update latexdiff.yml 2025-06-03 17:19:07 +07:00
Rifqi D. Panuluh
4851a9aa5d Update latexdiff.yml 2025-06-03 17:05:30 +07:00
Rifqi D. Panuluh
fd765b113f Update latex-lint.yml 2025-06-03 15:35:51 +07:00
Rifqi D. Panuluh
fe801b0a1c Update latex-lint.yml 2025-06-03 15:16:16 +07:00
Rifqi D. Panuluh
dbc62fea32 Update latex-lint.yml 2025-06-03 15:01:15 +07:00
Rifqi D. Panuluh
1ad235866e Update latexdiff.yml 2025-06-03 14:44:52 +07:00
Rifqi D. Panuluh
05796d0165 Create latex-lint.yml 2025-06-03 14:42:29 +07:00
Rifqi D. Panuluh
f8e9ac93a0 Update latexdiff.yml
fix path
2025-06-03 14:27:00 +07:00
Rifqi D. Panuluh
04546f8c35 Update latexdiff.yml
ensures that all \include{} or \input{} paths (which are relative to main.tex) resolve correctly
2025-06-03 14:20:23 +07:00
Rifqi D. Panuluh
26450026bb Update latexdiff.yml
fix  Alpine’s “externally‐managed‐environment” restriction by install flatex inside a virtual environment rather than system‐wide
2025-06-03 14:00:50 +07:00
Rifqi D. Panuluh
3a17cc1331 Update latexdiff.yml
using a pre-built TeX Live Docker image to avoid reinstalling texlive-full every run
2025-06-03 13:42:35 +07:00
Rifqi D. Panuluh
e9f953f731 Create latexmk.yml 2025-06-03 13:26:38 +07:00
Rifqi D. Panuluh
2c5c78b83c Create latexdiff.yml 2025-06-03 13:09:41 +07:00
20 changed files with 1359 additions and 1709 deletions

52
.github/workflows/latex-lint.yml vendored Normal file
View File

@@ -0,0 +1,52 @@
name: LaTeX Lint
on:
push:
branches:
- main
- dev
paths:
- 'latex/**/*.tex'
- 'latex/main.tex'
workflow_dispatch:
jobs:
lint:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install chktex
run: |
sudo apt-get update
sudo apt-get install -y chktex
- name: Run chktex inside latex/
working-directory: latex
run: |
TEX_FILES=$(find . -type f -name "*.tex")
if [ -z "$TEX_FILES" ]; then
echo "No .tex files found in latex/. Skipping lint."
exit 0
fi
echo "🔍 Linting .tex files with chktex..."
FAIL=0
for f in $TEX_FILES; do
echo "▶ Checking $f"
# Run chktex and show output; capture error status
if ! chktex "$f"; then
echo "::warning file=$f::ChkTeX found issues in $f"
FAIL=1
fi
done
if [ $FAIL -ne 0 ]; then
echo "::error::❌ Lint errors or warnings were found in one or more .tex files above."
exit 1
else
echo "✅ All files passed chktex lint."
fi

102
.github/workflows/latexdiff.yml vendored Normal file
View File

@@ -0,0 +1,102 @@
name: LaTeX Diff
on:
workflow_dispatch:
inputs:
base_branch:
description: 'Base branch (older version)'
required: true
compare_branch:
description: 'Compare branch (new version)'
required: true
jobs:
latexdiff:
runs-on: ubuntu-latest
container:
image: ghcr.io/xu-cheng/texlive-full:latest
options: --user root
steps:
- name: Install latexpand (Perl script)
run: |
tlmgr init-usertree
tlmgr install latexpand
- name: Checkout base branch
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.base_branch }}
path: base
- name: Checkout compare branch
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.compare_branch }}
path: compare
- name: Create output folder
run: mkdir -p diff_output
- name: Flatten base/main.tex (with latexpand)
run: |
cd base/latex
echo "📂 Listing files in base/latex:"
ls -R
echo "🔄 Flattening with latexpand..."
latexpand --verbose --keep-comments --output=../../diff_output/base_flat.tex main.tex
echo "✅ Preview of base_flat.tex:"
head -n 50 ../../diff_output/base_flat.tex
- name: Flatten compare/main.tex (with latexpand)
run: |
cd compare/latex
echo "📂 Listing files in compare/latex:"
ls -R
echo "🔄 Flattening with latexpand..."
latexpand --verbose --keep-comments --output=../../diff_output/compare_flat.tex main.tex
echo "✅ Preview of compare_flat.tex:"
head -n 50 ../../diff_output/compare_flat.tex
- name: Generate diff.tex using latexdiff
run: |
latexdiff diff_output/base_flat.tex diff_output/compare_flat.tex > diff_output/diff.tex
- name: Copy thesis.cls to diff_output
run: cp compare/latex/thesis.cls diff_output/
- name: Copy chapters/img into diff_output
run: |
# Create the same chapters/img path inside diff_output
mkdir -p diff_output/chapters/img
# Copy all images from compare branch into diff_output
cp -R compare/latex/chapters/img/* diff_output/chapters/img/
- name: Copy .bib files into diff_output
run: |
mkdir -p diff_output
cp compare/latex/*.bib diff_output/
- name: Override “\input{preamble/fonts}” in diff.tex
run: |
sed -i "/\\input{preamble\/fonts}/c % — replaced by CI: use TeX Gyre fonts instead of Times New Roman\/Arial\n\\\setmainfont{TeX Gyre Termes}\n\\\setsansfont{TeX Gyre Heros}\n\\\setmonofont{TeX Gyre Cursor}" diff_output/diff.tex
- name: Print preview of diff.tex (after font override)
run: |
echo "📄 Preview of diff_output/diff.tex after font override:"
head -n 50 diff_output/diff.tex
- name: Compile diff.tex to PDF
working-directory: diff_output
continue-on-error: true
run: |
xelatex -interaction=nonstopmode diff.tex
xelatex -interaction=nonstopmode diff.tex
- name: Upload diff output files
uses: actions/upload-artifact@v4
with:
name: latex-diff-output
path: diff_output/

29
.github/workflows/latexmk.yml vendored Normal file
View File

@@ -0,0 +1,29 @@
name: Render XeLaTeX on PR to dev
on:
pull_request:
branches:
- dev
jobs:
build-pdf:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Compile XeLaTeX
uses: dante-ev/latex-action@2021-A
with:
root_file: main.tex
working_directory: latex
compiler: xelatex
args: -interaction=nonstopmode -halt-on-error -file-line-error
extra_system_packages: "fonts-freefont-otf"
- name: Upload compiled PDF
uses: actions/upload-artifact@v4
with:
name: compiled-pdf
path: latex/main.pdf

15
.gitignore vendored
View File

@@ -2,17 +2,4 @@
data/**/*.csv
.venv/
*.pyc
*.egg-info/
# Latex
*.aux
*.log
*.out
*.toc
*.bbl
*.blg
*.fdb_latexmk
*.fls
*.synctex.gz
*.dvi
*.egg-info/

View File

@@ -1,7 +1,4 @@
{
"python.analysis.extraPaths": [
"./code/src/features",
"${workspaceFolder}/code/src"
],
"python.analysis.extraPaths": ["./code/src/features"],
"jupyter.notebookFileRoot": "${workspaceFolder}/code"
}

View File

@@ -4,20 +4,14 @@ This repository contains the work related to my thesis, which focuses on damage
**Note:** This repository does not contain the secondary data used in the analysis. The code is designed to work with data from the [QUGS (Qatar University Grandstand Simulator)](https://www.structuralvibration.com/benchmark/qugs/) dataset, which is not included here.
The repository is private and access is restricted only to those who have been given explicit permission by the owner. Access is provided solely for the purpose of brief review or seeking technical guidance.
## Restrictions
- **No Derivative Works or Cloning:** Any form of copying, cloning, or creating derivative works based on this repository is strictly prohibited.
- **Limited Access:** Use beyond brief review or collaboration is not allowed without prior permission from the owner.
---
All contents of this repository, including the thesis idea, code, and associated data, are copyrighted © 2024 by Rifqi Panuluh. Unauthorized use or duplication is prohibited.
[LICENSE](https://github.com/nuluh/thesis?tab=License-1-ov-file#readme)
## How to Run `stft.ipynb`
1. run `pip install -e .` in root project first
2. run the notebook
2. run the notebook

File diff suppressed because one or more lines are too long

View File

@@ -1,357 +0,0 @@
import pandas as pd
import os
import re
import sys
import numpy as np
from colorama import Fore, Style, init
from typing import TypedDict, Dict, List
from joblib import load
from pprint import pprint
# class DamageFilesIndices(TypedDict):
# damage_index: int
# files: list[int]
OriginalSingleDamageScenarioFilePath = str
DamageScenarioGroupIndex = int
OriginalSingleDamageScenario = pd.DataFrame
SensorIndex = int
VectorColumnIndex = List[SensorIndex]
VectorColumnIndices = List[VectorColumnIndex]
DamageScenarioGroup = List[OriginalSingleDamageScenario]
GroupDataset = List[DamageScenarioGroup]
class DamageFilesIndices(TypedDict):
damage_index: int
files: List[str]
def complement_pairs(n, prefix, extension):
"""
Return the four complement tuples for zzzBD<n>.TXT
"""
filename = f"{prefix}{n}.{extension}" # TODO: shouldnt be hardcoded
orig_a = (n - 1) % 5 + 1 # 1 … 5
for a in range(1, 6): # a = 1 … 5
if a != orig_a: # skip original a
yield (filename, [a, a + 25]) # use yield instead of return to return a generator of tuples
def generate_df_tuples(total_dfs, prefix, extension, first_col_start, last_col_offset,
group_size=5, special_groups=None, group=True):
"""
Generate a structured list of tuples containing DataFrame references and column indices.
Parameters:
-----------
total_dfs : int, default 30
Total number of DataFrames to include in the tuples
group_size : int, default 5
Number of DataFrames in each group (determines the pattern repeat)
prefix : str
Prefix for DataFrame variable names
first_col_start : int, default 1
Starting value for the first column index (1-indexed)
last_col_offset : int, default 25
Offset to add to first_col_start to get the last column index
special_groups : list of dict, optional
List of special groups to insert, each dict should contain:
- 'df_name': The DataFrame name to use for all tuples in this group
- 'position': Where to insert this group (0 for beginning)
- 'size': Size of this group (default: same as group_size)
Returns:
--------
list
List of tuples, where each tuple contains (df_name, [first_col, last_col])
"""
result = []
if group:
# Group tuples into sublists of group_size
for g in range(6): # TODO: shouldnt be hardcoded
group = []
for i in range(1, 6): # TODO: shouldnt be hardcoded
n = g * 5 + i
bottom_end = i # 1, 2, 3, 4, 5
top_end = bottom_end + 25 # 26, 27, 28, 29, 30 # TODO: shouldnt be hardcoded
group.append((f"{prefix}{n}.{extension}", [bottom_end, top_end]))
result.append(group)
# Add special groups at specified positions (other than beginning)
if special_groups:
result.insert(0, special_groups)
return result
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
# df = pd.read_csv(file_path, sep="\t", skiprows=10) # Read with explicit column names
class DataProcessor:
def __init__(self, file_index, cache_path: str = None, base_path: str = None, include_time: bool = False):
self.file_index = file_index
self.base_path = base_path
self.include_time = include_time
if cache_path:
self.data = load(cache_path)
else:
self.data = self.load_data()
def load_data(self):
for idxs, group in enumerate(self.file_index):
for idx, tuple in enumerate(group):
file_path = os.path.join(self.base_path, tuple[0]) # ('zzzAD1.TXT')
if self.include_time:
col_indices = [0] + tuple[1] # [1, 26] + [0] -> [0, 1, 26]
else:
col_indices = tuple[1] # [1, 26]
try:
# Read the CSV file
df = pd.read_csv(file_path, delim_whitespace=True, skiprows=10, header=0, memory_map=True)
self.file_index[idxs][idx] = df.iloc[:, col_indices].copy() # Extract the specified columns
print(f"Processed {file_path}, extracted columns: {col_indices}")
except Exception as e:
print(f"Error processing {file_path}: {str(e)}")
def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
"""
Loads a single data file into a pandas DataFrame.
:param file_path: Path to the data file.
:return: DataFrame containing the numerical data.
"""
df = pd.read_csv(file_path, delim_whitespace=True, skiprows=10, header=0, memory_map=True, nrows=1)
return df
def _load_all_data(self) -> GroupDataset:
"""
Loads all data files based on the grouping dictionary and returns a nested list.
:return: A nested list of DataFrames where the outer index corresponds to group_idx - 1.
"""
data = []
# Find the maximum group index to determine the list size
max_group_idx = len(self.file_index) if self.file_index else 0
# Handle case when file_index is empty
if max_group_idx == 0:
raise ValueError("No file index provided; file_index is empty.")
# Initialize empty lists
for _ in range(max_group_idx):
data.append([])
# Fill the list with data
for group_idx, file_list in self.file_index.items():
group_idx -= 1 # adjust due to undamage file
data[group_idx] = [self._load_dataframe(file) for file in file_list]
return data
def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
"""
Returns the list of DataFrames for the given group index.
:param group_idx: Index of the group.
:return: List of DataFrames.
"""
return self.data.get([group_idx, []])
def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
"""
Returns the column names for the given group and file indices.
:param group_idx: Index of the group.
:param file_idx: Index of the file in the group.
:return: List of column names.
"""
if group_idx in self.data and len(self.data[group_idx]) > file_idx:
return self.data[group_idx][file_idx].columns.tolist()
return []
def get_data_info(self):
"""
Print information about the loaded data structure.
Adapted for when self.data is a List instead of a Dictionary.
"""
if isinstance(self.data, list):
# For each sublist in self.data, get the type names of all elements
pprint(
[
(
[type(item).__name__ for item in sublist]
if isinstance(sublist, list)
else type(sublist).__name__
)
for sublist in self.data
]
)
else:
pprint(
{
key: [type(df).__name__ for df in value]
for key, value in self.data.items()
}
if isinstance(self.data, dict)
else type(self.data).__name__
)
def _create_vector_column_index(self) -> VectorColumnIndices:
vector_col_idx: VectorColumnIndices = []
y = 0
for data_group in self.data: # len(data_group[i]) = 5
for j in data_group: # len(j[i]) =
c: VectorColumnIndex = []
x = 0
for _ in range(6): # TODO: range(6) should be dynamic and parameterized
c.append(x + y)
x += 5
vector_col_idx.append(c)
y += 1
return vector_col_idx # TODO: refactor this so that it returns just from first data_group without using for loops through the self.data that seems unnecessary
def create_vector_column(self, overwrite=True) -> List[List[List[pd.DataFrame]]]:
"""
Create a vector column from the loaded data.
:param overwrite: Overwrite the original data with vector column-based data.
"""
idxs = self._create_vector_column_index()
for i, group in enumerate(self.data):
# add 1 to all indices to account for 'Time' being at position 0
for j, df in enumerate(group):
idx = [_ + 1 for _ in idxs[j]]
# slice out the desired columns, copy into a fresh DataFrame,
# then overwrite self.data[i][j] with it
self.data[i][j] = df.iloc[:, idx].copy()
# TODO: if !overwrite:
def create_limited_sensor_vector_column(self, overwrite=True):
"""
Create a vector column from the loaded data.
:param overwrite: Overwrite the original data with vector column-based data.
"""
idx = self._create_vector_column_index()
# if overwrite:
for i in range(len(self.data)): # damage(s)
for j in range(len(self.data[i])): # col(s)
# Get the appropriate indices for slicing from idx
indices = idx[j]
# Get the current DataFrame
df = self.data[i][j]
# Keep the 'Time' column and select only specifid 'Real' colmns
# First, we add 1 to all indices to acount for 'Time' being at positiion 0
real_indices = [index + 1 for index in indices]
# Create list with Time column index (0) and the adjustedd Real indices
all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
# Apply the slicing
self.data[i][j] = df.iloc[:, all_indices]
# TODO: if !overwrite:
def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
"""
Export the processed data to CSV files in the required folder structure.
:param output_dir: Directory to save the CSV files.
:param file_prefix: Prefix for the output filenames.
"""
for group_idx, group in enumerate(self.file_index, start=0):
group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
os.makedirs(group_folder, exist_ok=True)
for test_idx, df in enumerate(group, start=1):
out1 = os.path.join(group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv")
cols_to_export = [0, 1] if self.include_time else [1]
df.iloc[:, cols_to_export].to_csv(out1, index=False)
out2 = os.path.join(group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv")
cols_to_export = [0, 2] if self.include_time else [2]
df.iloc[:, cols_to_export].to_csv(out2, index=False)
# def create_damage_files(base_path, output_base, prefix):
# # Initialize colorama
# init(autoreset=True)
# # Generate column labels based on expected duplication in input files
# columns = ["Real"] + [
# f"Real.{i}" for i in range(1, 30)
# ] # Explicitly setting column names
# sensor_end_map = {
# 1: "Real.25",
# 2: "Real.26",
# 3: "Real.27",
# 4: "Real.28",
# 5: "Real.29",
# }
# # Define the damage scenarios and the corresponding original file indices
# damage_scenarios = {
# 1: range(1, 6), # Damage 1 files from zzzAD1.csv to zzzAD5.csv
# 2: range(6, 11), # Damage 2 files from zzzAD6.csv to zzzAD10.csv
# 3: range(11, 16), # Damage 3 files from zzzAD11.csv to zzzAD15.csvs
# 4: range(16, 21), # Damage 4 files from zzzAD16.csv to zzzAD20.csv
# 5: range(21, 26), # Damage 5 files from zzzAD21.csv to zzzAD25.csv
# 6: range(26, 31), # Damage 6 files from zzzAD26.csv to zzzAD30.csv
# }
# damage_pad = len(str(len(damage_scenarios)))
# test_pad = len(str(30))
# for damage, files in damage_scenarios.items():
# for i, file_index in enumerate(files, start=1):
# # Load original data file
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
# df = pd.read_csv(
# file_path, sep="\t", skiprows=10
# ) # Read with explicit column names
# top_sensor = columns[i - 1]
# print(top_sensor, type(top_sensor))
# output_file_1 = os.path.join(
# output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_01.csv"
# )
# print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT")
# print("Taking datetime column on index 0...")
# print(f"Taking `{top_sensor}`...")
# os.makedirs(os.path.dirname(output_file_1), exist_ok=True)
# df[["Time", top_sensor]].to_csv(output_file_1, index=False)
# print(Fore.GREEN + "Done")
# bottom_sensor = sensor_end_map[i]
# output_file_2 = os.path.join(
# output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_02.csv"
# )
# print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT")
# print("Taking datetime column on index 0...")
# print(f"Taking `{bottom_sensor}`...")
# os.makedirs(os.path.dirname(output_file_2), exist_ok=True)
# df[["Time", bottom_sensor]].to_csv(output_file_2, index=False)
# print(Fore.GREEN + "Done")
# print("---")
def main():
if len(sys.argv) < 2:
print("Usage: python convert.py <path_to_csv_files>")
sys.exit(1)
base_path = sys.argv[1]
output_base = sys.argv[2]
prefix = sys.argv[3] # Define output directory
# Create output folders if they don't exist
# for i in range(1, 7):
# os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
create_damage_files(base_path, output_base, prefix)
print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
if __name__ == "__main__":
main()

View File

@@ -1,16 +0,0 @@
from src.ml.model_selection import inference_model
from joblib import load
x = 30
file = f"D:/thesis/data/dataset_B/zzzBD{x}.TXT"
sensor = 1
model = {"SVM": f"D:/thesis/models/sensor{sensor}/SVM.joblib",
"SVM with PCA": f"D:/thesis/models/sensor{sensor}/SVM with StandardScaler and PCA.joblib",
"XGBoost": f"D:/thesis/models/sensor{sensor}/XGBoost.joblib"}
index = ((x-1) % 5) + 1
inference_model(model["SVM"], file, column_question=index)
print("---")
inference_model(model["SVM with PCA"], file, column_question=index)
print("---")
inference_model(model["XGBoost"], file, column_question=index)

View File

@@ -1,14 +1,13 @@
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from joblib import load
from sklearn.model_selection import train_test_split as sklearn_split
def create_ready_data(
stft_data_path: str,
stratify: np.ndarray = None,
) -> tuple[pd.DataFrame, np.ndarray]:
) -> tuple:
"""
Create a stratified train-test split from STFT data.
@@ -22,13 +21,13 @@ def create_ready_data(
Returns:
--------
tuple
(pd.DataFrame, np.ndarray) - Combined data and corresponding labels
(X_train, X_test, y_train, y_test) - Split datasets
"""
ready_data = []
for file in os.listdir(stft_data_path):
ready_data.append(pd.read_csv(os.path.join(stft_data_path, file), skiprows=1))
ready_data.append(pd.read_csv(os.path.join(stft_data_path, file)))
y_data = [i for i in range(len(ready_data))] # TODO: Should be replaced with actual desired labels
y_data = [i for i in range(len(ready_data))]
# Combine all dataframes in ready_data into a single dataframe
if ready_data: # Check if the list is not empty
@@ -56,216 +55,3 @@ def create_ready_data(
y = np.array([])
return X, y
def train_and_evaluate_model(
model, model_name, sensor_label, x_train, y_train, x_test, y_test, export=None
):
"""
Train a machine learning model, evaluate its performance, and optionally export it.
This function trains the provided model on the training data, evaluates its
performance on test data using accuracy score, and can save the trained model
to disk if an export path is provided.
Parameters
----------
model : estimator object
The machine learning model to train.
model_name : str
Name of the model, used for the export filename and in the returned results.
sensor_label : str
Label identifying which sensor's data the model is being trained on.
x_train : array-like or pandas.DataFrame
The training input samples.
y_train : array-like
The target values for training.
x_test : array-like or pandas.DataFrame
The test input samples.
y_test : array-like
The target values for testing.
export : str, optional
Directory path where the trained model should be saved. If None, model won't be saved.
Returns
-------
dict
Dictionary containing:
- 'model': model_name (str)
- 'sensor': sensor_label (str)
- 'accuracy': accuracy percentage (float)
Example
-------
>>> from sklearn.svm import SVC
>>> from sklearn.model_selection import train_test_split
>>> X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)
>>> result = train_and_evaluate_model(
... SVC(),
... "SVM",
... "sensor1",
... X_train,
... y_train,
... X_test,
... y_test,
... export="models/sensor1"
... )
>>> print(f"Model accuracy: {result['accuracy']:.2f}%")
"""
from sklearn.metrics import accuracy_score
result = {"model": model_name, "sensor": sensor_label, "success": False}
try:
import time
start_time = time.time()
# Train the model
model.fit(x_train, y_train)
result["elapsed_time_training"] = time.time() - start_time
try:
# Predict on the test set (validation)
start_time = time.time()
y_pred = model.predict(x_test)
result["elapsed_time_validation"] = time.time() - start_time
result["y_pred"] = y_pred # Convert to numpy array
except Exception as e:
result["error"] = f"Prediction error: {str(e)}"
return result
# Calculate accuracy
try:
accuracy = accuracy_score(y_test, y_pred) * 100
result["accuracy"] = accuracy
except Exception as e:
result["error"] = f"Accuracy calculation error: {str(e)}"
return result
# Export model if requested
if export:
try:
import joblib
full_path = os.path.join(export, f"{model_name}.joblib")
os.makedirs(os.path.dirname(full_path), exist_ok=True)
joblib.dump(model, full_path)
print(f"Model saved to {full_path}")
except Exception as e:
print(f"Warning: Failed to export model to {export}: {str(e)}")
result["export_error"] = str(e)
# Continue despite export error
result["success"] = True
return result
except Exception as e:
result["error"] = f"Training error: {str(e)}"
return result
def plot_confusion_matrix(results_sensor, y_test, title):
"""
Plot confusion matrices for each model in results_sensor1.
Parameters:
-----------
results_sensor1 : list
List of dictionaries containing model results.
x_test1 : array-like
Test input samples.
y_test : array-like
True labels for the test samples.
Returns:
--------
None
This function will display confusion matrices for each model in results_sensor1.
Example
-------
>>> results_sensor1 = [
... {'model': 'model1', 'accuracy': 95.0},
... {'model': 'model2', 'accuracy': 90.0}
... ]
>>> x_test1 = np.random.rand(100, 10) # Example test data
>>> y_test = np.random.randint(0, 2, size=100) # Example true labels
>>> plot_confusion_matrix(results_sensor1, x_test1, y_test)
"""
# Iterate through each model result and plot confusion matrix
for i in results_sensor:
model = load(f"D:/thesis/models/{i['sensor']}/{i['model']}.joblib")
cm = confusion_matrix(y_test, i['y_pred']) # -> ndarray
# get the class labels
labels = model.classes_
# Plot
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues) # You can change colormap
plt.title(f"{title}")
def calculate_label_percentages(labels):
"""
Calculate and print the percentage distribution of unique labels in a numpy array.
Parameters:
labels (np.array): Input array of labels.
Returns:
None
"""
# Count occurrences of each unique label
unique, counts = np.unique(labels, return_counts=True)
# Calculate percentages
percentages = (counts / len(labels)) * 100
# Build and print the result string
result = "\n".join([f"Label {label}: {percentage:.2f}%" for label, percentage in zip(unique, percentages)])
return print(result)
def inference_model(
models, raw_file, column_question: int = None
):
"""
Perform inference using a trained machine learning model on a raw vibration data file with questioned column grid.
Parameters
----------
model : dict with some exported model path
The trained machine learning model to use for inference.
x_test : array-like or pandas.DataFrame
The input samples for which predictions are to be made.
export : str, optional
Directory path where the predictions should be saved. If None, predictions won't be saved.
Returns
-------
np.ndarray
Array of predicted values.
Example
-------
>>> from sklearn.svm import SVC
>>> model = {"SVM": "models/sensor1/SVM.joblib", "SVM with PCA": "models/sensor1/SVM_with_PCA.joblib"}
>>> inference_model(model["SVM"], "zzzAD1.TXT", column_question=1)
"""
df = pd.read_csv(raw_file, delim_whitespace=True, skiprows=10, header=0, memory_map=True)
col_idx = []
for i in range(1,6):
idx = [i, i+5, i+10, i+15, i+20, i+25]
col_idx.append(idx)
vibration_data = df.iloc[:, column_question].values
# Perform STFT
from scipy.signal import stft, hann
freq, times, Zxx = stft(
vibration_data,
fs=1024,
window=hann(1024),
nperseg=1024,
noverlap=1024-512
)
data = pd.DataFrame(np.abs(Zxx).T, columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, 1024/2, Zxx.shape[1])])
data = data.rename(columns={"Freq_0.00": "00"}) # To match the model input format
model = load(models) # Load the model from the provided path
return calculate_label_percentages(model.predict(data.iloc[:21,:]))

View File

@@ -25,10 +25,13 @@ window = hann(window_size)
Fs = 1024
# Number of damage cases (adjust as needed)
num_damage_cases = 0 # Change to 30 if you have 30 damage cases
num_damage_cases = 6 # Change to 30 if you have 30 damage cases
# Number of test runs per damage case
num_test_runs = 5
# Function to perform STFT and return magnitude
def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size):
def compute_stft(vibration_data):
frequencies, times, Zxx = stft(
vibration_data,
fs=Fs,
@@ -39,13 +42,9 @@ def compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_si
stft_magnitude = np.abs(Zxx)
return stft_magnitude.T # Transpose to have frequencies as columns
def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop_size, output_dirs=output_dirs):
def process_damage_case(damage_num):
damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
if damage_num == 0:
# Number of test runs per damage case
num_test_runs = 120
else:
num_test_runs = 5
# Check if the damage folder exists
if not os.path.isdir(damage_folder):
print(f"Folder {damage_folder} does not exist. Skipping...")
@@ -80,29 +79,20 @@ def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop
print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
continue
# Extract vibration data (assuming the second column is sensor data)
vibration_data = df.iloc[:, 1].values
# Perform STFT
stft_magnitude = compute_stft(vibration_data, Fs=Fs, window_size=window_size, hop_size=hop_size)
stft_magnitude = compute_stft(vibration_data)
# Convert STFT result to DataFrame
df_stft = pd.DataFrame(
stft_magnitude,
columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])]
)
# only inlcude 21 samples vector features for first 45 num_test_runs else include 22 samples vector features
if damage_num == 0:
print(f"Processing damage_num = 0, test_num = {test_num}")
if test_num <= 45:
df_stft = df_stft.iloc[:22, :]
print(f"Reduced df_stft shape (21 samples): {df_stft.shape}")
else:
df_stft = df_stft.iloc[:21, :]
print(f"Reduced df_stft shape (22 samples): {df_stft.shape}")
# Append to the aggregated list
aggregated_stft.append(df_stft)
print(sum(df.shape[0] for df in aggregated_stft))
# Concatenate all STFT DataFrames vertically
if aggregated_stft:
@@ -115,13 +105,11 @@ def process_damage_case(damage_num, Fs=Fs, window_size=window_size, hop_size=hop
)
# Save the aggregated STFT to CSV
with open(output_file, 'w') as file:
file.write('sep=,\n')
df_aggregated.to_csv(output_file, index=False)
df_aggregated.to_csv(output_file, index=False)
print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}")
else:
print(f"No STFT data aggregated for Sensor {sensor_num}, Damage {damage_num}.")
if __name__ == "__main__": # Added main guard for multiprocessing
with multiprocessing.Pool() as pool:
pool.map(process_damage_case, range(0, num_damage_cases + 1))
pool.map(process_damage_case, range(1, num_damage_cases + 1))

360
data/QUGS/convert.py Normal file
View File

@@ -0,0 +1,360 @@
import pandas as pd
import os
import re
import sys
import numpy as np
from colorama import Fore, Style, init
from typing import TypedDict, Dict, List
from joblib import load
from pprint import pprint
# class DamageFilesIndices(TypedDict):
# damage_index: int
# files: list[int]
OriginalSingleDamageScenarioFilePath = str
DamageScenarioGroupIndex = int
OriginalSingleDamageScenario = pd.DataFrame
SensorIndex = int
VectorColumnIndex = List[SensorIndex]
VectorColumnIndices = List[VectorColumnIndex]
DamageScenarioGroup = List[OriginalSingleDamageScenario]
GroupDataset = List[DamageScenarioGroup]
class DamageFilesIndices(TypedDict):
damage_index: int
files: List[str]
def generate_damage_files_index(**kwargs) -> DamageFilesIndices:
prefix: str = kwargs.get("prefix", "zzzAD")
extension: str = kwargs.get("extension", ".TXT")
num_damage: int = kwargs.get("num_damage")
file_index_start: int = kwargs.get("file_index_start")
col: int = kwargs.get("col")
base_path: str = kwargs.get("base_path")
damage_scenarios = {}
a = file_index_start
b = col + 1
for i in range(1, num_damage + 1):
damage_scenarios[i] = range(a, b)
a += col
b += col
# return damage_scenarios
x = {}
for damage, files in damage_scenarios.items():
x[damage] = [] # Initialize each key with an empty list
for i, file_index in enumerate(files, start=1):
if base_path:
x[damage].append(
os.path.normpath(
os.path.join(base_path, f"{prefix}{file_index}{extension}")
)
)
# if not os.path.exists(file_path):
# print(Fore.RED + f"File {file_path} does not exist.")
# continue
else:
x[damage].append(f"{prefix}{file_index}{extension}")
return x
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
# df = pd.read_csv( file_path, sep="\t", skiprows=10) # Read with explicit column names
class DataProcessor:
def __init__(self, file_index: DamageFilesIndices, cache_path: str = None):
self.file_index = file_index
if cache_path:
self.data = load(cache_path)
else:
self.data = self._load_all_data()
def _extract_column_names(self, file_path: str) -> List[str]:
"""
Extracts column names from the header of the given file.
Assumes the 6th line contains column names.
:param file_path: Path to the data file.
:return: List of column names.
"""
with open(file_path, "r") as f:
header_lines = [next(f) for _ in range(12)]
# Extract column names from the 6th line
channel_line = header_lines[10].strip()
tokens = re.findall(r'"([^"]+)"', channel_line)
if not channel_line.startswith('"'):
first_token = channel_line.split()[0]
tokens = [first_token] + tokens
return tokens # Prepend 'Time' column if applicable
def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
"""
Loads a single data file into a pandas DataFrame.
:param file_path: Path to the data file.
:return: DataFrame containing the numerical data.
"""
col_names = self._extract_column_names(file_path)
df = pd.read_csv(
file_path, delim_whitespace=True, skiprows=11, header=None, memory_map=True
)
df.columns = col_names
return df
def _load_all_data(self) -> GroupDataset:
"""
Loads all data files based on the grouping dictionary and returns a nested list.
:return: A nested list of DataFrames where the outer index corresponds to group_idx - 1.
"""
data = []
# Find the maximum group index to determine the list size
max_group_idx = max(self.file_index.keys()) if self.file_index else 0
# Initialize empty lists
for _ in range(max_group_idx):
data.append([])
# Fill the list with data
for group_idx, file_list in self.file_index.items():
# Adjust index to be 0-based
list_idx = group_idx - 1
data[list_idx] = [self._load_dataframe(file) for file in file_list]
return data
def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
"""
Returns the list of DataFrames for the given group index.
:param group_idx: Index of the group.
:return: List of DataFrames.
"""
return self.data.get([group_idx, []])
def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
"""
Returns the column names for the given group and file indices.
:param group_idx: Index of the group.
:param file_idx: Index of the file in the group.
:return: List of column names.
"""
if group_idx in self.data and len(self.data[group_idx]) > file_idx:
return self.data[group_idx][file_idx].columns.tolist()
return []
def get_data_info(self):
"""
Print information about the loaded data structure.
Adapted for when self.data is a List instead of a Dictionary.
"""
if isinstance(self.data, list):
# For each sublist in self.data, get the type names of all elements
pprint(
[
(
[type(item).__name__ for item in sublist]
if isinstance(sublist, list)
else type(sublist).__name__
)
for sublist in self.data
]
)
else:
pprint(
{
key: [type(df).__name__ for df in value]
for key, value in self.data.items()
}
if isinstance(self.data, dict)
else type(self.data).__name__
)
def _create_vector_column_index(self) -> VectorColumnIndices:
vector_col_idx: VectorColumnIndices = []
y = 0
for data_group in self.data: # len(data_group[i]) = 5
for j in data_group: # len(j[i]) =
c: VectorColumnIndex = [] # column vector c_{j}
x = 0
for _ in range(6): # TODO: range(6) should be dynamic and parameterized
c.append(x + y)
x += 5
vector_col_idx.append(c)
y += 1
return vector_col_idx
def create_vector_column(self, overwrite=True) -> List[List[List[pd.DataFrame]]]:
"""
Create a vector column from the loaded data.
:param overwrite: Overwrite the original data with vector column-based data.
"""
idx = self._create_vector_column_index()
# if overwrite:
for i in range(len(self.data)):
for j in range(len(self.data[i])):
# Get the appropriate indices for slicing from idx
indices = idx[j]
# Get the current DataFrame
df = self.data[i][j]
# Keep the 'Time' column and select only specified 'Real' columns
# First, we add 1 to all indices to account for 'Time' being at position 0
real_indices = [index + 1 for index in indices]
# Create list with Time column index (0) and the adjusted Real indices
all_indices = [0] + real_indices
# Apply the slicing
self.data[i][j] = df.iloc[:, all_indices]
# TODO: if !overwrite:
def create_limited_sensor_vector_column(self, overwrite=True):
"""
Create a vector column from the loaded data.
:param overwrite: Overwrite the original data with vector column-based data.
"""
idx = self._create_vector_column_index()
# if overwrite:
for i in range(len(self.data)): # damage(s)
for j in range(len(self.data[i])): # col(s)
# Get the appropriate indices for slicing from idx
indices = idx[j]
# Get the current DataFrame
df = self.data[i][j]
# Keep the 'Time' column and select only specifid 'Real' colmns
# First, we add 1 to all indices to acount for 'Time' being at positiion 0
real_indices = [index + 1 for index in indices]
# Create list with Time column index (0) and the adjustedd Real indices
all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
# Apply the slicing
self.data[i][j] = df.iloc[:, all_indices]
# TODO: if !overwrite:
def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
"""
Export the processed data to CSV files in the required folder structure.
:param output_dir: Directory to save the CSV files.
:param file_prefix: Prefix for the output filenames.
"""
for group_idx, group in enumerate(self.data, start=1):
group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
os.makedirs(group_folder, exist_ok=True)
for test_idx, df in enumerate(group, start=1):
# Ensure columns are named uniquely if duplicated
df = df.copy()
df.columns = ["Time", "Real_0", "Real_1"] # Rename
# Export first Real column
out1 = os.path.join(
group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv"
)
df[["Time", "Real_0"]].rename(columns={"Real_0": "Real"}).to_csv(
out1, index=False
)
# Export last Real column
out2 = os.path.join(
group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv"
)
df[["Time", "Real_1"]].rename(columns={"Real_1": "Real"}).to_csv(
out2, index=False
)
def create_damage_files(base_path, output_base, prefix):
# Initialize colorama
init(autoreset=True)
# Generate column labels based on expected duplication in input files
columns = ["Real"] + [
f"Real.{i}" for i in range(1, 30)
] # Explicitly setting column names
sensor_end_map = {
1: "Real.25",
2: "Real.26",
3: "Real.27",
4: "Real.28",
5: "Real.29",
}
# Define the damage scenarios and the corresponding original file indices
damage_scenarios = {
1: range(1, 6), # Damage 1 files from zzzAD1.csv to zzzAD5.csv
2: range(6, 11), # Damage 2 files from zzzAD6.csv to zzzAD10.csv
3: range(11, 16), # Damage 3 files from zzzAD11.csv to zzzAD15.csvs
4: range(16, 21), # Damage 4 files from zzzAD16.csv to zzzAD20.csv
5: range(21, 26), # Damage 5 files from zzzAD21.csv to zzzAD25.csv
6: range(26, 31), # Damage 6 files from zzzAD26.csv to zzzAD30.csv
}
damage_pad = len(str(len(damage_scenarios)))
test_pad = len(str(30))
for damage, files in damage_scenarios.items():
for i, file_index in enumerate(files, start=1):
# Load original data file
file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
df = pd.read_csv(
file_path, sep="\t", skiprows=10
) # Read with explicit column names
top_sensor = columns[i - 1]
print(top_sensor, type(top_sensor))
output_file_1 = os.path.join(
output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_01.csv"
)
print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT")
print("Taking datetime column on index 0...")
print(f"Taking `{top_sensor}`...")
os.makedirs(os.path.dirname(output_file_1), exist_ok=True)
df[["Time", top_sensor]].to_csv(output_file_1, index=False)
print(Fore.GREEN + "Done")
bottom_sensor = sensor_end_map[i]
output_file_2 = os.path.join(
output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_02.csv"
)
print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT")
print("Taking datetime column on index 0...")
print(f"Taking `{bottom_sensor}`...")
os.makedirs(os.path.dirname(output_file_2), exist_ok=True)
df[["Time", bottom_sensor]].to_csv(output_file_2, index=False)
print(Fore.GREEN + "Done")
print("---")
def main():
if len(sys.argv) < 2:
print("Usage: python convert.py <path_to_csv_files>")
sys.exit(1)
base_path = sys.argv[1]
output_base = sys.argv[2]
prefix = sys.argv[3] # Define output directory
# Create output folders if they don't exist
# for i in range(1, 7):
# os.makedirs(os.path.join(output_base, f'DAMAGE_{i}'), exist_ok=True)
create_damage_files(base_path, output_base, prefix)
print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
if __name__ == "__main__":
main()

View File

@@ -1,52 +1,25 @@
from data_preprocessing import *
from convert import *
from joblib import dump, load
# b = generate_damage_files_index(
# num_damage=6,
# file_index_start=1,
# col=5,
# base_path="D:/thesis/data/dataset_B",
# prefix="zzzBD",
# # undamage_file="zzzBU.TXT"
# )
# Example: Generate tuples with a special group of df0 at the beginning
special_groups_A = [
{'df_name': 'zzzAU.TXT', 'position': 0, 'size': 5} # Add at beginning
]
special_groups_B = [
{'df_name': 'zzzBU.TXT', 'position': 0, 'size': 5} # Add at beginning
]
# Generate the tuples with the special group
a_complement = [(comp)
for n in range(1, 31)
for comp in complement_pairs(n)]
a = generate_df_tuples(special_groups=a_complement, prefix="zzzAD")
# b_complement = [(comp)
# for n in range(1, 31)
# for comp in complement_pairs(n)]
# b = generate_df_tuples(special_groups=b_complement, prefix="zzzBD")
# a = generate_damage_files_index(
# num_damage=6,
# file_index_start=1,
# col=5,
# base_path="D:/thesis/data/dataset_A",
# prefix="zzzAD",
# # undamage_file="zzzBU.TXT"
# num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
# )
data_A = DataProcessor(file_index=a, base_path="D:/thesis/data/dataset_A", include_time=True)
# data_A.create_vector_column(overwrite=True)
# # data_A.create_limited_sensor_vector_column(overwrite=True)
data_A.export_to_csv("D:/thesis/data/converted/raw")
b = generate_damage_files_index(
num_damage=6,
file_index_start=1,
col=5,
base_path="D:/thesis/data/dataset_B",
prefix="zzzBD",
)
# data_A = DataProcessor(file_index=a)
# # data.create_vector_column(overwrite=True)
# data_A.create_limited_sensor_vector_column(overwrite=True)
# data_A.export_to_csv("D:/thesis/data/converted/raw")
# data_B = DataProcessor(file_index=b, base_path="D:/thesis/data/dataset_B", include_time=True)
# data_B.create_vector_column(overwrite=True)
# # data_B.create_limited_sensor_vector_column(overwrite=True)
# data_B.export_to_csv("D:/thesis/data/converted/raw_B")
data_B = DataProcessor(file_index=b)
# data.create_vector_column(overwrite=True)
data_B.create_limited_sensor_vector_column(overwrite=True)
data_B.export_to_csv("D:/thesis/data/converted/raw_B")
# a = load("D:/cache.joblib")
# breakpoint()
# breakpoint()

View File

@@ -3,7 +3,7 @@ Alur keseluruhan penelitian ini dilakukan melalui tahapan-tahapan sebagai beriku
\begin{figure}[H]
\centering
\includegraphics[width=0.3\linewidth]{chapters/img/flow.png}
\includegraphics[width=0.3\linewidth]{chapters/id/flow.png}
\caption{Diagram alir tahapan penelitian}
\label{fig:flowchart}
\end{figure}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 188 KiB

View File

@@ -1,78 +0,0 @@
% % A new command that enables us to enter bi-lingual (Slovene and English) terms
% % syntax: \addterm[options]{label}{Slovene}{Slovene first use}{English}{Slovene
% % description}
% \newcommand{\addterm}[6][]{
% \newglossaryentry{#2}{
% name={#3 (angl.\ #5)},
% first={#4 (\emph{#5})},
% text={#3},
% sort={#3},
% description={#6},
% #1 % pass additional options to \newglossaryentry
% }
% }
% % A new command that enables us to enter (English) acronyms with bi-lingual
% % (Slovene and English) long versions
% % syntax: \addacronym[options]{label}{abbreviation}{Slovene long}{Slovene first
% % use long}{English long}{Slovene description}
% \newcommand{\addacronym}[7][]{
% % Create the main glossary entry with \newacronym
% % \newacronym[key-val list]{label}{abbrv}{long}
% \newacronym[
% name={#4 (angl.\ #6,\ #3)},
% first={\emph{#5} (angl.\ \emph{#6},\ \emph{#3})},
% sort={#4},
% description={#7},
% #1 % pass additional options to \newglossaryentry
% ]
% {#2}{#3}{#4}
% % Create a cross-reference from the abbreviation to the main glossary entry by
% % creating an auxiliary glossary entry (note: we set the label of this entry
% % to '<original label>_auxiliary' to avoid clashes)
% \newglossaryentry{#2_auxiliary}{
% name={#3},
% sort={#3},
% description={\makefirstuc{#6}},
% see=[See:]{#2}
% }
% }
% % Change the text of the cross-reference links to the Slovene long version.
% \renewcommand*{\glsseeitemformat}[1]{\emph{\acrlong{#1}}.}
% Define the Indonesian term and link it to the English term
\newglossaryentry{jaringansaraf}{
name=Jaringan Saraf,
description={The Indonesian term for \gls{nn}}
}
% \newglossaryentry{pemelajaranmesin}{
% name=Pemelajaran Mesin,
% description={Lihat \gls{machinelearning}}
% }
% Define the English term and link it to its acronym
\newglossaryentry{neuralnetwork}{
name=Neural Network,
description={A computational model inspired by the human brain, see \gls{nn}}
}
% \newglossaryentry{machinelearning}{
% name=Machine Learning,
% description={A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}}
% \newglossaryentry{pemelajaranmesin}{
% name={pemelajaran mesin (angl.\ #5)},
% first={pemelajaran mesin (\emph{machine learning})},
% text={pemelajaran mesin},
% sort={ },
% description={#6},
% #1 % pass additional options to \newglossaryentry
% }
\longnewglossaryentry{machinelearning}{name={machine learning}}
{A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}
\newterm[see={machinelearning}]{pemelajaranmesin}
% \newglossaryentry{pemelajaran mesin}{}
% \addterm{machinelearning}{pemelajaran mesin}{pemelajaran mesin}{machine learning}{A program or system that trains a model from input data. The trained model can make useful predictions from new (never-before-seen) data drawn from the same distribution as the one used to train the model.}
\newacronym
[description={statistical pattern recognition technique}]
{svm}{SVM}{support vector machine}

View File

@@ -1,18 +1,14 @@
\documentclass[draftmark]{thesis}
% Metadata
\title{Prediksi Lokasi Kerusakan dengan Machine Learning}
\author{Rifqi Damar Panuluh}
\date{\today}
\authorid{20210110224}
\firstadvisor{Ir. Muhammad Ibnu Syamsi, Ph.D.}
\secondadvisor{}
\headdepartement{Puji Harsanto, S.T., M.T., Ph.D.}
\headdepartementid{19740607201404123064}
\faculty{Fakultas Teknik}
\program{Program Studi Teknik Sipil}
\university{Universitas Muhammadiyah Yogyakarta}
\yearofsubmission{2025}
% Title Information
\setthesisinfo
{Prediksi Lokasi Kerusakan dengan Machine Learning}
{Rifqi Damar Panuluh}
{20210110224}
{PROGRAM STUDI TEKNIK SIPIL}
{FAKULTAS TEKNIK}
{UNIVERSITAS MUHAMMADIYAH YOGYAKARTA}
{2025}
% Input preamble
\input{preamble/packages}
@@ -20,19 +16,22 @@
\input{preamble/macros}
\begin{document}
% \input{frontmatter/maketitle}
% \input{frontmatter/maketitle_secondary}
\maketitle
\frontmatter
% \input{frontmatter/approval}\clearpage
% \input{frontmatter/originality}\clearpage
% \input{frontmatter/acknowledgement}\clearpage
% \tableofcontents
\input{frontmatter/approval}\clearpage
\input{frontmatter/originality}\clearpage
\input{frontmatter/acknowledgement}\clearpage
\tableofcontents
\clearpage
\mainmatter
\pagestyle{fancyplain}
% Include content
\include{content/abstract}
\include{content/introduction}
\include{chapters/01_introduction}
\include{chapters/id/02_literature_review/index}
\include{chapters/id/03_methodology/index}
\include{content/chapter2}
\include{content/conclusion}
% Bibliography
% \bibliographystyle{IEEEtran}

11
latex/metadata.tex Normal file
View File

@@ -0,0 +1,11 @@
\newcommand{\studentname}{Rifqi Damar Panuluh}
\newcommand{\studentid}{20210110224}
\newcommand{\thesistitle}{Prediksi Lokasi Kerusakan dengan Machine Learning}
\newcommand{\firstadvisor}{Ir. Muhammad Ibnu Syamsi, Ph.D.}
\newcommand{\secondadvisor}{}
\newcommand{\headdepartement}{Puji Harsanto, S.T. M.T., Ph.D.}
\newcommand{\headdepartementid}{19740607201404123064}
\newcommand{\faculty}{Fakultas Teknik}
\newcommand{\program}{Teknik Sipil}
\newcommand{\university}{Universitas Muhammadiyah Yogyakarta}
\newcommand{\yearofsubmission}{2025}

View File

@@ -1,7 +1,7 @@
\NeedsTeXFormat{LaTeX2e}
\ProvidesClass{thesis}[2025/05/10 Bachelor Thesis Class]
\newif\if@draftmark \@draftmarkfalse
\newif\if@draftmark
\@draftmarkfalse
\DeclareOption{draftmark}{\@draftmarktrue}
@@ -12,7 +12,6 @@
\RequirePackage{polyglossia}
\RequirePackage{fontspec}
\RequirePackage{titlesec}
\RequirePackage{titling}
\RequirePackage{fancyhdr}
\RequirePackage{geometry}
\RequirePackage{setspace}
@@ -25,31 +24,30 @@
\RequirePackage{svg} % Allows including SVG images directly
\RequirePackage{indentfirst} % Makes first paragraph after headings indented
\RequirePackage{float} % Provides [H] option to force figure/table placement
\RequirePackage[style=apa, backend=biber]{biblatex}
\RequirePackage[acronym, nogroupskip, toc]{glossaries}
% Polyglossia set language
\setdefaultlanguage[variant=indonesian]{malay} % Proper Indonesian language setup
\setotherlanguage{english} % Enables English as secondary language
\DefineBibliographyStrings{english}{% % Customizes bibliography text
andothers={dkk\adddot}, % Changes "et al." to "dkk."
pages={hlm\adddot}, % Changes "pp." to "hlm."
}
+ \setdefaultlanguage[variant=indonesian]{malay} % Proper Indonesian language setup
+ \setotherlanguage{english} % Enables English as secondary language
+ \DefineBibliographyStrings{english}{% % Customizes bibliography text
+ andothers={dkk\adddot}, % Changes "et al." to "dkk."
+ pages={hlm\adddot}, % Changes "pp." to "hlm."
+ }
% Conditionally load the watermark package and settings
\if@draftmark
\RequirePackage{draftwatermark}
\SetWatermarkText{nuluh/thesis (wip) [draft: \today]}
\SetWatermarkText{nuluh/thesis (wip) draft: \today}
\SetWatermarkColor[gray]{0.8} % Opacity: 0.8 = 20% transparent
\SetWatermarkFontSize{1.5cm}
\SetWatermarkAngle{90}
\SetWatermarkHorCenter{1.5cm}
\RequirePackage[left]{lineno}
\linenumbers
\fi
% Page layout
\geometry{left=4cm, top=3cm, right=3cm, bottom=3cm}
\geometry{left=3cm, top=3cm, right=3cm, bottom=3cm}
\setlength{\parskip}{0.5em}
\setlength{\parindent}{0pt}
\onehalfspacing
% Fonts
@@ -58,45 +56,19 @@
\setsansfont{Arial}
\setmonofont{Courier New}
\makeatletter
% Extracting the Year from \today
\newcommand{\theyear}{%
\expandafter\@car\expandafter\@gobble\the\year\@nil
% Metadata commands
\input{metadata}
\newcommand{\setthesisinfo}[7]{%
\renewcommand{\thesistitle}{#1}%
\renewcommand{\studentname}{#2}%
\renewcommand{\studentid}{#3}%
\renewcommand{\program}{#4}%
\renewcommand{\faculty}{#5}%
\renewcommand{\university}{#6}%
\renewcommand{\yearofsubmission}{#7}%
}
% Declare internal macros as initially empty
\newcommand{\@authorid}{}
\newcommand{\@firstadvisor}{}
\newcommand{\@secondadvisor}{}
\newcommand{\@headdepartement}{}
\newcommand{\@headdepartementid}{}
\newcommand{\@faculty}{}
\newcommand{\@program}{}
\newcommand{\@university}{}
\newcommand{\@yearofsubmission}{}
% Define user commands to set these values.
\newcommand{\authorid}[1]{\gdef\@authorid{#1}}
\newcommand{\firstadvisor}[1]{\gdef\@firstadvisor{#1}}
\newcommand{\secondadvisor}[1]{\gdef\@secondadvisor{#1}}
\newcommand{\headdepartement}[1]{\gdef\@headdepartement{#1}}
\newcommand{\headdepartementid}[1]{\gdef\@headdepartementid{#1}}
\newcommand{\faculty}[1]{\gdef\@faculty{#1}}
\newcommand{\program}[1]{\gdef\@program{#1}}
\newcommand{\university}[1]{\gdef\@university{#1}}
\newcommand{\yearofsubmission}[1]{\gdef\@yearofsubmission{#1}}
% Now expose robust the getters to access the values
\newcommand{\theauthorid}{\@authorid}
\newcommand{\thefirstadvisor}{\@firstadvisor}
\newcommand{\thesecondadvisor}{\@secondadvisor}
\newcommand{\theheaddepartement}{\@headdepartement}
\newcommand{\theheaddepartementid}{\@headdepartementid}
\newcommand{\thefaculty}{\@faculty}
\newcommand{\theprogram}{\@program}
\newcommand{\theuniversity}{\@university}
\newcommand{\theyearofsubmission}{\@yearofsubmission}
\makeatother
% % Header and footer
\fancypagestyle{fancy}{%
\fancyhf{}
@@ -138,6 +110,11 @@
\renewcommand{\cftchappresnum}{BAB~}
\renewcommand{\cftchapaftersnum}{\quad}
% \titlespacing*{\chapter}{0pt}{-10pt}{20pt}
% Redefine \maketitle
\renewcommand{\maketitle}{\input{frontmatter/maketitle}}
% Chapter & Section format
\renewcommand{\cftchapfont}{\normalsize\MakeUppercase}
% \renewcommand{\cftsecfont}{}
@@ -159,15 +136,11 @@
\setlength{\cftsubsecnumwidth}{2.5em}
\setlength{\cftfignumwidth}{5em}
\setlength{\cfttabnumwidth}{4em}
\renewcommand \cftchapdotsep{1} % https://tex.stackexchange.com/a/273764
\renewcommand \cftsecdotsep{1} % https://tex.stackexchange.com/a/273764
\renewcommand \cftsubsecdotsep{1} % https://tex.stackexchange.com/a/273764
\renewcommand \cftfigdotsep{1.5} % https://tex.stackexchange.com/a/273764
\renewcommand \cfttabdotsep{1.5} % https://tex.stackexchange.com/a/273764
\renewcommand \cftchapdotsep{1} % Denser dots (closer together) https://tex.stackexchange.com/a/273764
\renewcommand \cftsecdotsep{1} % Apply to sections too
\renewcommand \cftsubsecdotsep{1} % Apply to subsections too
\renewcommand{\cftchapleader}{\normalfont\cftdotfill{\cftsecdotsep}}
\renewcommand{\cftchappagefont}{\normalfont}
% Add Prefix in the Lof and LoT entries
\renewcommand{\cftfigpresnum}{\figurename~}
\renewcommand{\cfttabpresnum}{\tablename~}
@@ -192,147 +165,6 @@
% \renewcommand{\cfttoctitlefont}{\bfseries\MakeUppercase}
% \renewcommand{\cftaftertoctitle}{\vskip 2em}
% Defines a new glossary called notation
\newglossary[nlg]{notation}{not}{ntn}{Notation}
% Define the header for the location column
\providecommand*{\locationname}{Location}
% Define the new glossary style called 'mylistalt' for main glossaries
\makeatletter
\newglossarystyle{mylistalt}{%
% start the list, initializing glossaries internals
\renewenvironment{theglossary}%
{\glslistinit\begin{enumerate}}%
{\end{enumerate}}%
% suppress all headers/groupskips
\renewcommand*{\glossaryheader}{}%
\renewcommand*{\glsgroupheading}[1]{}%
\renewcommand*{\glsgroupskip}{}%
% main entries: let \item produce "1." etc., then break
\renewcommand*{\glossentry}[2]{%
\item \glstarget{##1}{\glossentryname{##1}}%
\mbox{}\\
\glossentrydesc{##1}\space
[##2] % appears on page x
}%
% sub-entries as separate paragraphs, still aligned
\renewcommand*{\subglossentry}[3]{%
\par
\glssubentryitem{##2}%
\glstarget{##2}{\strut}\space
\glossentrydesc{##2}\space ##3%
}%
}
% Define the new glossary style 'altlong3customheader' for notation
\newglossarystyle{altlong3customheader}{%
% The glossary will be a longtable environment with three columns:
% 1. Symbol (left-aligned)
% 2. Description (paragraph, width \glsdescwidth)
% 3. Location (paragraph, width \glspagelistwidth)
\renewenvironment{theglossary}%
{\begin{longtable}{lp{\glsdescwidth}p{\glspagelistwidth}}}%
{\end{longtable}}%
% Define the table header row
\renewcommand*{\symbolname}{Simbol}
\renewcommand*{\descriptionname}{Keterangan}
\renewcommand*{\locationname}{Halaman}
\renewcommand*{\glossaryheader}{%
\bfseries\symbolname & \bfseries\descriptionname & \bfseries\locationname \tabularnewline\endhead}%
% Suppress group headings (e.g., A, B, C...)
\renewcommand*{\glsgroupheading}[1]{}%
% Define how a main glossary entry is displayed
% ##1 is the entry label
% ##2 is the location list (page numbers)
\renewcommand{\glossentry}[2]{%
\glsentryitem{##1}% Inserts entry number if entrycounter option is used
\glstarget{##1}{\glossentryname{##1}} & % Column 1: Symbol (with hyperlink target)
\glossentrydesc{##1}\glspostdescription & % Column 2: Description (with post-description punctuation)
##2\tabularnewline % Column 3: Location list
}%
% Define how a sub-entry is displayed
% ##1 is the sub-entry level (e.g., 1 for first sub-level)
% ##2 is the entry label
% ##3 is the location list
\renewcommand{\subglossentry}[3]{%
& % Column 1 (Symbol) is left blank for sub-entries to create an indented look
\glssubentryitem{##2}% Inserts sub-entry number if subentrycounter is used
\glstarget{##2}{\strut}\glossentrydesc{##2}\glspostdescription & % Column 2: Description (target on strut for hyperlink)
##3\tabularnewline % Column 3: Location list
}%
% Define the skip between letter groups (if group headings were enabled)
% For 3 columns, we need 2 ampersands for a full blank row if not using \multicolumn
\ifglsnogroupskip
\renewcommand*{\glsgroupskip}{}%
\else
\renewcommand*{\glsgroupskip}{& & \tabularnewline}%
\fi
}
% Define a new style 'supercol' based on 'super' for acronyms glossaries
\newglossarystyle{supercol}{%
\setglossarystyle{super}% inherit everything from the original
% override just the main-entry format:
\renewcommand*{\glossentry}[2]{%
\glsentryitem{##1}%
\glstarget{##1}{\glossentryname{##1}}\space % <-- added colon here
&: \glossentrydesc{##1}\glspostdescription\space ##2\tabularnewline
}%
% likewise for subentries, if you want a colon there too:
\renewcommand*{\subglossentry}[3]{%
&:
\glssubentryitem{##2}%
\glstarget{##2}{\strut}\glossentryname{##2}\space % <-- and here
\glossentrydesc{##2}\glspostdescription\space ##3\tabularnewline
}%
}
\makeatother
% A new command that enables us to enter bi-lingual (Bahasa Indonesia and English) terms
% syntax: \addterm[options]{label}{Bahasa Indonesia}{Bahasa Indonesia first use}{English}{Bahasa Indonesia
% description}
\newcommand{\addterm}[6][]{
\newglossaryentry{#2}{
name={#3 (angl.\ #5)},
first={#4 (\emph{#5})},
text={#3},
sort={#3},
description={#6},
#1 % pass additional options to \newglossaryentry
}
}
% A new command that enables us to enter (English) acronyms with bi-lingual
% (Bahasa Indonesia and English) long versions
% syntax: \addacronym[options]{label}{abbreviation}{Bahasa Indonesia long}{Bahasa Indonesia first
% use long}{English long}{Bahasa Indonesia description}
\newcommand{\addacronym}[7][]{
% Create the main glossary entry with \newacronym
% \newacronym[key-val list]{label}{abbrv}{long}
\newacronym[
name={#4 (angl.\ #6,\ #3)},
first={\emph{#5} (angl.\ \emph{#6},\ \emph{#3})},
sort={#4},
description={#7},
#1 % pass additional options to \newglossaryentry
]
{#2}{#3}{#4}
% Create a cross-reference from the abbreviation to the main glossary entry by
% creating an auxiliary glossary entry (note: we set the label of this entry
% to '<original label>_auxiliary' to avoid clashes)
\newglossaryentry{#2_auxiliary}{
name={#3},
sort={#3},
description={\makefirstuc{#6}},
see=[See:]{#2}
}
}
% Change the text of the cross-reference links to the Bahasa Indonesia long version.
\renewcommand*{\glsseeitemformat}[1]{\emph{\acrlong{#1}}.}
% % Apply a custom fancyhdr layout only on the first page of each \chapter, and use no header/footer elsewhere
% % \let\oldchapter\chapter
% % \renewcommand{\chapter}{%