Revise README to simplify content and remove private visibility

Removed sections on access restrictions and copyright details.
Update latexdiff.yml
2026-01-29 12:14:35 +07:00 · 2025-06-04 15:11:21 +07:00 · 2025-06-04 15:00:49 +07:00 · 2025-06-04 14:21:39 +07:00 · 2025-06-04 13:41:33 +07:00 · 2025-06-04 13:34:44 +07:00
10 changed files with 489 additions and 139 deletions
--- a/.github/workflows/latex-lint.yml
+++ b/.github/workflows/latex-lint.yml
@@ -0,0 +1,52 @@
 # Lints every .tex file under latex/ with ChkTeX. Runs on pushes to main/dev
 # that touch LaTeX sources, or on manual dispatch. The job fails if ChkTeX
 # exits non-zero for any file.
 name: LaTeX Lint
 on:
  push:
    branches:
      - main
      - dev
    paths:
      - 'latex/**/*.tex'
      - 'latex/main.tex'
  workflow_dispatch:
 jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install chktex
        run: |
          sudo apt-get update
          sudo apt-get install -y chktex
      - name: Run chktex inside latex/
        working-directory: latex
        # The script uses bash-only features (mapfile, process substitution).
        shell: bash
        run: |
          # Collect paths NUL-delimited into an array so file names containing
          # spaces or glob characters are neither split nor expanded.
          mapfile -d '' -t TEX_FILES < <(find . -type f -name "*.tex" -print0)
          if [ "${#TEX_FILES[@]}" -eq 0 ]; then
            echo "No .tex files found in latex/. Skipping lint."
            exit 0
          fi
          echo "🔍 Linting .tex files with chktex..."
          FAIL=0
          for f in "${TEX_FILES[@]}"; do
            echo "▶ Checking $f"
            # Run chktex and show output; capture error status
            if ! chktex "$f"; then
              # Annotation paths are resolved from the repository root, while
              # find reports paths relative to latex/ (./foo.tex).
              echo "::warning file=latex/${f#./}::ChkTeX found issues in $f"
              FAIL=1
            fi
          done
          if [ "$FAIL" -ne 0 ]; then
            echo "::error::❌ Lint errors or warnings were found in one or more .tex files above."
            exit 1
          else
            echo "✅ All files passed chktex lint."
          fi
--- a/.github/workflows/latexdiff.yml
+++ b/.github/workflows/latexdiff.yml
@@ -0,0 +1,102 @@
 # Manually dispatched workflow: checks out two branches side by side, flattens
 # latex/main.tex on each with latexpand, runs latexdiff on the flattened files,
 # compiles the result with XeLaTeX and uploads diff_output/ as an artifact.
 name: LaTeX Diff
 on:
  workflow_dispatch:
    inputs:
      base_branch:
        description: 'Base branch (older version)'
        required: true
      compare_branch:
        description: 'Compare branch (new version)'
        required: true
 jobs:
  latexdiff:
    runs-on: ubuntu-latest
    # All steps run inside a TeX Live container, as root.
    container:
      image: ghcr.io/xu-cheng/texlive-full:latest
      options: --user root
    steps:
      - name: Install latexpand (Perl script)
        run: |
          tlmgr init-usertree
          tlmgr install latexpand
      # The two branches are checked out into sibling directories base/ and
      # compare/ so both versions exist in the workspace at the same time.
      - name: Checkout base branch
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.base_branch }}
          path: base
      - name: Checkout compare branch
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.compare_branch }}
          path: compare
      - name: Create output folder
        run: mkdir -p diff_output
      # latexpand is run from inside latex/ so that paths referenced by
      # main.tex are looked up relative to it; output goes to diff_output/.
      - name: Flatten base/main.tex (with latexpand)
        run: |
          cd base/latex
          echo "📂 Listing files in base/latex:"
          ls -R
          echo "🔄 Flattening with latexpand..."
          latexpand --verbose --keep-comments --output=../../diff_output/base_flat.tex main.tex
          echo "✅ Preview of base_flat.tex:"
          head -n 50 ../../diff_output/base_flat.tex
      - name: Flatten compare/main.tex (with latexpand)
        run: |
          cd compare/latex
          echo "📂 Listing files in compare/latex:"
          ls -R
          echo "🔄 Flattening with latexpand..."
          latexpand --verbose --keep-comments --output=../../diff_output/compare_flat.tex main.tex
          echo "✅ Preview of compare_flat.tex:"
          head -n 50 ../../diff_output/compare_flat.tex
      - name: Generate diff.tex using latexdiff
        run: |
          latexdiff diff_output/base_flat.tex diff_output/compare_flat.tex > diff_output/diff.tex
      # The class file, images and bibliography are all taken from the compare
      # branch and placed next to diff.tex, which is compiled in diff_output/.
      - name: Copy thesis.cls to diff_output
        run: cp compare/latex/thesis.cls diff_output/
      - name: Copy chapters/img into diff_output
        run: |
          # Create the same chapters/img path inside diff_output
          mkdir -p diff_output/chapters/img
          # Copy all images from compare branch into diff_output
          cp -R compare/latex/chapters/img/* diff_output/chapters/img/
      - name: Copy .bib files into diff_output
        run: |
          mkdir -p diff_output
          cp compare/latex/*.bib diff_output/
      # Uses sed's `c` (change line) command, intended to swap the line holding
      # \input{preamble/fonts} for a comment plus \setmainfont/\setsansfont/
      # \setmonofont declarations selecting the TeX Gyre families.
      # NOTE(review): relies on GNU sed interpreting \n in the `c` text and on
      # the layered shell/sed backslash escaping - confirm before editing.
      - name: Override “\input{preamble/fonts}” in diff.tex
        run: |
          sed -i "/\\input{preamble\/fonts}/c % — replaced by CI: use TeX Gyre fonts instead of Times New Roman\/Arial\n\\\setmainfont{TeX Gyre Termes}\n\\\setsansfont{TeX Gyre Heros}\n\\\setmonofont{TeX Gyre Cursor}" diff_output/diff.tex
      - name: Print preview of diff.tex (after font override)
        run: |
          echo "📄 Preview of diff_output/diff.tex after font override:"
          head -n 50 diff_output/diff.tex
      # continue-on-error lets the upload step below run even when XeLaTeX
      # fails, so diff.tex and any logs are still published.
      # XeLaTeX is run twice, presumably to resolve cross-references - confirm.
      - name: Compile diff.tex to PDF
        working-directory: diff_output
        continue-on-error: true
        run: |
          xelatex -interaction=nonstopmode diff.tex
          xelatex -interaction=nonstopmode diff.tex
      - name: Upload diff output files
        uses: actions/upload-artifact@v4
        with:
          name: latex-diff-output
          path: diff_output/
--- a/.github/workflows/latexmk.yml
+++ b/.github/workflows/latexmk.yml
@@ -0,0 +1,29 @@
 # Builds latex/main.tex with XeLaTeX for every pull request targeting dev and
 # publishes the resulting PDF as a workflow artifact.
 name: Render XeLaTeX on PR to dev
 on:
  pull_request:
    branches:
      - dev
 jobs:
  build-pdf:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      # -halt-on-error makes the build stop at the first LaTeX error;
      # -file-line-error reports errors in file:line form.
      # NOTE(review): fonts-freefont-otf is presumably needed for fonts used
      # by the document - confirm against the preamble.
      - name: Compile XeLaTeX
        uses: dante-ev/latex-action@2021-A
        with:
          root_file: main.tex
          working_directory: latex
          compiler: xelatex
          args: -interaction=nonstopmode -halt-on-error -file-line-error
          extra_system_packages: "fonts-freefont-otf"
      # main.pdf is written next to main.tex inside latex/.
      - name: Upload compiled PDF
        uses: actions/upload-artifact@v4
        with:
          name: compiled-pdf
          path: latex/main.pdf
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 data/**/*.csv
 .venv/
 *.pyc
 *.egg-info/
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,4 @@
 {
-  "python.analysis.extraPaths": ["./code/src/features"]
+  "python.analysis.extraPaths": ["./code/src/features"],
  "jupyter.notebookFileRoot": "${workspaceFolder}/code"
 }
--- a/README.md
+++ b/README.md
@@ -4,15 +4,14 @@ This repository contains the work related to my thesis, which focuses on damage
 **Note:** This repository does not contain the secondary data used in the analysis. The code is designed to work with data from the [QUGS (Qatar University Grandstand Simulator)](https://www.structuralvibration.com/benchmark/qugs/) dataset, which is not included here.
 The repository is private and access is restricted only to those who have been given explicit permission by the owner. Access is provided solely for the purpose of brief review or seeking technical guidance.
 ## Restrictions
 - **No Derivative Works or Cloning:** Any form of copying, cloning, or creating derivative works based on this repository is strictly prohibited.
 - **Limited Access:** Use beyond brief review or collaboration is not allowed without prior permission from the owner.
 ---
 All contents of this repository, including the thesis idea, code, and associated data, are copyrighted © 2024 by Rifqi Panuluh. Unauthorized use or duplication is prohibited.
 [LICENSE](https://github.com/nuluh/thesis?tab=License-1-ov-file#readme)
 ## How to Run `stft.ipynb`
 1. Run `pip install -e .` from the project root first.
 2. Open and run the notebook.
--- a/code/notebooks/stft.ipynb
+++ b/code/notebooks/stft.ipynb
@@ -155,7 +155,7 @@
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.signal import stft, hann\n",
-    "from multiprocessing import Pool\n",
+    "# from multiprocessing import Pool\n",
    "\n",
    "# Function to compute and append STFT data\n",
    "def process_stft(args):\n",
@@ -321,9 +321,9 @@
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
-    "ready_data1 = []\n",
+    "ready_data1a = []\n",
    "for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
-    "    ready_data1.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file)))\n",
+    "    ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file)))\n",
    "# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
    "# Plotting the STFT Data"
   ]
@@ -334,8 +334,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "ready_data1[0]\n",
+    "len(ready_data1a)\n",
-    "plt.pcolormesh(ready_data1[0])"
+    "# plt.pcolormesh(ready_data1[0])"
   ]
  },
  {
@@ -345,7 +345,7 @@
   "outputs": [],
   "source": [
    "for i in range(6):\n",
-    "    plt.pcolormesh(ready_data1[i])\n",
+    "    plt.pcolormesh(ready_data1a[i])\n",
    "    plt.title(f'STFT Magnitude for case {i} sensor 1')\n",
    "    plt.xlabel(f'Frequency [Hz]')\n",
    "    plt.ylabel(f'Time [sec]')\n",
@@ -358,9 +358,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "ready_data2 = []\n",
+    "ready_data2a = []\n",
    "for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
-    "    ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file)))"
+    "    ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file)))"
   ]
  },
  {
@@ -369,8 +369,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "print(len(ready_data1))\n",
+    "print(len(ready_data1a))\n",
-    "print(len(ready_data2))"
+    "print(len(ready_data2a))"
   ]
  },
  {
@@ -379,10 +379,16 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "x1 = 0\n",
+    "x1a = 0\n",
-    "print(type(ready_data1[0]))\n",
+    "print(type(ready_data1a[0]))\n",
-    "ready_data1[0].iloc[:,0]\n",
+    "ready_data1a[0].iloc[:,0]"
-    "# x1 = x1 + ready_data1[0].shape[0]"
+   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Checking length of the total array"
   ]
  },
  {
@@ -391,16 +397,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "x1 = 0\n",
+    "x1a = 0\n",
-    "print(type(x1))\n",
+    "print(type(x1a))\n",
-    "for i in range(len(ready_data1)):\n",
+    "for i in range(len(ready_data1a)):\n",
-    "    # print(ready_data1[i].shape)\n",
+    "    print(type(ready_data1a[i].shape[0]))\n",
-    "    # print(ready_data1[i].)\n",
+    "    x1a = x1a + ready_data1a[i].shape[0]\n",
-    "    print(type(ready_data1[i].shape[0]))\n",
+    "    print(type(x1a))\n",
    "    x1 = x1 + ready_data1[i].shape[0]\n",
    "    print(type(x1))\n",
    "\n",
-    "print(x1)"
+    "print(x1a)"
   ]
  },
  {
@@ -409,13 +413,20 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "x2 = 0\n",
+    "x2a = 0\n",
    "\n",
-    "for i in range(len(ready_data2)):\n",
+    "for i in range(len(ready_data2a)):\n",
-    "    print(ready_data2[i].shape)\n",
+    "    print(ready_data2a[i].shape)\n",
-    "    x2 = x2 + ready_data2[i].shape[0]\n",
+    "    x2a = x2a + ready_data2a[i].shape[0]\n",
    "\n",
-    "print(x2)"
+    "print(x2a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Flatten 6 array into one array"
   ]
  },
  {
@@ -424,28 +435,22 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "x1 = ready_data1[0]\n",
+    "# Combine all dataframes in ready_data1a into a single dataframe\n",
-    "# print(x1)\n",
+    "if ready_data1a:  # Check if the list is not empty\n",
-    "print(type(x1))\n",
+    "    # Use pandas concat function instead of iterative concatenation\n",
-    "for i in range(len(ready_data1) - 1):\n",
+    "    combined_data = pd.concat(ready_data1a, axis=0, ignore_index=True)\n",
    "    #print(i)\n",
    "    x1 = np.concatenate((x1, ready_data1[i + 1]), axis=0)\n",
    "# print(x1)\n",
    "pd.DataFrame(x1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x2 = ready_data2[0]\n",
    "    \n",
-    "for i in range(len(ready_data2) - 1):\n",
+    "    print(f\"Type of combined data: {type(combined_data)}\")\n",
-    "    #print(i)\n",
+    "    print(f\"Shape of combined data: {combined_data.shape}\")\n",
-    "    x2 = np.concatenate((x2, ready_data2[i + 1]), axis=0)\n",
+    "    \n",
-    "pd.DataFrame(x2)"
+    "    # Display the combined dataframe\n",
    "    combined_data\n",
    "else:\n",
    "    print(\"No data available in ready_data1a list\")\n",
    "    combined_data = pd.DataFrame()\n",
    "\n",
    "# Store the result in x1a for compatibility with subsequent code\n",
    "x1a = combined_data"
   ]
  },
  {
@@ -454,20 +459,29 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "print(x1.shape)\n",
+    "# Combine all dataframes in ready_data1a into a single dataframe\n",
-    "print(x2.shape)"
+    "if ready_data2a:  # Check if the list is not empty\n",
    "    # Use pandas concat function instead of iterative concatenation\n",
    "    combined_data = pd.concat(ready_data2a, axis=0, ignore_index=True)\n",
    "    \n",
    "    print(f\"Type of combined data: {type(combined_data)}\")\n",
    "    print(f\"Shape of combined data: {combined_data.shape}\")\n",
    "    \n",
    "    # Display the combined dataframe\n",
    "    combined_data\n",
    "else:\n",
    "    print(\"No data available in ready_data1a list\")\n",
    "    combined_data = pd.DataFrame()\n",
    "\n",
    "# Store the result in x1a for compatibility with subsequent code\n",
    "x2a = combined_data"
   ]
  },
  {
-   "cell_type": "code",
+   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "y_1 = [1,1,1,1]\n",
+    "### Creating the label"
    "y_2 = [0,1,1,1]\n",
    "y_3 = [1,0,1,1]\n",
    "y_4 = [1,1,0,0]"
   ]
  },
  {
@@ -490,7 +504,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "y_data = [y_1, y_2, y_3, y_4, y_5, y_6]"
+    "y_data = [y_1, y_2, y_3, y_4, y_5, y_6]\n",
    "y_data"
   ]
  },
  {
@@ -500,7 +515,7 @@
   "outputs": [],
   "source": [
    "for i in range(len(y_data)):\n",
-    "    print(ready_data1[i].shape[0])"
+    "    print(ready_data1a[i].shape[0])"
   ]
  },
  {
@@ -509,9 +524,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "for i in range(len(y_data)):\n",
-    "    y_data[i] = [y_data[i]]*ready_data1[i].shape[0]\n",
+    "    y_data[i] = [y_data[i]]*ready_data1a[i].shape[0]"
    "    y_data[i] = np.array(y_data[i])"
   ]
  },
  {
@@ -520,6 +535,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
    "# len(y_data[0])\n",
    "y_data"
   ]
  },
@@ -552,10 +568,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from sklearn.model_selection import train_test_split\n",
+    "from src.ml.model_selection import create_ready_data\n",
    "\n",
-    "x_train1, x_test1, y_train, y_test = train_test_split(x1, y, test_size=0.2, random_state=2)\n",
+    "X1a, y = create_ready_data('D:/thesis/data/converted/raw/sensor1')\n",
-    "x_train2, x_test2, y_train, y_test = train_test_split(x2, y, test_size=0.2, random_state=2)"
+    "X2a, y = create_ready_data('D:/thesis/data/converted/raw/sensor2')"
   ]
  },
  {
@@ -565,6 +581,17 @@
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "x_train1, x_test1, y_train, y_test = train_test_split(X1a, y, test_size=0.2, random_state=2)\n",
    "x_train2, x_test2, y_train, y_test = train_test_split(X2a, y, test_size=0.2, random_state=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
@@ -597,16 +624,17 @@
    "\n",
    "\n",
    "# 1. Random Forest\n",
-    "rf_model = RandomForestClassifier()\n",
+    "rf_model1 = RandomForestClassifier()\n",
-    "rf_model.fit(x_train1, y_train)\n",
+    "rf_model1.fit(x_train1, y_train)\n",
-    "rf_pred1 = rf_model.predict(x_test1)\n",
+    "rf_pred1 = rf_model1.predict(x_test1)\n",
    "acc1 = accuracy_score(y_test, rf_pred1) * 100\n",
    "accuracies1.append(acc1)\n",
    "# format with color coded if acc1 > 90\n",
    "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
    "print(\"Random Forest Accuracy for sensor 1:\", acc1)\n",
-    "rf_model.fit(x_train2, y_train)\n",
+    "rf_model2 = RandomForestClassifier()\n",
-    "rf_pred2 = rf_model.predict(x_test2)\n",
+    "rf_model2.fit(x_train2, y_train)\n",
    "rf_pred2 = rf_model2.predict(x_test2)\n",
    "acc2 = accuracy_score(y_test, rf_pred2) * 100\n",
    "accuracies2.append(acc2)\n",
    "# format with color coded if acc2 > 90\n",
@@ -616,16 +644,17 @@
    "# print(y_test)\n",
    "\n",
    "# 2. Bagged Trees\n",
-    "bagged_model = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)\n",
+    "bagged_model1 = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)\n",
-    "bagged_model.fit(x_train1, y_train)\n",
+    "bagged_model1.fit(x_train1, y_train)\n",
-    "bagged_pred1 = bagged_model.predict(x_test1)\n",
+    "bagged_pred1 = bagged_model1.predict(x_test1)\n",
    "acc1 = accuracy_score(y_test, bagged_pred1) * 100\n",
    "accuracies1.append(acc1)\n",
    "# format with color coded if acc1 > 90\n",
    "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
    "print(\"Bagged Trees Accuracy for sensor 1:\", acc1)\n",
-    "bagged_model.fit(x_train2, y_train)\n",
+    "bagged_model2 = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)\n",
-    "bagged_pred2 = bagged_model.predict(x_test2)\n",
+    "bagged_model2.fit(x_train2, y_train)\n",
    "bagged_pred2 = bagged_model2.predict(x_test2)\n",
    "acc2 = accuracy_score(y_test, bagged_pred2) * 100\n",
    "accuracies2.append(acc2)\n",
    "# format with color coded if acc2 > 90\n",
@@ -641,8 +670,9 @@
    "# format with color coded if acc1 > 90\n",
    "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
    "print(\"Decision Tree Accuracy for sensor 1:\", acc1)\n",
-    "dt_model.fit(x_train2, y_train)\n",
+    "dt_model2 = DecisionTreeClassifier()\n",
-    "dt_pred2 = dt_model.predict(x_test2)\n",
+    "dt_model2.fit(x_train2, y_train)\n",
    "dt_pred2 = dt_model2.predict(x_test2)\n",
    "acc2 = accuracy_score(y_test, dt_pred2) * 100\n",
    "accuracies2.append(acc2)\n",
    "# format with color coded if acc2 > 90\n",
@@ -658,8 +688,9 @@
    "# format with color coded if acc1 > 90\n",
    "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
    "print(\"KNeighbors Accuracy for sensor 1:\", acc1)\n",
-    "knn_model.fit(x_train2, y_train)\n",
+    "knn_model2 = KNeighborsClassifier()\n",
-    "knn_pred2 = knn_model.predict(x_test2)\n",
+    "knn_model2.fit(x_train2, y_train)\n",
    "knn_pred2 = knn_model2.predict(x_test2)\n",
    "acc2 = accuracy_score(y_test, knn_pred2) * 100\n",
    "accuracies2.append(acc2)\n",
    "# format with color coded if acc2 > 90\n",
@@ -675,8 +706,9 @@
    "# format with color coded if acc1 > 90\n",
    "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
    "print(\"Linear Discriminant Analysis Accuracy for sensor 1:\", acc1)\n",
-    "lda_model.fit(x_train2, y_train)\n",
+    "lda_model2 = LinearDiscriminantAnalysis()\n",
-    "lda_pred2 = lda_model.predict(x_test2)\n",
+    "lda_model2.fit(x_train2, y_train)\n",
    "lda_pred2 = lda_model2.predict(x_test2)\n",
    "acc2 = accuracy_score(y_test, lda_pred2) * 100\n",
    "accuracies2.append(acc2)\n",
    "# format with color coded if acc2 > 90\n",
@@ -692,8 +724,9 @@
    "# format with color coded if acc1 > 90\n",
    "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
    "print(\"Support Vector Machine Accuracy for sensor 1:\", acc1)\n",
-    "svm_model.fit(x_train2, y_train)\n",
+    "svm_model2 = SVC()\n",
-    "svm_pred2 = svm_model.predict(x_test2)\n",
+    "svm_model2.fit(x_train2, y_train)\n",
    "svm_pred2 = svm_model2.predict(x_test2)\n",
    "acc2 = accuracy_score(y_test, svm_pred2) * 100\n",
    "accuracies2.append(acc2)\n",
    "# format with color coded if acc2 > 90\n",
@@ -709,8 +742,9 @@
    "# format with color coded if acc1 > 90\n",
    "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
    "print(\"XGBoost Accuracy:\", acc1)\n",
-    "xgboost_model.fit(x_train2, y_train)\n",
+    "xgboost_model2 = XGBClassifier()\n",
-    "xgboost_pred2 = xgboost_model.predict(x_test2)\n",
+    "xgboost_model2.fit(x_train2, y_train)\n",
    "xgboost_pred2 = xgboost_model2.predict(x_test2)\n",
    "acc2 = accuracy_score(y_test, xgboost_pred2) * 100\n",
    "accuracies2.append(acc2)\n",
    "# format with color coded if acc2 > 90\n",
@@ -787,51 +821,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "def spectograph(data_dir: str):\n",
+    "from src.ml.model_selection import create_ready_data\n",
    "    # print(os.listdir(data_dir))\n",
    "    for damage in os.listdir(data_dir):\n",
    "        # print(damage)\n",
    "        d = os.path.join(data_dir, damage)\n",
    "        # print(d)\n",
    "        for file in os.listdir(d):\n",
    "            # print(file)\n",
    "            f = os.path.join(d, file)\n",
    "            print(f)\n",
    "            # sensor1 = pd.read_csv(f, skiprows=1, sep=';')\n",
    "            # sensor2 = pd.read_csv(f, skiprows=1, sep=';')\n",
    "\n",
-    "            # df1 = pd.DataFrame()\n",
+    "X1b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor1')\n",
-    "\n",
+    "X2b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor2')"
    "            # df1['s1'] = sensor1[sensor1.columns[-1]]\n",
    "            # df1['s2'] = sensor2[sensor2.columns[-1]]\n",
    "ed\n",
    "            # # Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n",
    "\n",
    "            # plt.plot(df1['s2'], label='sensor 2')\n",
    "            # plt.plot(df1['s1'], label='sensor 1')\n",
    "            # plt.xlabel(\"Number of samples\")\n",
    "            # plt.ylabel(\"Amplitude\")\n",
    "            # plt.title(\"Raw vibration signal\")\n",
    "            # plt.legend()\n",
    "            # plt.show()\n",
    "\n",
    "            # from scipy import signal\n",
    "            # from scipy.signal.windows import hann\n",
    "\n",
    "            # vibration_data = df1['s1']\n",
    "\n",
    "            # # Applying STFT\n",
    "            # window_size = 1024\n",
    "            # hop_size = 512\n",
    "            # window = hann(window_size)  # Creating a Hanning window\n",
    "            # frequencies, times, Zxx = signal.stft(vibration_data, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n",
    "\n",
    "            # # Plotting the STFT Data\n",
    "            # plt.pcolormesh(times, frequencies, np.abs(Zxx), shading='gouraud')\n",
    "            # plt.title(f'STFT Magnitude for case 1 signal sensor 1 ')\n",
    "            # plt.ylabel('Frequency [Hz]')\n",
    "            # plt.xlabel('Time [sec]')\n",
    "            # plt.show()"
   ]
  },
  {
@@ -840,7 +833,115 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "spectograph('D:/thesis/data/converted/raw')"
+    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "# 4. Validate on Dataset B\n",
    "y_pred_svm = svm_model.predict(X1b)\n",
    "\n",
    "# 5. Evaluate\n",
    "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n",
    "print(classification_report(y, y_pred_svm))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "# 4. Validate on Dataset B\n",
    "y_pred = rf_model2.predict(X2b)\n",
    "\n",
    "# 5. Evaluate\n",
    "print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred))\n",
    "print(classification_report(y, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_predict = svm_model2.predict(X2b.iloc[[5312],:])\n",
    "print(y_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y[5312]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Confusion Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
    "\n",
    "\n",
    "cm = confusion_matrix(y, y_pred_svm) # -> ndarray\n",
    "\n",
    "# get the class labels\n",
    "labels = svm_model.classes_\n",
    "\n",
    "# Plot\n",
    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
    "disp.plot(cmap=plt.cm.Blues)  # You can change colormap\n",
    "plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Self-test CM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Predict sensor 1 on Dataset A\n",
    "y_train_pred = svm_model.predict(x_train1)\n",
    "\n",
    "# 2. Import confusion matrix tools\n",
    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# 3. Create and plot confusion matrix\n",
    "cm_train = confusion_matrix(y_train, y_train_pred)\n",
    "labels = svm_model.classes_\n",
    "\n",
    "disp = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=labels)\n",
    "disp.plot(cmap=plt.cm.Blues)\n",
    "plt.title(\"Confusion Matrix: Train & Test on Dataset A\")\n",
    "plt.show()\n"
   ]
  }
 ],
--- a/code/src/ml/init.py
+++ b/code/src/ml/init.py
--- a/code/src/ml/model_selection.py
+++ b/code/src/ml/model_selection.py
@@ -0,0 +1,57 @@
 import numpy as np
 import pandas as pd
 import os
 from sklearn.model_selection import train_test_split as sklearn_split
 def create_ready_data(
     stft_data_path: str,
     stratify: np.ndarray = None,
 ) -> tuple:
     """
     Load every CSV file in a directory into one feature table with labels.

     Each file is read with ``pandas.read_csv`` and the resulting frames are
     stacked row-wise. Every row is labelled with the zero-based index of the
     file it came from, where files are taken in sorted file-name order.

     Parameters:
     -----------
     stft_data_path : str
         Path to the directory containing STFT data files (e.g. 'data/converted/raw/sensor1')
     stratify : np.ndarray, optional
         Currently unused; kept so existing callers that pass it keep working.

     Returns:
     --------
     tuple
         (X, y) - ``X`` is the combined DataFrame and ``y`` is a 1-D NumPy
         array with one integer label per row of ``X``. For an empty
         directory both are empty.
     """
     # os.listdir() returns entries in arbitrary order; sorting makes the
     # file -> label mapping reproducible across runs and platforms.
     file_names = sorted(os.listdir(stft_data_path))
     ready_data = [
         pd.read_csv(os.path.join(stft_data_path, file_name))
         for file_name in file_names
     ]

     if not ready_data:
         print("No data available in ready_data list")
         print("No labels available in y_data list")
         return pd.DataFrame(), np.array([])

     # A single concat call avoids repeated copying from iterative appends.
     X = pd.concat(ready_data, axis=0, ignore_index=True)
     print(f"Type of combined data: {type(X)}")
     print(f"Shape of combined data: {X.shape}")

     # One label per row: the index of the file the row was read from.
     y = np.concatenate(
         [np.full(frame.shape[0], label) for label, frame in enumerate(ready_data)],
         axis=0,
     )
     return X, y
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,8 @@
 from setuptools import setup, find_packages
 # Packaging entry point. Packages are discovered inside the code/ directory
 # and package_dir maps the root package namespace onto that directory, so
 # after installation they are imported by their own names (the notebook uses
 # `from src.ml.model_selection import ...`).
 # NOTE(review): find_packages() only picks up directories that contain an
 # __init__.py - confirm code/src and code/src/ml have one.
 setup(
    name="thesisrepo",
    version="0.1",
    packages=find_packages(where="code"),
    package_dir={"": "code"},
 )
Author	SHA1	Message	Date
panuluh	bb9dee10de	Revise README to simplify content and remove private visibility Removed sections on access restrictions and copyright details.	2026-01-29 12:14:35 +07:00
Rifqi D. Panuluh	93d720b676	Update latexdiff.yml	2025-06-04 15:11:21 +07:00
Rifqi D. Panuluh	52dccce7e2	Update latexdiff.yml	2025-06-04 15:00:49 +07:00
Rifqi D. Panuluh	bf0de65fb7	Update latexdiff.yml	2025-06-04 14:21:39 +07:00
Rifqi D. Panuluh	7ca70fbdc3	Update latexdiff.yml	2025-06-04 13:41:33 +07:00
Rifqi D. Panuluh	b35944ee3e	Update latexdiff.yml	2025-06-04 13:34:44 +07:00
Rifqi D. Panuluh	8f51963d0f	Merge pull request #93 from nuluh/revert-92-latex/91-bug-expose-maketitle Revert "Expose `maketitle` by just using `\input`"	2025-06-03 20:12:49 +07:00
Rifqi D. Panuluh	5c513e4629	Revert "Expose `maketitle` by just using `\input`"	2025-06-03 20:12:11 +07:00
Rifqi D. Panuluh	38ece73768	Merge pull request #92 from nuluh/latex/91-bug-expose-maketitle	2025-06-03 20:09:13 +07:00
nuluh	76a09c0219	refactor(documentclass): update title handling by using input files for maketitle Closes #91	2025-06-03 19:17:08 +07:00
nuluh	1a994fd59c	fix(documentclass): restore and customize English bibliography strings	2025-06-03 19:10:01 +07:00
nuluh	cdb3010b78	fix(documentclass): fix redefined bibliography strings error	2025-06-03 19:05:43 +07:00
Rifqi D. Panuluh	e5b9806462	Update latexdiff.yml	2025-06-03 18:09:18 +07:00
Rifqi D. Panuluh	8dbb448b32	Update latexdiff.yml	2025-06-03 18:00:43 +07:00
Rifqi D. Panuluh	033d949325	Update latexdiff.yml	2025-06-03 17:29:50 +07:00
Rifqi D. Panuluh	643c0ebce1	Update latexdiff.yml	2025-06-03 17:19:07 +07:00
Rifqi D. Panuluh	4851a9aa5d	Update latexdiff.yml	2025-06-03 17:05:30 +07:00
nuluh	8a3c1ae585	refactor(main): comment out unused input sections and update chapter includes	2025-06-03 16:37:15 +07:00
Rifqi D. Panuluh	fd765b113f	Update latex-lint.yml	2025-06-03 15:35:51 +07:00
Rifqi D. Panuluh	fe801b0a1c	Update latex-lint.yml	2025-06-03 15:16:16 +07:00
nuluh	7b934d3fba	fix(acknowledgement): fix file naming	2025-06-03 15:02:12 +07:00
Rifqi D. Panuluh	dbc62fea32	Update latex-lint.yml	2025-06-03 15:01:15 +07:00
Rifqi D. Panuluh	1ad235866e	Update latexdiff.yml	2025-06-03 14:44:52 +07:00
Rifqi D. Panuluh	05796d0165	Create latex-lint.yml	2025-06-03 14:42:29 +07:00
Rifqi D. Panuluh	f8e9ac93a0	Update latexdiff.yml fix path	2025-06-03 14:27:00 +07:00
Rifqi D. Panuluh	04546f8c35	Update latexdiff.yml ensures that all \include{} or \input{} paths (which are relative to main.tex) resolve correctly	2025-06-03 14:20:23 +07:00
Rifqi D. Panuluh	26450026bb	Update latexdiff.yml fix Alpine’s “externally‐managed‐environment” restriction by install flatex inside a virtual environment rather than system‐wide	2025-06-03 14:00:50 +07:00
Rifqi D. Panuluh	3a17cc1331	Update latexdiff.yml using a pre-built TeX Live Docker image to avoid reinstalling texlive-full every run	2025-06-03 13:42:35 +07:00
Rifqi D. Panuluh	e9f953f731	Create latexmk.yml	2025-06-03 13:26:38 +07:00
Rifqi D. Panuluh	2c5c78b83c	Create latexdiff.yml	2025-06-03 13:09:41 +07:00
nuluh	aaccad7ae8	feat(glossaries): wip	2025-06-01 16:47:32 +07:00
Rifqi D. Panuluh	2c453ec403	Merge pull request #89 from nuluh/feature/88-refactor-training-cell Closes #88	2025-05-29 23:04:24 +07:00
nuluh	7da3179d08	refactor(nb): Create and implement helper function `train_and_evaluate_model`	2025-05-29 22:57:28 +07:00
nuluh	254b24cb21	feat(viz): Update plotting for STFT data visualization with color map 'jet' and added color bar	2025-05-29 20:35:35 +07:00
Rifqi D. Panuluh	d151062115	Add Working Milestone with Initial Results and Model Inference (#82 ) * wip: add function to create stratified train-test split from STFT data * feat(src): implement working function for dataset B to create ready data from STFT files stft_files and add setup.py for package configuration * feat(notebook): Update variable names for clarity, remove unused imports, and streamline data processing. Implement data concatenation using pandas concat for efficiency. Add validation steps for Dataset B and improve model training consistency across sensors. * fix(.gitignore): add rule to ignore egg-info directories and ensure proper formatting * docs(README): add instructions for running stft.ipynb notebook * feat(notebook): Add evaluation metrics and confusion matrix visualizations for model predictions on Dataset B. Remove commented-out code and integrate data preparation using create_ready_data function. --------- Co-authored-by: nuluh <dam.ar@outlook.com>	2025-05-24 01:30:10 +07:00