Revert "Add Zero-Padding to CSV Filenames"

feat: Add launch.json for Python debugger configuration
This commit adds a new file, `.vscode/launch.json`, which contains the configuration for launching the Python debugger. The configuration includes the necessary attributes such as the debugger type, request type, program file, console type, and command-line arguments. This configuration allows developers to easily debug Python files in the integrated terminal.
2024-08-27 09:18:44 +07:00 · 2024-08-20 12:52:48 +07:00 · 2024-08-20 12:52:48 +07:00 · 2024-08-20 12:52:06 +07:00 · 2024-08-19 13:20:14 +07:00
6 changed files with 151 additions and 30 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Python Debugger: Current File with Arguments",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${file}",
+      "console": "integratedTerminal",
+      "args": ["data/raw", "data/raw"]
+    }
+  ]
+}
--- a/code/notebooks/03_feature_extraction.ipynb
+++ b/code/notebooks/03_feature_extraction.ipynb
@@ -25,7 +25,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -154,7 +154,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
@@ -186,12 +186,12 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "### Print Time-domain Features"
+    "### Print Time-domain Features (Single Mockup Data)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
@@ -264,7 +264,7 @@
       "0  2.067638  1.917716  0.412307  "
      ]
     },
-     "execution_count": 23,
+     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -272,10 +272,12 @@
   "source": [
    "import pandas as pd\n",
    "import sys\n",
+    "import os\n",
    "# Assuming the src directory is one level up from the notebooks directory\n",
    "sys.path.append('../src/features')\n",
    "from time_domain_features import FeatureExtractor\n",
    "\n",
+    "\n",
    "# Extract features\n",
    "extracted = FeatureExtractor(mock_df['SampleData'])\n",
    "\n",
@@ -283,6 +285,85 @@
    "features = pd.DataFrame(extracted.features, index=[0])\n",
    "features\n"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Print Time-domain Features (Multiple CSV Mockup Data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import sys\n",
+    "import os\n",
+    "# Assuming the src directory is one level up from the notebooks directory\n",
+    "sys.path.append('../src/features')\n",
+    "from time_domain_features import ExtractTimeFeatures # use wrapper function instead of class for easy use\n",
+    "\n",
+    "def build_features(input_dir):\n",
+    "    all_features = []\n",
+    "    for nth_damage in os.listdir(input_dir):\n",
+    "        nth_damage_path = os.path.join(input_dir, nth_damage)\n",
+    "        if os.path.isdir(nth_damage_path):\n",
+    "            # print(nth_damage)\n",
+    "            for nth_test in os.listdir(nth_damage_path):\n",
+    "                nth_test_path = os.path.join(nth_damage_path, nth_test)\n",
+    "                # print(nth_test_path)\n",
+    "                features = ExtractTimeFeatures(nth_test_path) # return the one csv file feature in dictionary {}\n",
+    "                all_features.append(features)\n",
+    "\n",
+    "    # Create a DataFrame from the list of dictionaries\n",
+    "    df = pd.DataFrame(all_features)\n",
+    "    return df\n",
+    "\n",
+    "data_dir = \"../../data/raw\"\n",
+    "# Extract features\n",
+    "df = build_features(data_dir)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 50 entries, 0 to 49\n",
+      "Data columns (total 14 columns):\n",
+      " #   Column              Non-Null Count  Dtype  \n",
+      "---  ------              --------------  -----  \n",
+      " 0   Mean                50 non-null     float64\n",
+      " 1   Max                 50 non-null     float64\n",
+      " 2   Peak (Pm)           50 non-null     float64\n",
+      " 3   Peak-to-Peak (Pk)   50 non-null     float64\n",
+      " 4   RMS                 50 non-null     float64\n",
+      " 5   Variance            50 non-null     float64\n",
+      " 6   Standard Deviation  50 non-null     float64\n",
+      " 7   Power               50 non-null     float64\n",
+      " 8   Crest Factor        50 non-null     float64\n",
+      " 9   Form Factor         50 non-null     float64\n",
+      " 10  Pulse Indicator     50 non-null     float64\n",
+      " 11  Margin              50 non-null     float64\n",
+      " 12  Kurtosis            50 non-null     float64\n",
+      " 13  Skewness            50 non-null     float64\n",
+      "dtypes: float64(14)\n",
+      "memory usage: 5.6 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.info()"
+   ]
  }
 ],
 "metadata": {
--- a/code/src/features/build_features.py
+++ b/code/src/features/build_features.py
@@ -1,16 +1,39 @@
 # src/features/build_features.py
 import pandas as pd
-from time_domain_features import FeatureExtractor
-import numpy as np
+from time_domain_features import ExtractTimeFeatures
+import os
+import re

-def build_features(input_file, output_file):
-    data = pd.read_csv(input_file)
-    # Assuming the relevant data is in the first column
-    extractor = FeatureExtractor(data.iloc[:, 0].values)
-    features = extractor.features
+# Helper that uses a regex to pull the damage level and test number out of a filename and collect them as a tuple
+def extract_numbers(filename):
+    # Find all occurrences of one or more digits in the filename
+    numbers = re.findall(r'\d+', filename)
+    # Convert the list of number strings to integers
+    numbers = [int(num) for num in numbers]
+    # Convert to a tuple and return
+    return print(tuple(numbers))

+def build_features(input_dir, output_dir):
+    all_features = []
+    for nth_damage in os.listdir(input_dir):
+        nth_damage_path = os.path.join(input_dir, nth_damage)
+        if os.path.isdir(nth_damage_path):
+            print(nth_damage)
+            for nth_test in os.listdir(nth_damage_path):
+                nth_test_path = os.path.join(nth_damage_path, nth_test)
+                # print(nth_test_path)
+                features = ExtractTimeFeatures(nth_test_path) # return the one csv file feature in dictionary {}
+                all_features.append(features)
+
+    # Create a DataFrame from the list of dictionaries
+    df = pd.DataFrame(all_features)
+    print(df)
+    # Save the DataFrame to a CSV file in the output directory
+    output_file_path = os.path.join(output_dir, 'combined_features.csv')
+    df.to_csv(output_file_path, index=False)
+    print(f"Features saved to {output_file_path}")
    # Save features to a file
-    np.savez(output_file, **features)
+    # np.savez(output_file, **features)

 if __name__ == "__main__":
    import sys
@@ -18,4 +41,4 @@ if __name__ == "__main__":
    output_path = sys.argv[2]  # 'data/features/feature_matrix.npz'
    
    # Assuming only one file for simplicity; adapt as needed
-    build_features(f"{input_path}processed_data.csv", output_path)
+    build_features(input_path, output_path)
--- a/code/src/features/time_domain_features.py
+++ b/code/src/features/time_domain_features.py
@@ -36,6 +36,13 @@ class FeatureExtractor:
            result += f"{feature}: {value:.4f}\n"
        return result

+def ExtractTimeFeatures(object):
+    data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
+    extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
+    features = extractor.features
+    return features
+    # Save features to a file
+    # np.savez(output_file, **features)
 # Usage
 # Assume you have a CSV file with numerical data in the first column
 # Create an instance of the class and pass the path to your CSV file
--- a/data/processed/README.md
+++ b/data/processed/README.md
@@ -1,8 +1,8 @@
-# Raw Data Directory
+# Processed Data Directory

 ## Overview

-This `data/raw` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `raw` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.
+This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario.

 ## Directory Structure

@@ -12,12 +12,12 @@ The directory is organized as follows:
 data
 └── processed
 ├── DAMAGE_1
-│   ├── D1_TEST1.csv
-│   ├── D1_TEST2.csv
+│ ├── D1_TEST1.csv
+│ ├── D1_TEST2.csv
 │ ...
-│   └── D1_TEST10.csv
+│ └── D1_TEST10.csv
 ├── DAMAGE_2
-│   ├── D2_TEST1.csv
+│ ├── D2_TEST1.csv
 │ ...
 ├── DAMAGE_3
 │ ...
--- a/generate_dummy_data.py
+++ b/generate_dummy_data.py
@@ -13,20 +13,14 @@ processed_path = os.path.join(base_path, "processed")
 os.makedirs(raw_path, exist_ok=True)
 os.makedirs(processed_path, exist_ok=True)

-# Define the number of zeros to pad
-num_damages = 5
-num_tests = 10
-damage_pad = len(str(num_damages))
-test_pad = len(str(num_tests))
-
-for damage in range(1, num_damages + 1):  # 5 Damage levels starts from 1
-    damage_folder = f"DAMAGE_{damage:0{damage_pad}}"
-    damage_path = os.path.join(raw_path, damage_folder)
+for damage in range(1, 6):  # 5 Damage levels
+    damage_folder = f"DAMAGE_{damage}"
+    damage_path = os.path.join(processed_path, damage_folder)
    os.makedirs(damage_path, exist_ok=True)

    for test in range(1, 11):  # 10 Tests per damage level
        # Filename for the CSV
-        csv_filename = f"D{damage:0{damage_pad}}_TEST{test:0{test_pad}}.csv"
+        csv_filename = f"D{damage}_TEST{test}.csv"
        csv_path = os.path.join(damage_path, csv_filename)

        # Generate dummy data
Author	SHA1	Message	Date
Panuluh	88be76292b	Revert "Add Zero-Padding to CSV Filenames"	2024-08-27 09:18:44 +07:00
nuluh	de902b2a8c	feat: Add launch.json for Python debugger configuration This commit adds a new file, `.vscode/launch.json`, which contains the configuration for launching the Python debugger. The configuration includes the necessary attributes such as the debugger type, request type, program file, console type, and command-line arguments. This configuration allows developers to easily debug Python files in the integrated terminal.	2024-08-20 12:52:48 +07:00
nuluh	57c0e03a4f	docs(script): Update time-domain feature extraction to skip header row separator char info	2024-08-20 12:52:48 +07:00
nuluh	8ab934fe1c	feat(features): refactor feature extraction to handle multiple files and directories - Modify `build_features` function to support iterative processing across nested directories, enhancing the system's ability to handle larger datasets and varied input structures. - Replace direct usage of `FeatureExtractor` class with `ExtractTimeFeatures` function, which now acts as a wrapper to include this class, facilitating streamlined integration and maintenance of feature extraction processes. - Implement `extract_numbers` function using regex to parse filenames and extract numeric identifiers, used for labels when training with SVM - Switch output from `.npz` to `.csv` format in `build_features`, offering better compatibility with data analysis tools and readability. - Update documentation and comments within the code to reflect changes in functionality and usage of the new feature extraction setup. Closes #4	2024-08-20 12:52:06 +07:00
nuluh	55db5709a9	refactor(script): Add time-domain feature extraction via an `ExtractTimeFeatures` function that returns the features as a dictionary and is later called from `build_features.py`. The function is called once for each individual `.csv` file, and each returned value is then appended in `build_features.py`. Using a function rather than instantiating the class directly keeps the code flexible and easier to maintain.	2024-08-19 13:20:14 +07:00