Compare commits

...

16 Commits

Author SHA1 Message Date
nuluh
5b0b3dd4e5 feat(notebook): Add evaluation metrics and confusion matrix visualizations for model predictions on Dataset B. Remove commented-out code and integrate data preparation using create_ready_data function. 2025-04-24 16:13:50 +07:00
nuluh
00d1d55181 docs(README): add instructions for running stft.ipynb notebook 2025-04-24 10:23:31 +07:00
nuluh
8a166e8b11 fix(.gitignore): add rule to ignore egg-info directories and ensure proper formatting 2025-04-24 10:21:28 +07:00
nuluh
eb62c7e614 feat(notebook): Update variable names for clarity, remove unused imports, and streamline data processing. Implement data concatenation using pandas concat for efficiency. Add validation steps for Dataset B and improve model training consistency across sensors. 2025-04-24 10:21:07 +07:00
nuluh
cba4a00cd8 feat(src): implement working function for dataset B to create ready data from STFT files and add setup.py for package configuration 2025-04-24 09:32:22 +07:00
nuluh
90a5a76609 wip: add function to create stratified train-test split from STFT data 2025-04-23 12:48:15 +07:00
nuluh
c8509aa728 fix(notebooks): fix out of index stft plotting iteration 2025-04-22 10:55:34 +07:00
nuluh
4ebfb52635 Merge branch '40-feat-add-export-to-csv-method-for-dataprocessor-in-convertpy' 2025-04-21 00:16:39 +07:00
nuluh
cb380219f9 test(notebooks): update file paths for sensor data loading and add markdown for clarity 2025-04-21 00:07:06 +07:00
nuluh
804c178175 fix(notebooks): remove erroneous line and add markdown for testing outside training data 2025-04-20 16:32:31 +07:00
nuluh
1511012e11 refactor(test): update test script to generate damage files index for dataset_B and adjust export path for processed data 2025-04-20 16:02:16 +07:00
nuluh
db2947abdf fix(data): fix the incorrect output of scipy.stft() data to be pandas.DataFrame shaped (513,513) along with its frequencies as the index and times as the columns (transposed) instead of just the magnitude that being flattened out; add checks for empty data and correct file paths for sensor data loading.
Closes #43
2025-04-20 14:45:38 +07:00
nuluh
36b36c41ba feat(data): add export_to_csv method for saving processed data into individual sensor files and update test script
Closes #40
2025-04-17 10:10:19 +07:00
Rifqi D. Panuluh
28681017ad Merge pull request #39 from nuluh/feature/38-feat-redesign-convertpy
Feature/38 feat redesign `convert.py`
2025-03-22 19:57:20 +07:00
nuluh
ff64f3a3ab refactor(data): update type annotations for damage files index and related classes. Need better implementation 2025-03-22 19:48:50 +07:00
nuluh
58a316d9c8 feat(data): implement damage files index generation and data processing
Closes #38
2025-03-21 15:58:50 +07:00
9 changed files with 667 additions and 168 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
data/**/*.csv data/**/*.csv
.venv/ .venv/
*.pyc *.pyc
*.egg-info/

View File

@@ -1,3 +1,4 @@
{ {
"python.analysis.extraPaths": ["./code/src/features"] "python.analysis.extraPaths": ["./code/src/features"],
"jupyter.notebookFileRoot": "${workspaceFolder}/code"
} }

View File

@@ -16,3 +16,8 @@ The repository is private and access is restricted only to those who have been g
All contents of this repository, including the thesis idea, code, and associated data, are copyrighted © 2024 by Rifqi Panuluh. Unauthorized use or duplication is prohibited. All contents of this repository, including the thesis idea, code, and associated data, are copyrighted © 2024 by Rifqi Panuluh. Unauthorized use or duplication is prohibited.
[LICENSE](https://github.com/nuluh/thesis?tab=License-1-ov-file#readme) [LICENSE](https://github.com/nuluh/thesis?tab=License-1-ov-file#readme)
## How to Run `stft.ipynb`
1. run `pip install -e .` in root project first
2. run the notebook

View File

@@ -121,8 +121,9 @@
"signal_sensor2_test1 = []\n", "signal_sensor2_test1 = []\n",
"\n", "\n",
"for data in df:\n", "for data in df:\n",
" signal_sensor1_test1.append(data['sensor 1'].values)\n", " if not data.empty and 'sensor 1' in data.columns and 'sensor 2' in data.columns:\n",
" signal_sensor2_test1.append(data['sensor 2'].values)\n", " signal_sensor1_test1.append(data['sensor 1'].values)\n",
" signal_sensor2_test1.append(data['sensor 2'].values)\n",
"\n", "\n",
"print(len(signal_sensor1_test1))\n", "print(len(signal_sensor1_test1))\n",
"print(len(signal_sensor2_test1))" "print(len(signal_sensor2_test1))"
@@ -154,9 +155,7 @@
"import pandas as pd\n", "import pandas as pd\n",
"import numpy as np\n", "import numpy as np\n",
"from scipy.signal import stft, hann\n", "from scipy.signal import stft, hann\n",
"from multiprocessing import Pool\n", "# from multiprocessing import Pool\n",
"\n",
"\n",
"\n", "\n",
"# Function to compute and append STFT data\n", "# Function to compute and append STFT data\n",
"def process_stft(args):\n", "def process_stft(args):\n",
@@ -199,23 +198,22 @@
" # Compute STFT\n", " # Compute STFT\n",
" frequencies, times, Zxx = stft(sensor_data, fs=Fs, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n", " frequencies, times, Zxx = stft(sensor_data, fs=Fs, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n",
" magnitude = np.abs(Zxx)\n", " magnitude = np.abs(Zxx)\n",
" flattened_stft = magnitude.flatten()\n", " df_stft = pd.DataFrame(magnitude, index=frequencies, columns=times).T\n",
" df_stft.columns = [f\"Freq_{i}\" for i in frequencies]\n",
" \n", " \n",
" # Define the output CSV file path\n", " # Define the output CSV file path\n",
" stft_file_name = f'stft_data{sensor_num}_{damage_num}.csv'\n", " stft_file_name = f'stft_data{sensor_num}_{damage_num}.csv'\n",
" sensor_output_dir = os.path.join(damage_base_path, sensor_name.lower())\n", " sensor_output_dir = os.path.join(damage_base_path, sensor_name.lower())\n",
" os.makedirs(sensor_output_dir, exist_ok=True)\n", " os.makedirs(sensor_output_dir, exist_ok=True)\n",
" stft_file_path = os.path.join(sensor_output_dir, stft_file_name)\n", " stft_file_path = os.path.join(sensor_output_dir, stft_file_name)\n",
" print(stft_file_path)\n",
" # Append the flattened STFT to the CSV\n", " # Append the flattened STFT to the CSV\n",
" try:\n", " try:\n",
" flattened_stft_df = pd.DataFrame([flattened_stft])\n",
" if not os.path.isfile(stft_file_path):\n", " if not os.path.isfile(stft_file_path):\n",
" # Create a new CSV\n", " # Create a new CSV\n",
" flattened_stft_df.to_csv(stft_file_path, index=False, header=False)\n", " df_stft.to_csv(stft_file_path, index=False, header=False)\n",
" else:\n", " else:\n",
" # Append to existing CSV\n", " # Append to existing CSV\n",
" flattened_stft_df.to_csv(stft_file_path, mode='a', index=False, header=False)\n", " df_stft.to_csv(stft_file_path, mode='a', index=False, header=False)\n",
" print(f\"Appended STFT data to {stft_file_path}\")\n", " print(f\"Appended STFT data to {stft_file_path}\")\n",
" except Exception as e:\n", " except Exception as e:\n",
" print(f\"Error writing to {stft_file_path}: {e}\")" " print(f\"Error writing to {stft_file_path}: {e}\")"
@@ -295,7 +293,7 @@
"\n", "\n",
"# get current y ticks in list\n", "# get current y ticks in list\n",
"print(len(frequencies))\n", "print(len(frequencies))\n",
"print(len(times))\n" "print(len(times))"
] ]
}, },
{ {
@@ -323,10 +321,9 @@
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"ready_data1 = []\n", "ready_data1a = []\n",
"for file in os.listdir('D:/thesis/data/working/sensor1'):\n", "for file in os.listdir('D:/thesis/data/converted/raw/sensor1'):\n",
" ready_data1.append(pd.read_csv(os.path.join('D:/thesis/data/working/sensor1', file)))\n", " ready_data1a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor1', file)))\n",
"# ready_data1[1]\n",
"# colormesh give title x is frequency and y is time and rotate/transpose the data\n", "# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
"# Plotting the STFT Data" "# Plotting the STFT Data"
] ]
@@ -337,8 +334,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"ready_data1[1]\n", "len(ready_data1a)\n",
"plt.pcolormesh(ready_data1[1])" "# plt.pcolormesh(ready_data1[0])"
] ]
}, },
{ {
@@ -348,7 +345,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"for i in range(6):\n", "for i in range(6):\n",
" plt.pcolormesh(ready_data1[i])\n", " plt.pcolormesh(ready_data1a[i])\n",
" plt.title(f'STFT Magnitude for case {i} sensor 1')\n", " plt.title(f'STFT Magnitude for case {i} sensor 1')\n",
" plt.xlabel(f'Frequency [Hz]')\n", " plt.xlabel(f'Frequency [Hz]')\n",
" plt.ylabel(f'Time [sec]')\n", " plt.ylabel(f'Time [sec]')\n",
@@ -361,10 +358,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"ready_data2 = []\n", "ready_data2a = []\n",
"for file in os.listdir('D:/thesis/data/working/sensor2'):\n", "for file in os.listdir('D:/thesis/data/converted/raw/sensor2'):\n",
" ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/working/sensor2', file)))\n", " ready_data2a.append(pd.read_csv(os.path.join('D:/thesis/data/converted/raw/sensor2', file)))"
"ready_data2[5]"
] ]
}, },
{ {
@@ -373,8 +369,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"print(len(ready_data1))\n", "print(len(ready_data1a))\n",
"print(len(ready_data2))" "print(len(ready_data2a))"
] ]
}, },
{ {
@@ -383,28 +379,78 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"x1 = 0\n", "x1a = 0\n",
"print(type(ready_data1a[0]))\n",
"ready_data1a[0].iloc[:,0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Checking length of the total array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x1a = 0\n",
"print(type(x1a))\n",
"for i in range(len(ready_data1a)):\n",
" print(type(ready_data1a[i].shape[0]))\n",
" x1a = x1a + ready_data1a[i].shape[0]\n",
" print(type(x1a))\n",
"\n", "\n",
"for i in range(len(ready_data1)):\n", "print(x1a)"
" print(ready_data1[i].shape)\n", ]
" x1 = x1 + ready_data1[i].shape[0]\n", },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x2a = 0\n",
"\n", "\n",
"print(x1)" "for i in range(len(ready_data2a)):\n",
] " print(ready_data2a[i].shape)\n",
}, " x2a = x2a + ready_data2a[i].shape[0]\n",
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x2 = 0\n",
"\n", "\n",
"for i in range(len(ready_data2)):\n", "print(x2a)"
" print(ready_data2[i].shape)\n", ]
" x2 = x2 + ready_data2[i].shape[0]\n", },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Flatten 6 array into one array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine all dataframes in ready_data1a into a single dataframe\n",
"if ready_data1a: # Check if the list is not empty\n",
" # Use pandas concat function instead of iterative concatenation\n",
" combined_data = pd.concat(ready_data1a, axis=0, ignore_index=True)\n",
" \n",
" print(f\"Type of combined data: {type(combined_data)}\")\n",
" print(f\"Shape of combined data: {combined_data.shape}\")\n",
" \n",
" # Display the combined dataframe\n",
" combined_data\n",
"else:\n",
" print(\"No data available in ready_data1a list\")\n",
" combined_data = pd.DataFrame()\n",
"\n", "\n",
"print(x2)" "# Store the result in x1a for compatibility with subsequent code\n",
"x1a = combined_data"
] ]
}, },
{ {
@@ -413,50 +459,29 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"x1 = ready_data1[0]\n", "# Combine all dataframes in ready_data1a into a single dataframe\n",
"# print(x1)\n", "if ready_data2a: # Check if the list is not empty\n",
"print(type(x1))\n", " # Use pandas concat function instead of iterative concatenation\n",
"for i in range(len(ready_data1) - 1):\n", " combined_data = pd.concat(ready_data2a, axis=0, ignore_index=True)\n",
" #print(i)\n", " \n",
" x1 = np.concatenate((x1, ready_data1[i + 1]), axis=0)\n", " print(f\"Type of combined data: {type(combined_data)}\")\n",
"# print(x1)\n", " print(f\"Shape of combined data: {combined_data.shape}\")\n",
"pd.DataFrame(x1)" " \n",
] " # Display the combined dataframe\n",
}, " combined_data\n",
{ "else:\n",
"cell_type": "code", " print(\"No data available in ready_data1a list\")\n",
"execution_count": null, " combined_data = pd.DataFrame()\n",
"metadata": {},
"outputs": [],
"source": [
"x2 = ready_data2[0]\n",
"\n", "\n",
"for i in range(len(ready_data2) - 1):\n", "# Store the result in x1a for compatibility with subsequent code\n",
" #print(i)\n", "x2a = combined_data"
" x2 = np.concatenate((x2, ready_data2[i + 1]), axis=0)\n",
"pd.DataFrame(x2)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"print(x1.shape)\n", "### Creating the label"
"print(x2.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_1 = [1,1,1,1]\n",
"y_2 = [0,1,1,1]\n",
"y_3 = [1,0,1,1]\n",
"y_4 = [1,1,0,0]"
] ]
}, },
{ {
@@ -479,7 +504,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"y_data = [y_1, y_2, y_3, y_4, y_5, y_6]" "y_data = [y_1, y_2, y_3, y_4, y_5, y_6]\n",
"y_data"
] ]
}, },
{ {
@@ -489,7 +515,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"for i in range(len(y_data)):\n", "for i in range(len(y_data)):\n",
" print(ready_data1[i].shape[0])" " print(ready_data1a[i].shape[0])"
] ]
}, },
{ {
@@ -498,9 +524,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import numpy as np\n",
"for i in range(len(y_data)):\n", "for i in range(len(y_data)):\n",
" y_data[i] = [y_data[i]]*ready_data1[i].shape[0]\n", " y_data[i] = [y_data[i]]*ready_data1a[i].shape[0]"
" y_data[i] = np.array(y_data[i])"
] ]
}, },
{ {
@@ -509,6 +535,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# len(y_data[0])\n",
"y_data" "y_data"
] ]
}, },
@@ -541,10 +568,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.model_selection import train_test_split\n", "from src.ml.model_selection import create_ready_data\n",
"\n", "\n",
"x_train1, x_test1, y_train, y_test = train_test_split(x1, y, test_size=0.2, random_state=2)\n", "X1a, y = create_ready_data('D:/thesis/data/converted/raw/sensor1')\n",
"x_train2, x_test2, y_train, y_test = train_test_split(x2, y, test_size=0.2, random_state=2)" "X2a, y = create_ready_data('D:/thesis/data/converted/raw/sensor2')"
] ]
}, },
{ {
@@ -554,6 +581,17 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.model_selection import train_test_split\n", "from sklearn.model_selection import train_test_split\n",
"\n",
"x_train1, x_test1, y_train, y_test = train_test_split(X1a, y, test_size=0.2, random_state=2)\n",
"x_train2, x_test2, y_train, y_test = train_test_split(X2a, y, test_size=0.2, random_state=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import accuracy_score\n",
"from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n", "from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n",
@@ -586,16 +624,17 @@
"\n", "\n",
"\n", "\n",
"# 1. Random Forest\n", "# 1. Random Forest\n",
"rf_model = RandomForestClassifier()\n", "rf_model1 = RandomForestClassifier()\n",
"rf_model.fit(x_train1, y_train)\n", "rf_model1.fit(x_train1, y_train)\n",
"rf_pred1 = rf_model.predict(x_test1)\n", "rf_pred1 = rf_model1.predict(x_test1)\n",
"acc1 = accuracy_score(y_test, rf_pred1) * 100\n", "acc1 = accuracy_score(y_test, rf_pred1) * 100\n",
"accuracies1.append(acc1)\n", "accuracies1.append(acc1)\n",
"# format with color coded if acc1 > 90\n", "# format with color coded if acc1 > 90\n",
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n", "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
"print(\"Random Forest Accuracy for sensor 1:\", acc1)\n", "print(\"Random Forest Accuracy for sensor 1:\", acc1)\n",
"rf_model.fit(x_train2, y_train)\n", "rf_model2 = RandomForestClassifier()\n",
"rf_pred2 = rf_model.predict(x_test2)\n", "rf_model2.fit(x_train2, y_train)\n",
"rf_pred2 = rf_model2.predict(x_test2)\n",
"acc2 = accuracy_score(y_test, rf_pred2) * 100\n", "acc2 = accuracy_score(y_test, rf_pred2) * 100\n",
"accuracies2.append(acc2)\n", "accuracies2.append(acc2)\n",
"# format with color coded if acc2 > 90\n", "# format with color coded if acc2 > 90\n",
@@ -605,16 +644,17 @@
"# print(y_test)\n", "# print(y_test)\n",
"\n", "\n",
"# 2. Bagged Trees\n", "# 2. Bagged Trees\n",
"bagged_model = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)\n", "bagged_model1 = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)\n",
"bagged_model.fit(x_train1, y_train)\n", "bagged_model1.fit(x_train1, y_train)\n",
"bagged_pred1 = bagged_model.predict(x_test1)\n", "bagged_pred1 = bagged_model1.predict(x_test1)\n",
"acc1 = accuracy_score(y_test, bagged_pred1) * 100\n", "acc1 = accuracy_score(y_test, bagged_pred1) * 100\n",
"accuracies1.append(acc1)\n", "accuracies1.append(acc1)\n",
"# format with color coded if acc1 > 90\n", "# format with color coded if acc1 > 90\n",
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n", "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
"print(\"Bagged Trees Accuracy for sensor 1:\", acc1)\n", "print(\"Bagged Trees Accuracy for sensor 1:\", acc1)\n",
"bagged_model.fit(x_train2, y_train)\n", "bagged_model2 = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)\n",
"bagged_pred2 = bagged_model.predict(x_test2)\n", "bagged_model2.fit(x_train2, y_train)\n",
"bagged_pred2 = bagged_model2.predict(x_test2)\n",
"acc2 = accuracy_score(y_test, bagged_pred2) * 100\n", "acc2 = accuracy_score(y_test, bagged_pred2) * 100\n",
"accuracies2.append(acc2)\n", "accuracies2.append(acc2)\n",
"# format with color coded if acc2 > 90\n", "# format with color coded if acc2 > 90\n",
@@ -630,8 +670,9 @@
"# format with color coded if acc1 > 90\n", "# format with color coded if acc1 > 90\n",
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n", "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
"print(\"Decision Tree Accuracy for sensor 1:\", acc1)\n", "print(\"Decision Tree Accuracy for sensor 1:\", acc1)\n",
"dt_model.fit(x_train2, y_train)\n", "dt_model2 = DecisionTreeClassifier()\n",
"dt_pred2 = dt_model.predict(x_test2)\n", "dt_model2.fit(x_train2, y_train)\n",
"dt_pred2 = dt_model2.predict(x_test2)\n",
"acc2 = accuracy_score(y_test, dt_pred2) * 100\n", "acc2 = accuracy_score(y_test, dt_pred2) * 100\n",
"accuracies2.append(acc2)\n", "accuracies2.append(acc2)\n",
"# format with color coded if acc2 > 90\n", "# format with color coded if acc2 > 90\n",
@@ -647,8 +688,9 @@
"# format with color coded if acc1 > 90\n", "# format with color coded if acc1 > 90\n",
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n", "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
"print(\"KNeighbors Accuracy for sensor 1:\", acc1)\n", "print(\"KNeighbors Accuracy for sensor 1:\", acc1)\n",
"knn_model.fit(x_train2, y_train)\n", "knn_model2 = KNeighborsClassifier()\n",
"knn_pred2 = knn_model.predict(x_test2)\n", "knn_model2.fit(x_train2, y_train)\n",
"knn_pred2 = knn_model2.predict(x_test2)\n",
"acc2 = accuracy_score(y_test, knn_pred2) * 100\n", "acc2 = accuracy_score(y_test, knn_pred2) * 100\n",
"accuracies2.append(acc2)\n", "accuracies2.append(acc2)\n",
"# format with color coded if acc2 > 90\n", "# format with color coded if acc2 > 90\n",
@@ -664,8 +706,9 @@
"# format with color coded if acc1 > 90\n", "# format with color coded if acc1 > 90\n",
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n", "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
"print(\"Linear Discriminant Analysis Accuracy for sensor 1:\", acc1)\n", "print(\"Linear Discriminant Analysis Accuracy for sensor 1:\", acc1)\n",
"lda_model.fit(x_train2, y_train)\n", "lda_model2 = LinearDiscriminantAnalysis()\n",
"lda_pred2 = lda_model.predict(x_test2)\n", "lda_model2.fit(x_train2, y_train)\n",
"lda_pred2 = lda_model2.predict(x_test2)\n",
"acc2 = accuracy_score(y_test, lda_pred2) * 100\n", "acc2 = accuracy_score(y_test, lda_pred2) * 100\n",
"accuracies2.append(acc2)\n", "accuracies2.append(acc2)\n",
"# format with color coded if acc2 > 90\n", "# format with color coded if acc2 > 90\n",
@@ -681,8 +724,9 @@
"# format with color coded if acc1 > 90\n", "# format with color coded if acc1 > 90\n",
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n", "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
"print(\"Support Vector Machine Accuracy for sensor 1:\", acc1)\n", "print(\"Support Vector Machine Accuracy for sensor 1:\", acc1)\n",
"svm_model.fit(x_train2, y_train)\n", "svm_model2 = SVC()\n",
"svm_pred2 = svm_model.predict(x_test2)\n", "svm_model2.fit(x_train2, y_train)\n",
"svm_pred2 = svm_model2.predict(x_test2)\n",
"acc2 = accuracy_score(y_test, svm_pred2) * 100\n", "acc2 = accuracy_score(y_test, svm_pred2) * 100\n",
"accuracies2.append(acc2)\n", "accuracies2.append(acc2)\n",
"# format with color coded if acc2 > 90\n", "# format with color coded if acc2 > 90\n",
@@ -698,8 +742,9 @@
"# format with color coded if acc1 > 90\n", "# format with color coded if acc1 > 90\n",
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n", "acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
"print(\"XGBoost Accuracy:\", acc1)\n", "print(\"XGBoost Accuracy:\", acc1)\n",
"xgboost_model.fit(x_train2, y_train)\n", "xgboost_model2 = XGBClassifier()\n",
"xgboost_pred2 = xgboost_model.predict(x_test2)\n", "xgboost_model2.fit(x_train2, y_train)\n",
"xgboost_pred2 = xgboost_model2.predict(x_test2)\n",
"acc2 = accuracy_score(y_test, xgboost_pred2) * 100\n", "acc2 = accuracy_score(y_test, xgboost_pred2) * 100\n",
"accuracies2.append(acc2)\n", "accuracies2.append(acc2)\n",
"# format with color coded if acc2 > 90\n", "# format with color coded if acc2 > 90\n",
@@ -776,51 +821,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def spectograph(data_dir: str):\n", "from src.ml.model_selection import create_ready_data\n",
" # print(os.listdir(data_dir))\n",
" for damage in os.listdir(data_dir):\n",
" # print(damage)\n",
" d = os.path.join(data_dir, damage)\n",
" # print(d)\n",
" for file in os.listdir(d):\n",
" # print(file)\n",
" f = os.path.join(d, file)\n",
" print(f)\n",
" # sensor1 = pd.read_csv(f, skiprows=1, sep=';')\n",
" # sensor2 = pd.read_csv(f, skiprows=1, sep=';')\n",
"\n", "\n",
" # df1 = pd.DataFrame()\n", "X1b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor1')\n",
"\n", "X2b, y = create_ready_data('D:/thesis/data/converted/raw_B/sensor2')"
" # df1['s1'] = sensor1[sensor1.columns[-1]]\n",
" # df1['s2'] = sensor2[sensor2.columns[-1]]\n",
"ed\n",
" # # Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n",
"\n",
" # plt.plot(df1['s2'], label='sensor 2')\n",
" # plt.plot(df1['s1'], label='sensor 1')\n",
" # plt.xlabel(\"Number of samples\")\n",
" # plt.ylabel(\"Amplitude\")\n",
" # plt.title(\"Raw vibration signal\")\n",
" # plt.legend()\n",
" # plt.show()\n",
"\n",
" # from scipy import signal\n",
" # from scipy.signal.windows import hann\n",
"\n",
" # vibration_data = df1['s1']\n",
"\n",
" # # Applying STFT\n",
" # window_size = 1024\n",
" # hop_size = 512\n",
" # window = hann(window_size) # Creating a Hanning window\n",
" # frequencies, times, Zxx = signal.stft(vibration_data, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n",
"\n",
" # # Plotting the STFT Data\n",
" # plt.pcolormesh(times, frequencies, np.abs(Zxx), shading='gouraud')\n",
" # plt.title(f'STFT Magnitude for case 1 signal sensor 1 ')\n",
" # plt.ylabel('Frequency [Hz]')\n",
" # plt.xlabel('Time [sec]')\n",
" # plt.show()"
] ]
}, },
{ {
@@ -829,7 +833,115 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"spectograph('D:/thesis/data/converted/raw')" "y.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score, classification_report\n",
"# 4. Validate on Dataset B\n",
"y_pred_svm = svm_model.predict(X1b)\n",
"\n",
"# 5. Evaluate\n",
"print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n",
"print(classification_report(y, y_pred_svm))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score, classification_report\n",
"# 4. Validate on Dataset B\n",
"y_pred = rf_model2.predict(X2b)\n",
"\n",
"# 5. Evaluate\n",
"print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred))\n",
"print(classification_report(y, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_predict = svm_model2.predict(X2b.iloc[[5312],:])\n",
"print(y_predict)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y[5312]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Confusion Matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
"\n",
"\n",
"cm = confusion_matrix(y, y_pred_svm) # -> ndarray\n",
"\n",
"# get the class labels\n",
"labels = svm_model.classes_\n",
"\n",
"# Plot\n",
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
"disp.plot(cmap=plt.cm.Blues) # You can change colormap\n",
"plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Self-test CM"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1. Predict sensor 1 on Dataset A\n",
"y_train_pred = svm_model.predict(x_train1)\n",
"\n",
"# 2. Import confusion matrix tools\n",
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# 3. Create and plot confusion matrix\n",
"cm_train = confusion_matrix(y_train, y_train_pred)\n",
"labels = svm_model.classes_\n",
"\n",
"disp = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=labels)\n",
"disp.plot(cmap=plt.cm.Blues)\n",
"plt.title(\"Confusion Matrix: Train & Test on Dataset A\")\n",
"plt.show()\n"
] ]
} }
], ],

0
code/src/ml/__init__.py Normal file
View File

View File

@@ -0,0 +1,57 @@
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split as sklearn_split
def create_ready_data(
    stft_data_path: str,
    stratify: np.ndarray = None,
) -> tuple:
    """
    Load all STFT CSV files from a directory and build (X, y) arrays.

    Each CSV file in ``stft_data_path`` is treated as one damage class:
    its rows become feature vectors in X, and its (0-based) position in
    the sorted file listing becomes the class label in y.

    Parameters
    ----------
    stft_data_path : str
        Path to the directory containing STFT data files
        (e.g. 'data/converted/raw/sensor1').
    stratify : np.ndarray, optional
        Currently unused; kept for backward compatibility.
        TODO(review): implement stratified splitting or remove.

    Returns
    -------
    tuple
        (X, y) where X is a pandas DataFrame of all rows concatenated and
        y is a 1-D numpy integer array of class labels aligned with the
        rows of X. Both are empty when the directory contains no files.
    """
    # Sort the listing so label assignment is deterministic across
    # platforms (os.listdir order is arbitrary).
    file_names = sorted(os.listdir(stft_data_path))
    ready_data = [
        pd.read_csv(os.path.join(stft_data_path, name)) for name in file_names
    ]

    if ready_data:
        # Single concat is O(total rows), unlike iterative concatenation.
        X = pd.concat(ready_data, axis=0, ignore_index=True)
    else:
        X = pd.DataFrame()

    # One label per file, repeated once per row of that file.
    label_blocks = [
        np.full(frame.shape[0], label, dtype=int)
        for label, frame in enumerate(ready_data)
    ]
    y = np.concatenate(label_blocks) if label_blocks else np.array([])

    return X, y

View File

@@ -1,25 +1,307 @@
import pandas as pd import pandas as pd
import os import os
import re
import sys import sys
import numpy as np
from colorama import Fore, Style, init from colorama import Fore, Style, init
from typing import TypedDict, Dict, List
from joblib import load
from pprint import pprint
# class DamageFilesIndices(TypedDict):
# damage_index: int
# files: list[int]
OriginalSingleDamageScenarioFilePath = str
DamageScenarioGroupIndex = int
OriginalSingleDamageScenario = pd.DataFrame
SensorIndex = int
VectorColumnIndex = List[SensorIndex]
VectorColumnIndices = List[VectorColumnIndex]
DamageScenarioGroup = List[OriginalSingleDamageScenario]
GroupDataset = List[DamageScenarioGroup]
class DamageFilesIndices(TypedDict):
damage_index: int
files: List[str]
def generate_damage_files_index(**kwargs) -> Dict[int, List[str]]:
    """
    Build an index mapping each damage scenario to its data-file paths.

    Keyword Args:
        prefix (str): File-name prefix (default "zzzAD").
        extension (str): File-name extension (default ".TXT").
        num_damage (int): Number of damage scenarios (required).
        file_index_start (int): Index of the first data file (required).
        col (int): Number of files per damage scenario (required).
        base_path (str, optional): When given, each entry is a normalized
            path under this directory; otherwise a bare file name.

    Returns:
        Dict[int, List[str]]: 1-based damage index -> list of file paths.

    Raises:
        ValueError: If a required keyword argument is missing.
    """
    prefix: str = kwargs.get("prefix", "zzzAD")
    extension: str = kwargs.get("extension", ".TXT")
    num_damage: int = kwargs.get("num_damage")
    file_index_start: int = kwargs.get("file_index_start")
    col: int = kwargs.get("col")
    base_path: str = kwargs.get("base_path")

    # Fail early with a clear message instead of a confusing TypeError
    # deep inside range() below.
    if num_damage is None or file_index_start is None or col is None:
        raise ValueError(
            "num_damage, file_index_start and col are required keyword arguments"
        )

    # Assign each damage scenario a consecutive window of file indices.
    # NOTE(review): the first window is range(file_index_start, col + 1),
    # which assumes file_index_start == 1 — confirm with callers.
    damage_scenarios = {}
    start = file_index_start
    stop = col + 1
    for damage in range(1, num_damage + 1):
        damage_scenarios[damage] = range(start, stop)
        start += col
        stop += col

    index: Dict[int, List[str]] = {}
    for damage, file_indices in damage_scenarios.items():
        index[damage] = []
        for file_index in file_indices:
            file_name = f"{prefix}{file_index}{extension}"
            if base_path:
                index[damage].append(
                    os.path.normpath(os.path.join(base_path, file_name))
                )
            else:
                index[damage].append(file_name)
    return index
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
# df = pd.read_csv( file_path, sep="\t", skiprows=10) # Read with explicit column names
class DataProcessor:
def __init__(self, file_index: DamageFilesIndices, cache_path: str = None):
    """
    Initialize the processor and load all scenario data.

    :param file_index: Mapping of damage-scenario index to its file paths
        (as produced by generate_damage_files_index).
    :param cache_path: Optional path to a serialized dataset; when given,
        data is loaded via joblib instead of re-reading the raw files.
    """
    self.file_index = file_index
    if cache_path:
        # Load a previously serialized dataset (joblib.load).
        self.data = load(cache_path)
    else:
        self.data = self._load_all_data()
def _extract_column_names(self, file_path: str) -> List[str]:
    """
    Extracts column names from the header of the given file.

    Reads the first 12 header lines and parses the channel-name line
    (line 11, index 10), where names are expected as quoted tokens.

    :param file_path: Path to the data file.
    :return: List of column names.
    """
    with open(file_path, "r") as f:
        header_lines = [next(f) for _ in range(12)]
    # Extract column names from the channel line (index 10 of the header)
    channel_line = header_lines[10].strip()
    tokens = re.findall(r'"([^"]+)"', channel_line)
    if not channel_line.startswith('"'):
        # An unquoted leading token (presumably a time column — verify)
        # is kept in front of the quoted channel names.
        first_token = channel_line.split()[0]
        tokens = [first_token] + tokens
    return tokens  # Prepend 'Time' column if applicable
def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
    """
    Loads a single data file into a pandas DataFrame.

    :param file_path: Path to the data file.
    :return: DataFrame containing the numerical data, with column names
        taken from the file header.
    """
    col_names = self._extract_column_names(file_path)
    # Numeric payload starts after the 11-line header; whitespace-delimited.
    df = pd.read_csv(
        file_path, delim_whitespace=True, skiprows=11, header=None, memory_map=True
    )
    df.columns = col_names
    return df
def _load_all_data(self) -> GroupDataset:
"""
Loads all data files based on the grouping dictionary and returns a nested list.
:return: A nested list of DataFrames where the outer index corresponds to group_idx - 1.
"""
data = []
# Find the maximum group index to determine the list size
max_group_idx = max(self.file_index.keys()) if self.file_index else 0
# Initialize empty lists
for _ in range(max_group_idx):
data.append([])
# Fill the list with data
for group_idx, file_list in self.file_index.items():
# Adjust index to be 0-based
list_idx = group_idx - 1
data[list_idx] = [self._load_dataframe(file) for file in file_list]
return data
def get_group_data(self, group_idx: int) -> List[pd.DataFrame]:
"""
Returns the list of DataFrames for the given group index.
:param group_idx: Index of the group.
:return: List of DataFrames.
"""
return self.data.get([group_idx, []])
def get_column_names(self, group_idx: int, file_idx: int = 0) -> List[str]:
"""
Returns the column names for the given group and file indices.
:param group_idx: Index of the group.
:param file_idx: Index of the file in the group.
:return: List of column names.
"""
if group_idx in self.data and len(self.data[group_idx]) > file_idx:
return self.data[group_idx][file_idx].columns.tolist()
return []
def get_data_info(self):
"""
Print information about the loaded data structure.
Adapted for when self.data is a List instead of a Dictionary.
"""
if isinstance(self.data, list):
# For each sublist in self.data, get the type names of all elements
pprint(
[
(
[type(item).__name__ for item in sublist]
if isinstance(sublist, list)
else type(sublist).__name__
)
for sublist in self.data
]
)
else:
pprint(
{
key: [type(df).__name__ for df in value]
for key, value in self.data.items()
}
if isinstance(self.data, dict)
else type(self.data).__name__
)
def _create_vector_column_index(self) -> VectorColumnIndices:
vector_col_idx: VectorColumnIndices = []
y = 0
for data_group in self.data: # len(data_group[i]) = 5
for j in data_group: # len(j[i]) =
c: VectorColumnIndex = [] # column vector c_{j}
x = 0
for _ in range(6): # TODO: range(6) should be dynamic and parameterized
c.append(x + y)
x += 5
vector_col_idx.append(c)
y += 1
return vector_col_idx
def create_vector_column(self, overwrite=True) -> List[List[List[pd.DataFrame]]]:
"""
Create a vector column from the loaded data.
:param overwrite: Overwrite the original data with vector column-based data.
"""
idx = self._create_vector_column_index()
# if overwrite:
for i in range(len(self.data)):
for j in range(len(self.data[i])):
# Get the appropriate indices for slicing from idx
indices = idx[j]
# Get the current DataFrame
df = self.data[i][j]
# Keep the 'Time' column and select only specified 'Real' columns
# First, we add 1 to all indices to account for 'Time' being at position 0
real_indices = [index + 1 for index in indices]
# Create list with Time column index (0) and the adjusted Real indices
all_indices = [0] + real_indices
# Apply the slicing
self.data[i][j] = df.iloc[:, all_indices]
# TODO: if !overwrite:
def create_limited_sensor_vector_column(self, overwrite=True):
"""
Create a vector column from the loaded data.
:param overwrite: Overwrite the original data with vector column-based data.
"""
idx = self._create_vector_column_index()
# if overwrite:
for i in range(len(self.data)): # damage(s)
for j in range(len(self.data[i])): # col(s)
# Get the appropriate indices for slicing from idx
indices = idx[j]
# Get the current DataFrame
df = self.data[i][j]
# Keep the 'Time' column and select only specifid 'Real' colmns
# First, we add 1 to all indices to acount for 'Time' being at positiion 0
real_indices = [index + 1 for index in indices]
# Create list with Time column index (0) and the adjustedd Real indices
all_indices = [0] + [real_indices[0]] + [real_indices[-1]]
# Apply the slicing
self.data[i][j] = df.iloc[:, all_indices]
# TODO: if !overwrite:
def export_to_csv(self, output_dir: str, file_prefix: str = "DAMAGE"):
"""
Export the processed data to CSV files in the required folder structure.
:param output_dir: Directory to save the CSV files.
:param file_prefix: Prefix for the output filenames.
"""
for group_idx, group in enumerate(self.data, start=1):
group_folder = os.path.join(output_dir, f"{file_prefix}_{group_idx}")
os.makedirs(group_folder, exist_ok=True)
for test_idx, df in enumerate(group, start=1):
# Ensure columns are named uniquely if duplicated
df = df.copy()
df.columns = ["Time", "Real_0", "Real_1"] # Rename
# Export first Real column
out1 = os.path.join(
group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_01.csv"
)
df[["Time", "Real_0"]].rename(columns={"Real_0": "Real"}).to_csv(
out1, index=False
)
# Export last Real column
out2 = os.path.join(
group_folder, f"{file_prefix}_{group_idx}_TEST{test_idx}_02.csv"
)
df[["Time", "Real_1"]].rename(columns={"Real_1": "Real"}).to_csv(
out2, index=False
)
def create_damage_files(base_path, output_base, prefix): def create_damage_files(base_path, output_base, prefix):
# Initialize colorama # Initialize colorama
init(autoreset=True) init(autoreset=True)
# Generate column labels based on expected duplication in input files # Generate column labels based on expected duplication in input files
columns = ['Real'] + [f'Real.{i}' for i in range(1, 30)] # Explicitly setting column names columns = ["Real"] + [
f"Real.{i}" for i in range(1, 30)
] # Explicitly setting column names
sensor_end_map = {1: 'Real.25', 2: 'Real.26', 3: 'Real.27', 4: 'Real.28', 5: 'Real.29'} sensor_end_map = {
1: "Real.25",
2: "Real.26",
3: "Real.27",
4: "Real.28",
5: "Real.29",
}
# Define the damage scenarios and the corresponding original file indices # Define the damage scenarios and the corresponding original file indices
damage_scenarios = { damage_scenarios = {
1: range(1, 6), # Damage 1 files from zzzAD1.csv to zzzAD5.csv 1: range(1, 6), # Damage 1 files from zzzAD1.csv to zzzAD5.csv
2: range(6, 11), # Damage 2 files from zzzAD6.csv to zzzAD10.csv 2: range(6, 11), # Damage 2 files from zzzAD6.csv to zzzAD10.csv
3: range(11, 16), # Damage 3 files from zzzAD11.csv to zzzAD15.csvs 3: range(11, 16), # Damage 3 files from zzzAD11.csv to zzzAD15.csvs
4: range(16, 21), # Damage 4 files from zzzAD16.csv to zzzAD20.csv 4: range(16, 21), # Damage 4 files from zzzAD16.csv to zzzAD20.csv
5: range(21, 26), # Damage 5 files from zzzAD21.csv to zzzAD25.csv 5: range(21, 26), # Damage 5 files from zzzAD21.csv to zzzAD25.csv
6: range(26, 31) # Damage 6 files from zzzAD26.csv to zzzAD30.csv 6: range(26, 31), # Damage 6 files from zzzAD26.csv to zzzAD30.csv
} }
damage_pad = len(str(len(damage_scenarios))) damage_pad = len(str(len(damage_scenarios)))
test_pad = len(str(30)) test_pad = len(str(30))
@@ -27,29 +309,36 @@ def create_damage_files(base_path, output_base, prefix):
for damage, files in damage_scenarios.items(): for damage, files in damage_scenarios.items():
for i, file_index in enumerate(files, start=1): for i, file_index in enumerate(files, start=1):
# Load original data file # Load original data file
file_path = os.path.join(base_path, f'zzz{prefix}D{file_index}.TXT') file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
df = pd.read_csv(file_path, sep='\t', skiprows=10) # Read with explicit column names df = pd.read_csv(
file_path, sep="\t", skiprows=10
) # Read with explicit column names
top_sensor = columns[i-1] top_sensor = columns[i - 1]
print(top_sensor, type(top_sensor)) print(top_sensor, type(top_sensor))
output_file_1 = os.path.join(output_base, f'DAMAGE_{damage}', f'DAMAGE{damage}_TEST{i}_01.csv') output_file_1 = os.path.join(
output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_01.csv"
)
print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT") print(f"Creating {output_file_1} from taking zzz{prefix}D{file_index}.TXT")
print("Taking datetime column on index 0...") print("Taking datetime column on index 0...")
print(f"Taking `{top_sensor}`...") print(f"Taking `{top_sensor}`...")
os.makedirs(os.path.dirname(output_file_1), exist_ok=True) os.makedirs(os.path.dirname(output_file_1), exist_ok=True)
df[['Time', top_sensor]].to_csv(output_file_1, index=False) df[["Time", top_sensor]].to_csv(output_file_1, index=False)
print(Fore.GREEN + "Done") print(Fore.GREEN + "Done")
bottom_sensor = sensor_end_map[i] bottom_sensor = sensor_end_map[i]
output_file_2 = os.path.join(output_base, f'DAMAGE_{damage}', f'DAMAGE{damage}_TEST{i}_02.csv') output_file_2 = os.path.join(
output_base, f"DAMAGE_{damage}", f"DAMAGE{damage}_TEST{i}_02.csv"
)
print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT") print(f"Creating {output_file_2} from taking zzz{prefix}D{file_index}.TXT")
print("Taking datetime column on index 0...") print("Taking datetime column on index 0...")
print(f"Taking `{bottom_sensor}`...") print(f"Taking `{bottom_sensor}`...")
os.makedirs(os.path.dirname(output_file_2), exist_ok=True) os.makedirs(os.path.dirname(output_file_2), exist_ok=True)
df[['Time', bottom_sensor]].to_csv(output_file_2, index=False) df[["Time", bottom_sensor]].to_csv(output_file_2, index=False)
print(Fore.GREEN + "Done") print(Fore.GREEN + "Done")
print("---") print("---")
def main(): def main():
if len(sys.argv) < 2: if len(sys.argv) < 2:
print("Usage: python convert.py <path_to_csv_files>") print("Usage: python convert.py <path_to_csv_files>")
@@ -66,5 +355,6 @@ def main():
create_damage_files(base_path, output_base, prefix) create_damage_files(base_path, output_base, prefix)
print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.") print(Fore.YELLOW + Style.BRIGHT + "All files have been created successfully.")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

25
data/QUGS/test.py Normal file
View File

@@ -0,0 +1,25 @@
# Ad-hoc driver script: build the dataset_B file index and export the
# limited-sensor (first + last 'Real' column) CSVs via DataProcessor.
from convert import *
from joblib import dump, load

# Dataset A run (kept for reference, currently disabled):
# a = generate_damage_files_index(
#     num_damage=6, file_index_start=1, col=5, base_path="D:/thesis/data/dataset_A"
# )

# Index of dataset B raw files: 6 damage scenarios, 5 files each,
# named zzzBD<index><ext> under base_path.
b = generate_damage_files_index(
    num_damage=6,
    file_index_start=1,
    col=5,
    base_path="D:/thesis/data/dataset_B",
    prefix="zzzBD",
)

# Dataset A processing (disabled):
# data_A = DataProcessor(file_index=a)
# # data.create_vector_column(overwrite=True)
# data_A.create_limited_sensor_vector_column(overwrite=True)
# data_A.export_to_csv("D:/thesis/data/converted/raw")

# Load every dataset B file, reduce each to Time + two sensor columns,
# then write the DAMAGE_<n>/DAMAGE_<n>_TEST<m>_0{1,2}.csv tree.
data_B = DataProcessor(file_index=b)
# data.create_vector_column(overwrite=True)
data_B.create_limited_sensor_vector_column(overwrite=True)
data_B.export_to_csv("D:/thesis/data/converted/raw_B")

# Debug helpers (disabled):
# a = load("D:/cache.joblib")
# breakpoint()

8
setup.py Normal file
View File

@@ -0,0 +1,8 @@
# Minimal packaging config so the thesis modules under code/ are importable
# (e.g. via `pip install -e .`).
from setuptools import setup, find_packages

setup(
    name="thesisrepo",
    version="0.1",
    # Packages live under the code/ directory, not the repo root.
    packages=find_packages(where="code"),
    package_dir={"": "code"},
)