diff --git a/.vscode/settings.json b/.vscode/settings.json index a8b3783..5db57d3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,7 @@ { - "python.analysis.extraPaths": ["./code/src/features"], + "python.analysis.extraPaths": [ + "./code/src/features", + "${workspaceFolder}/code/src" + ], "jupyter.notebookFileRoot": "${workspaceFolder}/code" } diff --git a/code/notebooks/stft.ipynb b/code/notebooks/stft.ipynb index e32eda5..107bcd2 100644 --- a/code/notebooks/stft.ipynb +++ b/code/notebooks/stft.ipynb @@ -17,8 +17,8 @@ "metadata": {}, "outputs": [], "source": [ - "sensor1 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_1_TEST1_01.csv',sep=',')\n", - "sensor2 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_1_TEST1_02.csv',sep=',')" + "sensor1 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_0_TEST1_01.csv',sep=',')\n", + "sensor2 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_0_TEST1_02.csv',sep=',')" ] }, { @@ -101,13 +101,16 @@ "source": [ "# Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n", "\n", - "plt.plot(df1['s2'], label='sensor 2')\n", - "plt.plot(df1['s1'], label='sensor 1', alpha=0.5)\n", + "plt.plot(df1['s2'], label='Sensor 1', color='C1', alpha=0.6)\n", + "plt.plot(df1['s1'], label='Sensor 2', color='C0', alpha=0.6)\n", "plt.xlabel(\"Number of samples\")\n", "plt.ylabel(\"Amplitude\")\n", "plt.title(\"Raw vibration signal\")\n", "plt.ylim(-7.5, 5)\n", "plt.legend()\n", + "plt.locator_params(axis='x', nbins=8)\n", + "plt.ylim(-1, 1) # Adjust range as needed\n", + "plt.grid(True, linestyle='--', alpha=0.5)\n", "plt.show()" ] }, @@ -334,9 +337,44 @@ "metadata": {}, "outputs": [], "source": [ - "# len(ready_data1a)\n", - "# plt.pcolormesh(ready_data1[0])\n", - "ready_data1a[0].max().max()" + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from mpl_toolkits.mplot3d import Axes3D\n", + "\n", + "# Assuming ready_data1a[0] 
is a DataFrame or 2D array\n", + "spectrogram_data = ready_data1a[0].values # Convert to NumPy array if it's a DataFrame\n", + "\n", + "# Get the dimensions of the spectrogram\n", + "num_frequencies, num_time_frames = spectrogram_data.shape\n", + "\n", + "# Create frequency and time arrays\n", + "frequencies = np.arange(num_frequencies) # Replace with actual frequency values if available\n", + "time_frames = np.arange(num_time_frames) # Replace with actual time values if available\n", + "\n", + "# Create a meshgrid for plotting\n", + "T, F = np.meshgrid(time_frames, frequencies)\n", + "\n", + "# Create a 3D plot\n", + "fig = plt.figure(figsize=(12, 8))\n", + "ax = fig.add_subplot(111, projection='3d')\n", + "\n", + "# Plot the surface\n", + "surf = ax.plot_surface(T, F, spectrogram_data, cmap='bwr', edgecolor='none')\n", + "\n", + "# Add labels and a color bar\n", + "ax.set_xlabel('Time Frames')\n", + "ax.set_ylabel('Frequency [Hz]')\n", + "ax.set_zlabel('Magnitude')\n", + "ax.set_title('3D Spectrogram')\n", + "# Resize the z-axis (shrink it)\n", + "z_min, z_max = 0, 0.1 # Replace with your desired range\n", + "ax.set_zlim(z_min, z_max)\n", + "ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1, 1, 0.5, 1])) # Shrink z-axis by 50%\n", + "ax.set_facecolor('white')\n", + "fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10)\n", + "\n", + "# Show the plot\n", + "plt.show()" ] }, { @@ -345,13 +383,32 @@ "metadata": {}, "outputs": [], "source": [ + "from cmcrameri import cm\n", + "# Create a figure and subplots\n", + "fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)\n", + "\n", + "# Flatten the axes array for easier iteration\n", + "axes = axes.flatten()\n", + "\n", + "# Loop through each subplot and plot the data\n", "for i in range(6):\n", - " plt.pcolormesh(ready_data1a[i], cmap=\"jet\", vmax=0.03, vmin=0.0)\n", - " plt.colorbar() \n", - " plt.title(f'STFT Magnitude for case {i} sensor 1')\n", - " plt.xlabel(f'Frequency [Hz]')\n", - " 
plt.ylabel(f'Time [sec]')\n", - " plt.show()" + " pcm = axes[i].pcolormesh(ready_data1a[i].transpose(), cmap='bwr', vmax=0.03, vmin=0.0)\n", + " axes[i].set_title(f'Case {i} Sensor A', fontsize=12)\n", + "\n", + "# Add a single color bar for all subplots\n", + "# Use the first `pcolormesh` object (or any valid one) for the color bar\n", + "cbar = fig.colorbar(pcm, ax=axes, orientation='vertical')\n", + "# cbar.set_label('Magnitude')\n", + "\n", + "# Set shared labels\n", + "fig.text(0.5, 0.04, 'Time Frames', ha='center', fontsize=12)\n", + "fig.text(0.04, 0.5, 'Frequency [Hz]', va='center', rotation='vertical', fontsize=12)\n", + "\n", + "# Adjust layout\n", + "# plt.tight_layout(rect=[0.05, 0.05, 1, 1]) # Leave space for shared labels\n", + "plt.subplots_adjust(left=0.1, right=0.75, top=0.9, bottom=0.1, wspace=0.2, hspace=0.2)\n", + "\n", + "plt.show()" ] }, { @@ -576,6 +633,16 @@ "X2a, y = create_ready_data('D:/thesis/data/converted/raw/sensor2')" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X1a.iloc[-1,:]\n", + "# y[2565]" + ] + }, { "cell_type": "code", "execution_count": null, @@ -621,23 +688,8 @@ "metadata": {}, "outputs": [], "source": [ - "def train_and_evaluate_model(model, model_name, sensor_label, x_train, y_train, x_test, y_test):\n", - " model.fit(x_train, y_train)\n", - " y_pred = model.predict(x_test)\n", - " accuracy = accuracy_score(y_test, y_pred) * 100\n", - " return {\n", - " \"model\": model_name,\n", - " \"sensor\": sensor_label,\n", - " \"accuracy\": accuracy\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "from src.ml.model_selection import train_and_evaluate_model\n", + "from sklearn.svm import SVC\n", "# Define models for sensor1\n", "models_sensor1 = {\n", " # \"Random Forest\": RandomForestClassifier(),\n", @@ -646,12 +698,12 @@ " # \"KNN\": KNeighborsClassifier(),\n", " # \"LDA\": 
LinearDiscriminantAnalysis(),\n", " \"SVM\": SVC(),\n", - " \"XGBoost\": XGBClassifier()\n", + " # \"XGBoost\": XGBClassifier()\n", "}\n", "\n", "results_sensor1 = []\n", "for name, model in models_sensor1.items():\n", - " res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test)\n", + " res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/sensor1')\n", " results_sensor1.append(res)\n", " print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n" ] @@ -669,12 +721,12 @@ " # \"KNN\": KNeighborsClassifier(),\n", " # \"LDA\": LinearDiscriminantAnalysis(),\n", " \"SVM\": SVC(),\n", - " \"XGBoost\": XGBClassifier()\n", + " # \"XGBoost\": XGBClassifier()\n", "}\n", "\n", "results_sensor2 = []\n", "for name, model in models_sensor2.items():\n", - " res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train, x_test2, y_test)\n", + " res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train, x_test2, y_test, export='D:/thesis/models/sensor2')\n", " results_sensor2.append(res)\n", " print(f\"{name} on sensor2: Accuracy = {res['accuracy']:.2f}%\")\n" ] @@ -787,6 +839,8 @@ "source": [ "from sklearn.metrics import accuracy_score, classification_report\n", "# 4. Validate on Dataset B\n", + "from joblib import load\n", + "svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n", "y_pred_svm = svm_model.predict(X1b)\n", "\n", "# 5. Evaluate\n", @@ -794,6 +848,30 @@ "print(classification_report(y, y_pred_svm))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model sensor 1 to predict sensor 2 data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score, classification_report\n", + "# 4. 
def train_and_evaluate_model(
    model, model_name, sensor_label, x_train, y_train, x_test, y_test, export=None
):
    """
    Train a model, score it on held-out data, and optionally save it to disk.

    The pipeline runs in four stages: fit, predict, accuracy scoring and
    (optional) export. Exceptions raised inside a stage are caught and
    reported in the returned dictionary instead of being propagated, so a
    loop over several models keeps running when one of them fails.

    Parameters
    ----------
    model : estimator object
        Object exposing ``fit(x, y)`` and ``predict(x)``.
    model_name : str
        Name of the model; used in the result and as the export file stem
        (``<export>/<model_name>.joblib``).
    sensor_label : str
        Label identifying which sensor's data the model is trained on;
        copied verbatim into the result.
    x_train : array-like or pandas.DataFrame
        The training input samples.
    y_train : array-like
        The target values for training.
    x_test : array-like or pandas.DataFrame
        The test input samples.
    y_test : array-like
        The target values for testing.
    export : str, optional
        Directory in which the fitted model is saved with ``joblib``. The
        directory is created if missing. If falsy (``None`` or ``""``), the
        model is not saved.

    Returns
    -------
    dict
        Always contains:
        - 'model': model_name
        - 'sensor': sensor_label
        - 'success': True when fit, predict and scoring all succeeded
          (an export failure does NOT reset this flag)
        - 'accuracy': accuracy as a percentage (float); ``nan`` when the
          score could not be computed
        Conditionally contains:
        - 'error': message prefixed with the failing stage
          ("Training error", "Prediction error" or
          "Accuracy calculation error")
        - 'export_error': message of the exception raised while saving

    Example
    -------
    >>> from sklearn.svm import SVC
    >>> from sklearn.model_selection import train_test_split
    >>> X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)
    >>> result = train_and_evaluate_model(
    ...     SVC(),
    ...     "SVM",
    ...     "sensor1",
    ...     X_train,
    ...     y_train,
    ...     X_test,
    ...     y_test,
    ...     export="models/sensor1"
    ... )
    >>> print(f"Model accuracy: {result['accuracy']:.2f}%")
    """
    result = {
        "model": model_name,
        "sensor": sensor_label,
        "success": False,
        # NaN placeholder: callers index and format result["accuracy"]
        # unconditionally, so the key must exist on failure paths too.
        "accuracy": float("nan"),
    }

    # Each stage is a deliberate error boundary: any exception is converted
    # into an entry of the result so batch comparisons are not interrupted.
    try:
        model.fit(x_train, y_train)
    except Exception as e:
        result["error"] = f"Training error: {e}"
        return result

    try:
        y_pred = model.predict(x_test)
    except Exception as e:
        result["error"] = f"Prediction error: {e}"
        return result

    try:
        # Imported here so a missing scikit-learn is reported like any other
        # scoring failure instead of escaping the function.
        from sklearn.metrics import accuracy_score

        result["accuracy"] = accuracy_score(y_test, y_pred) * 100
    except Exception as e:
        result["error"] = f"Accuracy calculation error: {e}"
        return result

    if export:
        try:
            # Local imports keep the export stage self-contained; it must not
            # depend on what the surrounding module happens to import.
            import os

            import joblib

            full_path = os.path.join(export, f"{model_name}.joblib")
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
            joblib.dump(model, full_path)
            print(f"Model saved to {full_path}")
        except Exception as e:
            # Best effort: the trained model is still usable in memory, so an
            # export failure is recorded but does not fail the whole run.
            print(f"Warning: Failed to export model to {export}: {e}")
            result["export_error"] = str(e)

    result["success"] = True
    return result