Merge branch 'feat/90-feat-preserve-trained-model' into dev
This commit is contained in:
5
.vscode/settings.json
vendored
5
.vscode/settings.json
vendored
@@ -1,4 +1,7 @@
|
||||
{
|
||||
"python.analysis.extraPaths": ["./code/src/features"],
|
||||
"python.analysis.extraPaths": [
|
||||
"./code/src/features",
|
||||
"${workspaceFolder}/code/src"
|
||||
],
|
||||
"jupyter.notebookFileRoot": "${workspaceFolder}/code"
|
||||
}
|
||||
|
||||
@@ -688,23 +688,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def train_and_evaluate_model(model, model_name, sensor_label, x_train, y_train, x_test, y_test):\n",
|
||||
" model.fit(x_train, y_train)\n",
|
||||
" y_pred = model.predict(x_test)\n",
|
||||
" accuracy = accuracy_score(y_test, y_pred) * 100\n",
|
||||
" return {\n",
|
||||
" \"model\": model_name,\n",
|
||||
" \"sensor\": sensor_label,\n",
|
||||
" \"accuracy\": accuracy\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from src.ml.model_selection import train_and_evaluate_model\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"# Define models for sensor1\n",
|
||||
"models_sensor1 = {\n",
|
||||
" # \"Random Forest\": RandomForestClassifier(),\n",
|
||||
@@ -718,7 +703,7 @@
|
||||
"\n",
|
||||
"results_sensor1 = []\n",
|
||||
"for name, model in models_sensor1.items():\n",
|
||||
" res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test)\n",
|
||||
" res = train_and_evaluate_model(model, name, \"sensor1\", x_train1, y_train, x_test1, y_test, export='D:/thesis/models/sensor1')\n",
|
||||
" results_sensor1.append(res)\n",
|
||||
" print(f\"{name} on sensor1: Accuracy = {res['accuracy']:.2f}%\")\n"
|
||||
]
|
||||
@@ -741,7 +726,7 @@
|
||||
"\n",
|
||||
"results_sensor2 = []\n",
|
||||
"for name, model in models_sensor2.items():\n",
|
||||
" res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train, x_test2, y_test)\n",
|
||||
" res = train_and_evaluate_model(model, name, \"sensor2\", x_train2, y_train, x_test2, y_test, export='D:/thesis/models/sensor2')\n",
|
||||
" results_sensor2.append(res)\n",
|
||||
" print(f\"{name} on sensor2: Accuracy = {res['accuracy']:.2f}%\")\n"
|
||||
]
|
||||
@@ -854,6 +839,8 @@
|
||||
"source": [
|
||||
"from sklearn.metrics import accuracy_score, classification_report\n",
|
||||
"# 4. Validate on Dataset B\n",
|
||||
"from joblib import load\n",
|
||||
"svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
|
||||
"y_pred_svm = svm_model.predict(X1b)\n",
|
||||
"\n",
|
||||
"# 5. Evaluate\n",
|
||||
@@ -861,6 +848,30 @@
|
||||
"print(classification_report(y, y_pred_svm))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Model sensor 1 to predict sensor 2 data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import accuracy_score, classification_report\n",
|
||||
"# 4. Validate on Dataset B\n",
|
||||
"from joblib import load\n",
|
||||
"svm_model = load('D:/thesis/models/sensor1/SVM.joblib')\n",
|
||||
"y_pred_svm = svm_model.predict(X2b)\n",
|
||||
"\n",
|
||||
"# 5. Evaluate\n",
|
||||
"print(\"Accuracy on Dataset B:\", accuracy_score(y, y_pred_svm))\n",
|
||||
"print(classification_report(y, y_pred_svm))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -920,7 +931,7 @@
|
||||
"# Plot\n",
|
||||
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)\n",
|
||||
"disp.plot(cmap=plt.cm.Blues) # You can change colormap\n",
|
||||
"plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B\")\n",
|
||||
"plt.title(\"SVM Sensor1 CM Train w/ Dataset A Val w/ Dataset B from Sensor2 readings\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
@@ -938,14 +949,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 1. Predict sensor 1 on Dataset A\n",
|
||||
"y_train_pred = svm_model.predict(x_train1)\n",
|
||||
"y_test_pred = svm_model.predict(x_test1)\n",
|
||||
"\n",
|
||||
"# 2. Import confusion matrix tools\n",
|
||||
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# 3. Create and plot confusion matrix\n",
|
||||
"cm_train = confusion_matrix(y_train, y_train_pred)\n",
|
||||
"cm_train = confusion_matrix(y_test, y_test_pred)\n",
|
||||
"labels = svm_model.classes_\n",
|
||||
"\n",
|
||||
"disp = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=labels)\n",
|
||||
|
||||
@@ -55,3 +55,101 @@ def create_ready_data(
|
||||
y = np.array([])
|
||||
|
||||
return X, y
|
||||
|
||||
|
||||
def train_and_evaluate_model(
    model, model_name, sensor_label, x_train, y_train, x_test, y_test, export=None
):
    """
    Train a model, score it on test data, and optionally save it to disk.

    The model is fitted with ``model.fit(x_train, y_train)``, used to predict
    ``x_test``, and scored with ``sklearn.metrics.accuracy_score``. Failures in
    fitting, predicting, or scoring are not raised: they are reported through
    the returned dictionary. A failure while exporting is reported as well but
    does not mark the run as unsuccessful.

    Parameters
    ----------
    model : estimator object
        Object exposing ``fit(x, y)`` and ``predict(x)``.
    model_name : str
        Name of the model, used for the export filename and in the returned results.
    sensor_label : str
        Label identifying which sensor's data the model is being trained on.
    x_train : array-like or pandas.DataFrame
        The training input samples.
    y_train : array-like
        The target values for training.
    x_test : array-like or pandas.DataFrame
        The test input samples.
    y_test : array-like
        The target values for testing.
    export : str, optional
        Directory in which to save the trained model as ``<model_name>.joblib``.
        The directory is created if missing. If None (or empty), nothing is saved.

    Returns
    -------
    dict
        Dictionary containing:
        - 'model': model_name (str), always present
        - 'sensor': sensor_label (str), always present
        - 'success': bool, True only if fit, predict and scoring all succeeded
        - 'accuracy': accuracy percentage (float); present only once scoring
          succeeded, so check 'success' before reading it
        - 'error': str, present only when fit, predict or scoring failed
        - 'export_error': str, present only when saving the model failed

    Example
    -------
    >>> from sklearn.svm import SVC
    >>> from sklearn.model_selection import train_test_split
    >>> X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)
    >>> result = train_and_evaluate_model(
    ...     SVC(),
    ...     "SVM",
    ...     "sensor1",
    ...     X_train,
    ...     y_train,
    ...     X_test,
    ...     y_test,
    ...     export="models/sensor1"
    ... )
    >>> if result["success"]:
    ...     print(f"Model accuracy: {result['accuracy']:.2f}%")
    """
    # Imported locally so the export step cannot fail with a NameError (which
    # the broad handler below would silently turn into an 'export_error') in
    # case the module does not import ``os`` at the top.
    import os

    from sklearn.metrics import accuracy_score

    result = {"model": model_name, "sensor": sensor_label, "success": False}

    # Train the model
    try:
        model.fit(x_train, y_train)
    except Exception as e:
        result["error"] = f"Training error: {str(e)}"
        return result

    # Predict on the held-out samples
    try:
        y_pred = model.predict(x_test)
    except Exception as e:
        result["error"] = f"Prediction error: {str(e)}"
        return result

    # Calculate accuracy (as a percentage)
    try:
        result["accuracy"] = accuracy_score(y_test, y_pred) * 100
    except Exception as e:
        result["error"] = f"Accuracy calculation error: {str(e)}"
        return result

    # Export model if requested
    if export:
        try:
            import joblib

            full_path = os.path.join(export, f"{model_name}.joblib")
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
            joblib.dump(model, full_path)
            print(f"Model saved to {full_path}")
        except Exception as e:
            # Best effort: a failed export is reported but does not
            # invalidate the training/evaluation result.
            print(f"Warning: Failed to export model to {export}: {str(e)}")
            result["export_error"] = str(e)

    result["success"] = True
    return result
|
||||
|
||||
Reference in New Issue
Block a user