Merge branch 'main' of https://github.com/nuluh/thesis
This commit is contained in:
@@ -157,6 +157,19 @@
|
||||
"source": [
|
||||
"# Define a function to extract numbers from a filename that later used as labels features\n",
|
||||
"def extract_numbers(filename):\n",
|
||||
" '''\n",
|
||||
" Extract numbers from a filename\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
" filename : str\n",
|
||||
" The filename to extract numbers from\n",
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
" list\n",
|
||||
" A list of extracted numbers: [damage_number, test_number, sensor_number]\n",
|
||||
" '''\n",
|
||||
" # Find all occurrences of one or more digits in the filename\n",
|
||||
" numbers = re.findall(r'\\d+', filename)\n",
|
||||
" # Convert the list of number strings to integers\n",
|
||||
@@ -168,6 +181,7 @@
|
||||
" all_features = []\n",
|
||||
" for nth_damage in os.listdir(input_dir):\n",
|
||||
" nth_damage_path = os.path.join(input_dir, nth_damage)\n",
|
||||
" print(f'Extracting features from damage folder {nth_damage_path}')\n",
|
||||
" if os.path.isdir(nth_damage_path):\n",
|
||||
" for nth_test in os.listdir(nth_damage_path):\n",
|
||||
" nth_test_path = os.path.join(nth_damage_path, nth_test)\n",
|
||||
@@ -348,6 +362,430 @@
|
||||
"sns.pairplot(subset_df, hue='label', diag_kind='kde')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## QUGS Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To test the `FeatureExtractor` class from the `time_domain_features.py` script with real data from QUGS that has been converted purposed for the thesis."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Importing Modules\n",
|
||||
"\n",
|
||||
"Use relative imports or modify the path to include the directory where the module is stored. In this example, we’ll simulate the relative import setup."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Real DataFrame\n",
|
||||
"\n",
|
||||
"Create one DataFrame from one of the raw data file. Simulate importing the `FeatureExtractor` from its relative path in the notebooks directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Convert to DataFrame (simulating processed data input)\n",
|
||||
"single_data_dir = \"D:/thesis/data/converted/raw/DAMAGE_2/D2_TEST05_01.csv\"\n",
|
||||
"df = pd.read_csv(single_data_dir)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Absolute the data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df[df.columns[-1]] = df[df.columns[-1]].abs()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Visualize Data Points"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Plotting the data points\n",
|
||||
"plt.figure(figsize=(8, 6))\n",
|
||||
"plt.plot(df['Time'], df[df.columns[-1]], marker='o', color='blue', label='Data Points')\n",
|
||||
"plt.title('Scatter Plot of Data Points')\n",
|
||||
"plt.xlabel('Time')\n",
|
||||
"plt.ylabel('Amp')\n",
|
||||
"plt.legend()\n",
|
||||
"plt.grid(True)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Downsampled Plot with Alpha Blending\n",
|
||||
"\n",
|
||||
"Reduce the number of data points by sampling a subset of the data and use transparency to help visualize the density of overlapping points."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Downsample the data by taking every nth point\n",
|
||||
"n = 1 # Adjust this value as needed\n",
|
||||
"downsampled_df = df.iloc[::n, :]\n",
|
||||
"\n",
|
||||
"# Plotting the downsampled data points with alpha blending\n",
|
||||
"plt.figure(figsize=(8, 6))\n",
|
||||
"plt.plot(downsampled_df['Time'], downsampled_df[downsampled_df.columns[-1]], alpha=0.5, color='blue', label='Data Points')\n",
|
||||
"plt.title('Scatter Plot of Downsampled Data Points')\n",
|
||||
"plt.xlabel('Time')\n",
|
||||
"plt.ylabel('Amp')\n",
|
||||
"plt.legend()\n",
|
||||
"plt.grid(True)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Line Plot with Rolling Avg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Calculate the rolling average\n",
|
||||
"window_size = 50 # Adjust this value as needed\n",
|
||||
"rolling_avg = df[df.columns[-1]].rolling(window=window_size).mean()\n",
|
||||
"\n",
|
||||
"# Plotting the original data points and the rolling average\n",
|
||||
"plt.figure(figsize=(8, 6))\n",
|
||||
"plt.plot(df['Time'], df[df.columns[-1]], alpha=0.3, color='blue', label='Original Data')\n",
|
||||
"plt.plot(df['Time'], rolling_avg, color='red', label='Rolling Average')\n",
|
||||
"plt.title('Line Plot with Rolling Average')\n",
|
||||
"plt.xlabel('Time')\n",
|
||||
"plt.ylabel('Amp')\n",
|
||||
"plt.legend()\n",
|
||||
"plt.grid(True)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Print Time-domain Features (Single CSV Real Data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"# Assuming the src directory is one level up from the notebooks directory\n",
|
||||
"sys.path.append('../src/features')\n",
|
||||
"from time_domain_features import FeatureExtractor\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Extract features\n",
|
||||
"extracted = FeatureExtractor(df[df.columns[-1]])\n",
|
||||
"\n",
|
||||
"# Format with pandas DataFramw\n",
|
||||
"features = pd.DataFrame(extracted.features, index=[0])\n",
|
||||
"features\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Print Time-domain Features (Multiple CSV Real Data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"# Assuming the src directory is one level up from the notebooks directory\n",
|
||||
"sys.path.append('../src/features')\n",
|
||||
"from time_domain_features import ExtractTimeFeatures # use wrapper function instead of class for easy use\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### The function"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define a function to extract numbers from a filename that later used as labels features\n",
|
||||
"def extract_numbers(filename):\n",
|
||||
" '''\n",
|
||||
" Extract numbers from a filename\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
" filename : str\n",
|
||||
" The filename to extract numbers from\n",
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
" list\n",
|
||||
" A list of extracted numbers: [damage_number, test_number, sensor_number]\n",
|
||||
" '''\n",
|
||||
" # Find all occurrences of one or more digits in the filename\n",
|
||||
" numbers = re.findall(r'\\d+', filename)\n",
|
||||
" # Convert the list of number strings to integers\n",
|
||||
" numbers = [int(num) for num in numbers]\n",
|
||||
" # Convert to a tuple and return\n",
|
||||
" return numbers\n",
|
||||
"\n",
|
||||
"def build_features(input_dir:str, sensor:int=None, verbose:bool=False, absolute:bool=False):\n",
|
||||
" all_features = []\n",
|
||||
" for nth_damage in os.listdir(input_dir):\n",
|
||||
" nth_damage_path = os.path.join(input_dir, nth_damage)\n",
|
||||
" if verbose:\n",
|
||||
" print(f'Extracting features from damage folder {nth_damage_path}')\n",
|
||||
" if os.path.isdir(nth_damage_path):\n",
|
||||
" for nth_test in os.listdir(nth_damage_path):\n",
|
||||
" nth_test_path = os.path.join(nth_damage_path, nth_test)\n",
|
||||
" # if verbose:\n",
|
||||
" # print(f'Extracting features from {nth_test_path}')\n",
|
||||
" if sensor is not None:\n",
|
||||
" # Check if the file has the specified sensor suffix\n",
|
||||
" if not nth_test.endswith(f'_{sensor:02}.csv'):\n",
|
||||
" continue\n",
|
||||
" # if verbose:\n",
|
||||
" # print(f'Extracting features from {nth_test_path}')\n",
|
||||
" features = ExtractTimeFeatures(nth_test_path, absolute=absolute) # return the one csv file feature in dictionary {}\n",
|
||||
" if verbose:\n",
|
||||
" print(features)\n",
|
||||
" features['label'] = extract_numbers(nth_test)[0] # add labels to the dictionary\n",
|
||||
" features['filename'] = nth_test # add filename to the dictionary\n",
|
||||
" all_features.append(features)\n",
|
||||
"\n",
|
||||
" # Create a DataFrame from the list of dictionaries\n",
|
||||
" df = pd.DataFrame(all_features)\n",
|
||||
" return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Execute the automation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data_dir = \"D:/thesis/data/converted/raw\"\n",
|
||||
"# Extract features\n",
|
||||
"df1 = build_features(data_dir, sensor=1, verbose=True, absolute=True)\n",
|
||||
"df2 = build_features(data_dir, sensor=2, verbose=True, absolute=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df1.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import seaborn as sns\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Assuming your DataFrame is named 'df'\n",
|
||||
"\n",
|
||||
"# Subsetting the DataFrame to include only the first 3 columns and the label\n",
|
||||
"subset_df = df1[['Mean', 'Max', 'Peak (Pm)', 'label']]\n",
|
||||
"\n",
|
||||
"# Plotting the pairplot\n",
|
||||
"g = sns.pairplot(subset_df, hue='label', diag_kind='kde')\n",
|
||||
"\n",
|
||||
"# Adjusting the axis limits\n",
|
||||
"# for ax in g.axes.flatten():\n",
|
||||
"# ax.set_xlim(-10, 10) # Adjust these limits based on your data\n",
|
||||
"# ax.set_ylim(-10, 10) # Adjust these limits based on your data\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df2.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import seaborn as sns\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Assuming your DataFrame is named 'df'\n",
|
||||
"\n",
|
||||
"# Subsetting the DataFrame to include only the first 3 columns and the label\n",
|
||||
"subset_df = df2[['Mean', 'Max', 'Standard Deviation', 'Kurtosis', 'label']]\n",
|
||||
"\n",
|
||||
"# Plotting the pairplot\n",
|
||||
"g = sns.pairplot(subset_df, hue='label', diag_kind='kde')\n",
|
||||
"\n",
|
||||
"# Adjusting the axis limits\n",
|
||||
"# for ax in g.axes.flatten():\n",
|
||||
"# ax.set_xlim(-10, 10) # Adjust these limits based on your data\n",
|
||||
"# ax.set_ylim(-10, 10) # Adjust these limits based on your data\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Perform division"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Separate the label column\n",
|
||||
"label_column = df1.iloc[:, -2]\n",
|
||||
"\n",
|
||||
"# Perform the relative value by operate division on all the features\n",
|
||||
"df_relative = df2.iloc[:, :-2] / df1.iloc[:, :-2]\n",
|
||||
"\n",
|
||||
"# Add the label column back to the resulting DataFrame\n",
|
||||
"df_relative['label'] = label_column\n",
|
||||
"\n",
|
||||
"# Append a string to all column names\n",
|
||||
"suffix = '_rel'\n",
|
||||
"df_relative.columns = [col + suffix if col != 'label' else col for col in df_relative.columns]\n",
|
||||
"\n",
|
||||
"# Display the first 5 rows of the resulting DataFrame\n",
|
||||
"df_relative"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Subsetting DataFrame to see the pair plots due to many features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import seaborn as sns\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Assuming your DataFrame is named 'df'\n",
|
||||
"\n",
|
||||
"# Subsetting the DataFrame to include only the first 3 columns and the label\n",
|
||||
"subset_df = df_relative[['Mean_rel', 'Max_rel', 'Peak (Pm)_rel', 'label']]\n",
|
||||
"\n",
|
||||
"# Plotting the pairplot\n",
|
||||
"g = sns.pairplot(subset_df, hue='label', diag_kind='kde')\n",
|
||||
"\n",
|
||||
"# Adjusting the axis limits\n",
|
||||
"# for ax in g.axes.flatten():\n",
|
||||
"# ax.set_xlim(-10, 10) # Adjust these limits based on your data\n",
|
||||
"# ax.set_ylim(-10, 10) # Adjust these limits based on your data\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
857
code/notebooks/stft.ipynb
Normal file
857
code/notebooks/stft.ipynb
Normal file
@@ -0,0 +1,857 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sensor1 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_1_TEST1_01.csv',sep=',')\n",
|
||||
"sensor2 = pd.read_csv('D:/thesis/data/converted/raw/DAMAGE_1/DAMAGE_1_TEST1_02.csv',sep=',')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sensor1.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df1 = pd.DataFrame()\n",
|
||||
"df1['s1'] = sensor1[sensor1.columns[-1]]\n",
|
||||
"df1['s2'] = sensor2[sensor2.columns[-1]]\n",
|
||||
"df1\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def merge_two_sensors(damage_path, damage):\n",
|
||||
" df = pd.DataFrame()\n",
|
||||
" for file in os.listdir(damage_path):\n",
|
||||
" pattern = re.compile(r'DAMAGE_\\d+_TEST\\d+_\\d{2}\\.csv')\n",
|
||||
" try:\n",
|
||||
" assert pattern.match(file), f\"File {file} does not match the required format, skipping...\"\n",
|
||||
" # assert \"TEST01\" in file, f\"File {file} does not contain 'TEST01', skipping...\" #TODO: should be trained using the whole test file\n",
|
||||
" print(f\"Processing file: {file}\")\n",
|
||||
" # Append the full path of the file to sensor1 or sensor2 based on the filename\n",
|
||||
" if file.endswith('_01.csv'):\n",
|
||||
" df['sensor 1'] = pd.read_csv(os.path.join('D:/thesis/data/converted/raw', damage, file), sep=',', usecols=[1])\n",
|
||||
" elif file.endswith('_02.csv'):\n",
|
||||
" df['sensor 2'] = pd.read_csv(os.path.join('D:/thesis/data/converted/raw', damage, file), sep=',', usecols=[1])\n",
|
||||
" except AssertionError as e:\n",
|
||||
" print(e)\n",
|
||||
" continue # Skip to the next iteration\n",
|
||||
" return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"df = []\n",
|
||||
"for damage in os.listdir('D:/thesis/data/converted/raw'):\n",
|
||||
" damage_path = os.path.join('D:/thesis/data/converted/raw', damage)\n",
|
||||
" df.append(merge_two_sensors(damage_path, damage))\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"len(df)\n",
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n",
|
||||
"\n",
|
||||
"plt.plot(df1['s2'], label='sensor 2')\n",
|
||||
"plt.plot(df1['s1'], label='sensor 1', alpha=0.5)\n",
|
||||
"plt.xlabel(\"Number of samples\")\n",
|
||||
"plt.ylabel(\"Amplitude\")\n",
|
||||
"plt.title(\"Raw vibration signal\")\n",
|
||||
"plt.ylim(-7.5, 5)\n",
|
||||
"plt.legend()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"signal_sensor1_test1 = []\n",
|
||||
"signal_sensor2_test1 = []\n",
|
||||
"\n",
|
||||
"for data in df:\n",
|
||||
" signal_sensor1_test1.append(data['sensor 1'].values)\n",
|
||||
" signal_sensor2_test1.append(data['sensor 2'].values)\n",
|
||||
"\n",
|
||||
"print(len(signal_sensor1_test1))\n",
|
||||
"print(len(signal_sensor2_test1))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Applying Short-Time Fourier Transform (STFT)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.getcwd()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from scipy.signal import stft, hann\n",
|
||||
"from multiprocessing import Pool\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Function to compute and append STFT data\n",
|
||||
"def process_stft(args):\n",
|
||||
" # Define STFT parameters\n",
|
||||
" window_size = 1024\n",
|
||||
" hop_size = 512\n",
|
||||
" window = hann(window_size)\n",
|
||||
"\n",
|
||||
" Fs = 1024 # Sampling frequency in Hz\n",
|
||||
" \n",
|
||||
" damage_num, test_num, sensor_suffix = args\n",
|
||||
" sensor_name = active_sensors[sensor_suffix]\n",
|
||||
" sensor_num = sensor_suffix[-1] # '1' or '2'\n",
|
||||
" \n",
|
||||
" # Construct the file path\n",
|
||||
" file_name = f'DAMAGE_{damage_num}_TEST{test_num}_{sensor_suffix}.csv'\n",
|
||||
" file_path = os.path.join(damage_base_path, f'DAMAGE_{damage_num}', file_name)\n",
|
||||
" \n",
|
||||
" # Check if the file exists\n",
|
||||
" if not os.path.isfile(file_path):\n",
|
||||
" print(f\"File {file_path} does not exist. Skipping...\")\n",
|
||||
" return\n",
|
||||
" \n",
|
||||
" # Read the CSV\n",
|
||||
" try:\n",
|
||||
" df = pd.read_csv(file_path)\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error reading {file_path}: {e}. Skipping...\")\n",
|
||||
" return\n",
|
||||
" \n",
|
||||
" # Ensure the CSV has exactly two columns\n",
|
||||
" if df.shape[1] != 2:\n",
|
||||
" print(f\"Unexpected number of columns in {file_path}. Skipping...\")\n",
|
||||
" return\n",
|
||||
" \n",
|
||||
" # Extract sensor data\n",
|
||||
" sensor_column = df.columns[1]\n",
|
||||
" sensor_data = df[sensor_column].values\n",
|
||||
" \n",
|
||||
" # Compute STFT\n",
|
||||
" frequencies, times, Zxx = stft(sensor_data, fs=Fs, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n",
|
||||
" magnitude = np.abs(Zxx)\n",
|
||||
" flattened_stft = magnitude.flatten()\n",
|
||||
" \n",
|
||||
" # Define the output CSV file path\n",
|
||||
" stft_file_name = f'stft_data{sensor_num}_{damage_num}.csv'\n",
|
||||
" sensor_output_dir = os.path.join(damage_base_path, sensor_name.lower())\n",
|
||||
" os.makedirs(sensor_output_dir, exist_ok=True)\n",
|
||||
" stft_file_path = os.path.join(sensor_output_dir, stft_file_name)\n",
|
||||
" print(stft_file_path)\n",
|
||||
" # Append the flattened STFT to the CSV\n",
|
||||
" try:\n",
|
||||
" flattened_stft_df = pd.DataFrame([flattened_stft])\n",
|
||||
" if not os.path.isfile(stft_file_path):\n",
|
||||
" # Create a new CSV\n",
|
||||
" flattened_stft_df.to_csv(stft_file_path, index=False, header=False)\n",
|
||||
" else:\n",
|
||||
" # Append to existing CSV\n",
|
||||
" flattened_stft_df.to_csv(stft_file_path, mode='a', index=False, header=False)\n",
|
||||
" print(f\"Appended STFT data to {stft_file_path}\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error writing to {stft_file_path}: {e}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define the base path where DAMAGE_X folders are located\n",
|
||||
"damage_base_path = 'D:/thesis/data/converted/raw/'\n",
|
||||
"\n",
|
||||
"# Define active sensors\n",
|
||||
"active_sensors = {\n",
|
||||
" '01': 'sensor1', # Beginning map sensor\n",
|
||||
" '02': 'sensor2' # End map sensor\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Define damage cases and test runs\n",
|
||||
"damage_cases = range(1, 7) # Adjust based on actual number of damage cases\n",
|
||||
"test_runs = range(1, 6) # TEST01 to TEST05\n",
|
||||
"args_list = []\n",
|
||||
"\n",
|
||||
"# Prepare the list of arguments for parallel processing\n",
|
||||
"for damage_num in damage_cases:\n",
|
||||
" for test_num in test_runs:\n",
|
||||
" for sensor_suffix in active_sensors.keys():\n",
|
||||
" args_list.append((damage_num, test_num, sensor_suffix))\n",
|
||||
"\n",
|
||||
"print(len(args_list))\n",
|
||||
"args_list"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Process STFTs sequentially instead of in parallel\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" print(f\"Starting sequential STFT processing...\")\n",
|
||||
" for i, arg in enumerate(args_list, 1):\n",
|
||||
" process_stft(arg)\n",
|
||||
" print(f\"Processed {i}/{len(args_list)} files\")\n",
|
||||
" print(\"STFT processing completed.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from scipy.signal import stft, hann\n",
|
||||
"\n",
|
||||
"# Applying STFT\n",
|
||||
"vibration_data = signal_sensor1_test1[1]\n",
|
||||
"window_size = 1024\n",
|
||||
"hop_size = 512\n",
|
||||
"window = hann(window_size) # Creating a Hanning window\n",
|
||||
"Fs = 1024\n",
|
||||
"\n",
|
||||
"frequencies, times, Zxx = stft(vibration_data, \n",
|
||||
" fs=Fs, \n",
|
||||
" window=window, \n",
|
||||
" nperseg=window_size, \n",
|
||||
" noverlap=window_size - hop_size)\n",
|
||||
"# Plotting the STFT Data\n",
|
||||
"plt.pcolormesh(times, frequencies, np.abs(Zxx), shading='gouraud')\n",
|
||||
"plt.title(f'STFT Magnitude for case {1} signal sensor 2')\n",
|
||||
"plt.ylabel(f'Frequency [Hz]')\n",
|
||||
"plt.xlabel(f'Time [sec]')\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# get current y ticks in list\n",
|
||||
"print(len(frequencies))\n",
|
||||
"print(len(times))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Loading STFT Data from CSV Files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.listdir('D:/thesis/data/working')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"ready_data1 = []\n",
|
||||
"for file in os.listdir('D:/thesis/data/working/sensor1'):\n",
|
||||
" ready_data1.append(pd.read_csv(os.path.join('D:/thesis/data/working/sensor1', file)))\n",
|
||||
"# ready_data1[1]\n",
|
||||
"# colormesh give title x is frequency and y is time and rotate/transpose the data\n",
|
||||
"# Plotting the STFT Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ready_data1[1]\n",
|
||||
"plt.pcolormesh(ready_data1[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i in range(6):\n",
|
||||
" plt.pcolormesh(ready_data1[i])\n",
|
||||
" plt.title(f'STFT Magnitude for case {i} sensor 1')\n",
|
||||
" plt.xlabel(f'Frequency [Hz]')\n",
|
||||
" plt.ylabel(f'Time [sec]')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ready_data2 = []\n",
|
||||
"for file in os.listdir('D:/thesis/data/working/sensor2'):\n",
|
||||
" ready_data2.append(pd.read_csv(os.path.join('D:/thesis/data/working/sensor2', file)))\n",
|
||||
"ready_data2[5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(len(ready_data1))\n",
|
||||
"print(len(ready_data2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"x1 = 0\n",
|
||||
"\n",
|
||||
"for i in range(len(ready_data1)):\n",
|
||||
" print(ready_data1[i].shape)\n",
|
||||
" x1 = x1 + ready_data1[i].shape[0]\n",
|
||||
"\n",
|
||||
"print(x1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"x2 = 0\n",
|
||||
"\n",
|
||||
"for i in range(len(ready_data2)):\n",
|
||||
" print(ready_data2[i].shape)\n",
|
||||
" x2 = x2 + ready_data2[i].shape[0]\n",
|
||||
"\n",
|
||||
"print(x2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"x1 = ready_data1[0]\n",
|
||||
"# print(x1)\n",
|
||||
"print(type(x1))\n",
|
||||
"for i in range(len(ready_data1) - 1):\n",
|
||||
" #print(i)\n",
|
||||
" x1 = np.concatenate((x1, ready_data1[i + 1]), axis=0)\n",
|
||||
"# print(x1)\n",
|
||||
"pd.DataFrame(x1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"x2 = ready_data2[0]\n",
|
||||
"\n",
|
||||
"for i in range(len(ready_data2) - 1):\n",
|
||||
" #print(i)\n",
|
||||
" x2 = np.concatenate((x2, ready_data2[i + 1]), axis=0)\n",
|
||||
"pd.DataFrame(x2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(x1.shape)\n",
|
||||
"print(x2.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_1 = [1,1,1,1]\n",
|
||||
"y_2 = [0,1,1,1]\n",
|
||||
"y_3 = [1,0,1,1]\n",
|
||||
"y_4 = [1,1,0,0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_1 = 0\n",
|
||||
"y_2 = 1\n",
|
||||
"y_3 = 2\n",
|
||||
"y_4 = 3\n",
|
||||
"y_5 = 4\n",
|
||||
"y_6 = 5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_data = [y_1, y_2, y_3, y_4, y_5, y_6]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i in range(len(y_data)):\n",
|
||||
" print(ready_data1[i].shape[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i in range(len(y_data)):\n",
|
||||
" y_data[i] = [y_data[i]]*ready_data1[i].shape[0]\n",
|
||||
" y_data[i] = np.array(y_data[i])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y = y_data[0]\n",
|
||||
"\n",
|
||||
"for i in range(len(y_data) - 1):\n",
|
||||
" #print(i)\n",
|
||||
" y = np.concatenate((y, y_data[i+1]), axis=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(y.shape)\n",
|
||||
"print(np.unique(y))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"x_train1, x_test1, y_train, y_test = train_test_split(x1, y, test_size=0.2, random_state=2)\n",
|
||||
"x_train2, x_test2, y_train, y_test = train_test_split(x2, y, test_size=0.2, random_state=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier, BaggingClassifier\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||||
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"from xgboost import XGBClassifier"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check the shapes of x_train and y_train\n",
|
||||
"print(\"Shape of x1_train:\", x_train1.shape)\n",
|
||||
"print(\"Shape of x2_train:\", x_train2.shape)\n",
|
||||
"print(\"Shape of y_train:\", y_train.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"accuracies1 = []\n",
|
||||
"accuracies2 = []\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 1. Random Forest\n",
|
||||
"rf_model = RandomForestClassifier()\n",
|
||||
"rf_model.fit(x_train1, y_train)\n",
|
||||
"rf_pred1 = rf_model.predict(x_test1)\n",
|
||||
"acc1 = accuracy_score(y_test, rf_pred1) * 100\n",
|
||||
"accuracies1.append(acc1)\n",
|
||||
"# format with color coded if acc1 > 90\n",
|
||||
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
|
||||
"print(\"Random Forest Accuracy for sensor 1:\", acc1)\n",
|
||||
"rf_model.fit(x_train2, y_train)\n",
|
||||
"rf_pred2 = rf_model.predict(x_test2)\n",
|
||||
"acc2 = accuracy_score(y_test, rf_pred2) * 100\n",
|
||||
"accuracies2.append(acc2)\n",
|
||||
"# format with color coded if acc2 > 90\n",
|
||||
"acc2 = f\"\\033[92m{acc2:.2f}\\033[00m\" if acc2 > 90 else f\"{acc2:.2f}\"\n",
|
||||
"print(\"Random Forest Accuracy for sensor 2:\", acc2)\n",
|
||||
"# print(rf_pred)\n",
|
||||
"# print(y_test)\n",
|
||||
"\n",
|
||||
"# 2. Bagged Trees\n",
|
||||
"bagged_model = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)\n",
|
||||
"bagged_model.fit(x_train1, y_train)\n",
|
||||
"bagged_pred1 = bagged_model.predict(x_test1)\n",
|
||||
"acc1 = accuracy_score(y_test, bagged_pred1) * 100\n",
|
||||
"accuracies1.append(acc1)\n",
|
||||
"# format with color coded if acc1 > 90\n",
|
||||
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
|
||||
"print(\"Bagged Trees Accuracy for sensor 1:\", acc1)\n",
|
||||
"bagged_model.fit(x_train2, y_train)\n",
|
||||
"bagged_pred2 = bagged_model.predict(x_test2)\n",
|
||||
"acc2 = accuracy_score(y_test, bagged_pred2) * 100\n",
|
||||
"accuracies2.append(acc2)\n",
|
||||
"# format with color coded if acc2 > 90\n",
|
||||
"acc2 = f\"\\033[92m{acc2:.2f}\\033[00m\" if acc2 > 90 else f\"{acc2:.2f}\"\n",
|
||||
"print(\"Bagged Trees Accuracy for sensor 2:\", acc2)\n",
|
||||
"\n",
|
||||
"# 3. Decision Tree\n",
|
||||
"dt_model = DecisionTreeClassifier()\n",
|
||||
"dt_model.fit(x_train1, y_train)\n",
|
||||
"dt_pred1 = dt_model.predict(x_test1)\n",
|
||||
"acc1 = accuracy_score(y_test, dt_pred1) * 100\n",
|
||||
"accuracies1.append(acc1)\n",
|
||||
"# format with color coded if acc1 > 90\n",
|
||||
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
|
||||
"print(\"Decision Tree Accuracy for sensor 1:\", acc1)\n",
|
||||
"dt_model.fit(x_train2, y_train)\n",
|
||||
"dt_pred2 = dt_model.predict(x_test2)\n",
|
||||
"acc2 = accuracy_score(y_test, dt_pred2) * 100\n",
|
||||
"accuracies2.append(acc2)\n",
|
||||
"# format with color coded if acc2 > 90\n",
|
||||
"acc2 = f\"\\033[92m{acc2:.2f}\\033[00m\" if acc2 > 90 else f\"{acc2:.2f}\"\n",
|
||||
"print(\"Decision Tree Accuracy for sensor 2:\", acc2)\n",
|
||||
"\n",
|
||||
"# 4. KNeighbors\n",
|
||||
"knn_model = KNeighborsClassifier()\n",
|
||||
"knn_model.fit(x_train1, y_train)\n",
|
||||
"knn_pred1 = knn_model.predict(x_test1)\n",
|
||||
"acc1 = accuracy_score(y_test, knn_pred1) * 100\n",
|
||||
"accuracies1.append(acc1)\n",
|
||||
"# format with color coded if acc1 > 90\n",
|
||||
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
|
||||
"print(\"KNeighbors Accuracy for sensor 1:\", acc1)\n",
|
||||
"knn_model.fit(x_train2, y_train)\n",
|
||||
"knn_pred2 = knn_model.predict(x_test2)\n",
|
||||
"acc2 = accuracy_score(y_test, knn_pred2) * 100\n",
|
||||
"accuracies2.append(acc2)\n",
|
||||
"# format with color coded if acc2 > 90\n",
|
||||
"acc2 = f\"\\033[92m{acc2:.2f}\\033[00m\" if acc2 > 90 else f\"{acc2:.2f}\"\n",
|
||||
"print(\"KNeighbors Accuracy for sensor 2:\", acc2)\n",
|
||||
"\n",
|
||||
"# 5. Linear Discriminant Analysis\n",
|
||||
"lda_model = LinearDiscriminantAnalysis()\n",
|
||||
"lda_model.fit(x_train1, y_train)\n",
|
||||
"lda_pred1 = lda_model.predict(x_test1)\n",
|
||||
"acc1 = accuracy_score(y_test, lda_pred1) * 100\n",
|
||||
"accuracies1.append(acc1)\n",
|
||||
"# format with color coded if acc1 > 90\n",
|
||||
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
|
||||
"print(\"Linear Discriminant Analysis Accuracy for sensor 1:\", acc1)\n",
|
||||
"lda_model.fit(x_train2, y_train)\n",
|
||||
"lda_pred2 = lda_model.predict(x_test2)\n",
|
||||
"acc2 = accuracy_score(y_test, lda_pred2) * 100\n",
|
||||
"accuracies2.append(acc2)\n",
|
||||
"# format with color coded if acc2 > 90\n",
|
||||
"acc2 = f\"\\033[92m{acc2:.2f}\\033[00m\" if acc2 > 90 else f\"{acc2:.2f}\"\n",
|
||||
"print(\"Linear Discriminant Analysis Accuracy for sensor 2:\", acc2)\n",
|
||||
"\n",
|
||||
"# 6. Support Vector Machine\n",
|
||||
"svm_model = SVC()\n",
|
||||
"svm_model.fit(x_train1, y_train)\n",
|
||||
"svm_pred1 = svm_model.predict(x_test1)\n",
|
||||
"acc1 = accuracy_score(y_test, svm_pred1) * 100\n",
|
||||
"accuracies1.append(acc1)\n",
|
||||
"# format with color coded if acc1 > 90\n",
|
||||
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
|
||||
"print(\"Support Vector Machine Accuracy for sensor 1:\", acc1)\n",
|
||||
"svm_model.fit(x_train2, y_train)\n",
|
||||
"svm_pred2 = svm_model.predict(x_test2)\n",
|
||||
"acc2 = accuracy_score(y_test, svm_pred2) * 100\n",
|
||||
"accuracies2.append(acc2)\n",
|
||||
"# format with color coded if acc2 > 90\n",
|
||||
"acc2 = f\"\\033[92m{acc2:.2f}\\033[00m\" if acc2 > 90 else f\"{acc2:.2f}\"\n",
|
||||
"print(\"Support Vector Machine Accuracy for sensor 2:\", acc2)\n",
|
||||
"\n",
|
||||
"# 7. XGBoost\n",
|
||||
"xgboost_model = XGBClassifier()\n",
|
||||
"xgboost_model.fit(x_train1, y_train)\n",
|
||||
"xgboost_pred1 = xgboost_model.predict(x_test1)\n",
|
||||
"acc1 = accuracy_score(y_test, xgboost_pred1) * 100\n",
|
||||
"accuracies1.append(acc1)\n",
|
||||
"# format with color coded if acc1 > 90\n",
|
||||
"acc1 = f\"\\033[92m{acc1:.2f}\\033[00m\" if acc1 > 90 else f\"{acc1:.2f}\"\n",
|
||||
"print(\"XGBoost Accuracy:\", acc1)\n",
|
||||
"xgboost_model.fit(x_train2, y_train)\n",
|
||||
"xgboost_pred2 = xgboost_model.predict(x_test2)\n",
|
||||
"acc2 = accuracy_score(y_test, xgboost_pred2) * 100\n",
|
||||
"accuracies2.append(acc2)\n",
|
||||
"# format with color coded if acc2 > 90\n",
|
||||
"acc2 = f\"\\033[92m{acc2:.2f}\\033[00m\" if acc2 > 90 else f\"{acc2:.2f}\"\n",
|
||||
"print(\"XGBoost Accuracy:\", acc2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(accuracies1)\n",
|
||||
"print(accuracies2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"models = [rf_model, bagged_model, dt_model, knn_model, lda_model, svm_model, xgboost_model]\n",
|
||||
"model_names = [\"Random Forest\", \"Bagged Trees\", \"Decision Tree\", \"KNN\", \"LDA\", \"SVM\", \"XGBoost\"]\n",
|
||||
"\n",
|
||||
"bar_width = 0.35 # Width of each bar\n",
|
||||
"index = np.arange(len(model_names)) # Index for the bars\n",
|
||||
"\n",
|
||||
"# Plotting the bar graph\n",
|
||||
"plt.figure(figsize=(14, 8))\n",
|
||||
"\n",
|
||||
"# Bar plot for Sensor 1\n",
|
||||
"plt.bar(index, accuracies1, width=bar_width, color='blue', label='Sensor 1')\n",
|
||||
"\n",
|
||||
"# Bar plot for Sensor 2\n",
|
||||
"plt.bar(index + bar_width, accuracies2, width=bar_width, color='orange', label='Sensor 2')\n",
|
||||
"\n",
|
||||
"# Add values on top of each bar\n",
|
||||
"for i, acc1, acc2 in zip(index, accuracies1, accuracies2):\n",
|
||||
" plt.text(i, acc1 + .1, f'{acc1:.2f}%', ha='center', va='bottom', color='black')\n",
|
||||
" plt.text(i + bar_width, acc2 + 1, f'{acc2:.2f}%', ha='center', va='bottom', color='black')\n",
|
||||
"\n",
|
||||
"# Customize the plot\n",
|
||||
"plt.xlabel('Model Name →')\n",
|
||||
"plt.ylabel('Accuracy →')\n",
|
||||
"plt.title('Accuracy of classifiers for Sensors 1 and 2 with 513 features')\n",
|
||||
"plt.xticks(index + bar_width / 2, model_names) # Set x-tick positions\n",
|
||||
"plt.legend()\n",
|
||||
"plt.ylim(0, 100)\n",
|
||||
"\n",
|
||||
"# Show the plot\n",
|
||||
"plt.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def spectograph(data_dir: str):\n",
|
||||
" # print(os.listdir(data_dir))\n",
|
||||
" for damage in os.listdir(data_dir):\n",
|
||||
" # print(damage)\n",
|
||||
" d = os.path.join(data_dir, damage)\n",
|
||||
" # print(d)\n",
|
||||
" for file in os.listdir(d):\n",
|
||||
" # print(file)\n",
|
||||
" f = os.path.join(d, file)\n",
|
||||
" print(f)\n",
|
||||
" # sensor1 = pd.read_csv(f, skiprows=1, sep=';')\n",
|
||||
" # sensor2 = pd.read_csv(f, skiprows=1, sep=';')\n",
|
||||
"\n",
|
||||
" # df1 = pd.DataFrame()\n",
|
||||
"\n",
|
||||
" # df1['s1'] = sensor1[sensor1.columns[-1]]\n",
|
||||
" # df1['s2'] = sensor2[sensor2.columns[-1]]\n",
|
||||
"ed\n",
|
||||
" # # Combined Plot for sensor 1 and sensor 2 from data1 file in which motor is operated at 800 rpm\n",
|
||||
"\n",
|
||||
" # plt.plot(df1['s2'], label='sensor 2')\n",
|
||||
" # plt.plot(df1['s1'], label='sensor 1')\n",
|
||||
" # plt.xlabel(\"Number of samples\")\n",
|
||||
" # plt.ylabel(\"Amplitude\")\n",
|
||||
" # plt.title(\"Raw vibration signal\")\n",
|
||||
" # plt.legend()\n",
|
||||
" # plt.show()\n",
|
||||
"\n",
|
||||
" # from scipy import signal\n",
|
||||
" # from scipy.signal.windows import hann\n",
|
||||
"\n",
|
||||
" # vibration_data = df1['s1']\n",
|
||||
"\n",
|
||||
" # # Applying STFT\n",
|
||||
" # window_size = 1024\n",
|
||||
" # hop_size = 512\n",
|
||||
" # window = hann(window_size) # Creating a Hanning window\n",
|
||||
" # frequencies, times, Zxx = signal.stft(vibration_data, window=window, nperseg=window_size, noverlap=window_size - hop_size)\n",
|
||||
"\n",
|
||||
" # # Plotting the STFT Data\n",
|
||||
" # plt.pcolormesh(times, frequencies, np.abs(Zxx), shading='gouraud')\n",
|
||||
" # plt.title(f'STFT Magnitude for case 1 signal sensor 1 ')\n",
|
||||
" # plt.ylabel('Frequency [Hz]')\n",
|
||||
" # plt.xlabel('Time [sec]')\n",
|
||||
" # plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spectograph('D:/thesis/data/converted/raw')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
192
code/src/features/frequency_domain_features.py
Normal file
192
code/src/features/frequency_domain_features.py
Normal file
@@ -0,0 +1,192 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.fft import fft, fftfreq
|
||||
|
||||
def get_mean_freq(signal, frame_size, hop_length):
    """
    Frame-wise mean level of the one-sided magnitude spectrum.

    The signal is cut into frames that start every ``hop_length`` samples and
    hold up to ``frame_size`` samples (frames near the end may be shorter).
    Each frame is divided by its own length, transformed with the FFT, and
    only the first half of the magnitude bins is kept.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division
        (e.g. a numpy array).
    frame_size : int
        Nominal frame length. It is also the divisor of the spectral sum,
        even for shorter trailing frames.
    hop_length : int
        Step between consecutive frame starts.

    Returns
    -------
    numpy.ndarray
        One value per frame: sum of the kept magnitudes / ``frame_size``.
    """
    frame_means = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)
        # Length-normalised magnitude spectrum, first half of the bins only.
        half_spectrum = np.abs(np.fft.fft(frame / n_samples))[:n_samples // 2]
        frame_means.append(np.sum(half_spectrum) / frame_size)
    return np.array(frame_means)
|
||||
|
||||
def get_variance_freq(signal, frame_size, hop_length):
    """
    Frame-wise spread of the one-sided magnitude spectrum.

    For every frame (start every ``hop_length`` samples, up to ``frame_size``
    samples long) the length-normalised FFT magnitudes of the first half of
    the bins are compared with their sum divided by ``frame_size``; the
    squared deviations are summed and divided by ``frame_size - 1``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length, used in both normalisations.
    hop_length : int
        Step between consecutive frame starts.

    Returns
    -------
    numpy.ndarray
        One variance-like value per frame.
    """
    frame_variances = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)
        half_spectrum = np.abs(np.fft.fft(frame / n_samples))[:n_samples // 2]
        level = np.sum(half_spectrum) / frame_size
        squared_dev = (half_spectrum - level) ** 2
        frame_variances.append(np.sum(squared_dev) / (frame_size - 1))
    return np.array(frame_variances)
|
||||
|
||||
def get_third_freq(signal, frame_size, hop_length):
    """
    Frame-wise third standardised moment of the one-sided magnitude spectrum.

    Per frame, the deviations of the length-normalised FFT magnitudes (first
    half of the bins) from ``sum(magnitudes) / frame_size`` are cubed and
    summed, then divided by ``frame_size * sigma**3`` where ``sigma`` is the
    square root of ``sum(deviation**2) / (frame_size - 1)``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length, used in every normalisation.
    hop_length : int
        Step between consecutive frame starts.

    Returns
    -------
    numpy.ndarray
        One skewness-like value per frame.
    """
    frame_skews = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)
        half_spectrum = np.abs(np.fft.fft(frame / n_samples))[:n_samples // 2]
        deviation = half_spectrum - np.sum(half_spectrum) / frame_size
        sigma = np.sqrt(np.sum(deviation ** 2) / (frame_size - 1))
        frame_skews.append(np.sum(deviation ** 3) / (frame_size * sigma ** 3))
    return np.array(frame_skews)
|
||||
|
||||
def get_forth_freq(signal, frame_size, hop_length):
    """
    Frame-wise fourth standardised moment of the one-sided magnitude spectrum.

    Per frame, the deviations of the length-normalised FFT magnitudes (first
    half of the bins) from ``sum(magnitudes) / frame_size`` are raised to the
    fourth power and summed, then divided by ``frame_size * variance**2``
    where ``variance`` is ``sum(deviation**2) / (frame_size - 1)``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length, used in every normalisation.
    hop_length : int
        Step between consecutive frame starts.

    Returns
    -------
    numpy.ndarray
        One kurtosis-like value per frame.
    """
    frame_kurts = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)
        half_spectrum = np.abs(np.fft.fft(frame / n_samples))[:n_samples // 2]
        deviation = half_spectrum - np.sum(half_spectrum) / frame_size
        variance = np.sum(deviation ** 2) / (frame_size - 1)
        frame_kurts.append(np.sum(deviation ** 4) / (frame_size * variance ** 2))
    return np.array(frame_kurts)
|
||||
|
||||
def get_grand_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise amplitude-weighted mean frequency of the one-sided spectrum.

    Each frame (start every ``hop_length`` samples, up to ``frame_size``
    samples long) is divided by its own length and transformed with the FFT.
    For the first half of the bins the result is
    ``sum(f * y) / sum(y)``, with ``y`` the magnitudes and ``f`` the bin
    frequencies.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.
        NOTE(review): other scripts in this project use Fs = 1024 — confirm
        the spacing that matches your data.

    Returns
    -------
    numpy.ndarray
        One centre frequency per frame (``nan`` where a frame's spectrum
        sums to zero).
    """
    centres = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        centres.append(np.sum(f * y) / np.sum(y))
    return np.array(centres)
|
||||
|
||||
def get_std_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise amplitude-weighted frequency spread around the centre frequency.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins, ``f`` their frequencies and
    ``fc = sum(f * y) / sum(y)``, the result is
    ``sqrt(sum((f - fc)**2 * y) / frame_size)``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length; also the divisor inside the square root.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One spread value per frame.
    """
    spreads = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        centre = np.sum(f * y) / np.sum(y)
        spreads.append(np.sqrt(np.sum((f - centre) ** 2 * y) / frame_size))
    return np.array(spreads)
|
||||
|
||||
def get_Cfactor_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise root of the amplitude-weighted mean squared frequency.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins and ``f`` their frequencies, the result is
    ``sqrt(sum(f**2 * y) / sum(y))``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One value per frame.
    """
    values = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        values.append(np.sqrt(np.sum(f ** 2 * y) / np.sum(y)))
    return np.array(values)
|
||||
|
||||
def get_Dfactor_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise ratio of fourth to second amplitude-weighted frequency moments.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins and ``f`` their frequencies, the result is
    ``sqrt(sum(f**4 * y) / sum(f**2 * y))``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One value per frame.
    """
    values = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        values.append(np.sqrt(np.sum(f ** 4 * y) / np.sum(f ** 2 * y)))
    return np.array(values)
|
||||
|
||||
def get_Efactor_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise normalised second frequency moment of the one-sided spectrum.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins and ``f`` their frequencies, the result is
    ``sqrt(sum(f**2 * y) / sqrt(sum(y) * sum(f**4 * y)))``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One value per frame.
    """
    values = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        second_moment = np.sum(f ** 2 * y)
        scale = np.sqrt(np.sum(y) * np.sum(f ** 4 * y))
        values.append(np.sqrt(second_moment / scale))
    return np.array(values)
|
||||
|
||||
def get_Gfactor_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise frequency spread divided by the centre frequency.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins, ``f`` their frequencies and
    ``fc = sum(f * y) / sum(y)``, the result is
    ``sqrt(sum((f - fc)**2 * y) / frame_size) / fc``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length; also the divisor inside the square root.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One value per frame.
    """
    values = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        centre = np.sum(f * y) / np.sum(y)
        spread = np.sqrt(np.sum((f - centre) ** 2 * y) / frame_size)
        values.append(spread / centre)
    return np.array(values)
|
||||
|
||||
def get_third1_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise third standardised moment of frequency, weighted by amplitude.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins, ``f`` their frequencies,
    ``fc = sum(f * y) / sum(y)`` and
    ``sd = sqrt(sum((f - fc)**2 * y) / frame_size)``, the result is
    ``sum((f - fc)**3 * y) / (frame_size * sd**3)``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length, used in both normalisations.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One skewness-like value per frame.
    """
    values = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        offset = f - np.sum(f * y) / np.sum(y)
        spread = np.sqrt(np.sum(offset ** 2 * y) / frame_size)
        values.append(np.sum(offset ** 3 * y) / (frame_size * spread ** 3))
    return np.array(values)
|
||||
|
||||
def get_forth1_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise fourth standardised moment of frequency, weighted by amplitude.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins, ``f`` their frequencies,
    ``fc = sum(f * y) / sum(y)`` and
    ``sd = sqrt(sum((f - fc)**2 * y) / frame_size)``, the result is
    ``sum((f - fc)**4 * y) / (frame_size * sd**4)``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length, used in both normalisations.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One kurtosis-like value per frame.
    """
    values = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        offset = f - np.sum(f * y) / np.sum(y)
        spread = np.sqrt(np.sum(offset ** 2 * y) / frame_size)
        values.append(np.sum(offset ** 4 * y) / (frame_size * spread ** 4))
    return np.array(values)
|
||||
|
||||
def get_Hfactor_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise amplitude-weighted mean of ``sqrt(|f - fc|)``, scaled by spread.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins, ``f`` their frequencies,
    ``fc = sum(f * y) / sum(y)`` and
    ``sd = sqrt(sum((f - fc)**2 * y) / frame_size)``, the result is
    ``sum(sqrt(|f - fc|) * y) / (frame_size * sqrt(sd))``.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length, used in both normalisations.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One value per frame.
    """
    values = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        offset = f - np.sum(f * y) / np.sum(y)
        spread = np.sqrt(np.sum(offset ** 2 * y) / frame_size)
        values.append(np.sum(np.sqrt(np.abs(offset)) * y) / (frame_size * np.sqrt(spread)))
    return np.array(values)
|
||||
|
||||
def get_Jfactor_freq(signal, frame_size, hop_length, sample_spacing=.1 / 25600):
    """
    Frame-wise amplitude-weighted mean of ``sqrt(|f - fc|)``, scaled by spread.

    Per frame, with ``y`` the length-normalised FFT magnitudes of the first
    half of the bins, ``f`` their frequencies,
    ``fc = sum(f * y) / sum(y)`` and
    ``sd = sqrt(sum((f - fc)**2 * y) / frame_size)``, the result is
    ``sum(sqrt(|f - fc|) * y) / (frame_size * sqrt(sd))``.

    NOTE(review): this is the same expression that ``get_Hfactor_freq``
    evaluates — confirm whether a different feature was intended here.

    Parameters
    ----------
    signal : array-like
        Samples; must support slicing and element-wise division.
    frame_size : int
        Nominal frame length, used in both normalisations.
    hop_length : int
        Step between consecutive frame starts.
    sample_spacing : float, optional
        Spacing between samples, passed to ``np.fft.fftfreq`` as ``d``.
        The default reproduces the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One value per frame.
    """
    values = []
    for start in range(0, len(signal), hop_length):
        frame = signal[start:start + frame_size]
        n_samples = len(frame)  # trailing frames may be shorter than frame_size
        half = n_samples // 2
        y = np.abs(np.fft.fft(frame / n_samples))[:half]
        f = np.fft.fftfreq(n_samples, sample_spacing)[:half]
        offset = f - np.sum(f * y) / np.sum(y)
        spread = np.sqrt(np.sum(offset ** 2 * y) / frame_size)
        values.append(np.sum(np.sqrt(np.abs(offset)) * y) / (frame_size * np.sqrt(spread)))
    return np.array(values)
|
||||
|
||||
class FrequencyFeatureExtractor:
    """
    Summary statistics of the FFT magnitude spectrum of one signal.

    On construction the full (two-sided) FFT magnitude of ``data`` is stored
    in ``frequency_spectrum`` and the feature dictionary is computed once and
    kept in ``features``.
    """

    def __init__(self, data):
        """
        Parameters
        ----------
        data : array-like
            Signal samples; assumed to be a numpy array.
        """
        self.x = data
        # Magnitude of every FFT bin of the input
        magnitudes = np.abs(fft(self.x))
        self.frequency_spectrum = magnitudes
        self.n = len(magnitudes)
        self.mean_freq = np.mean(magnitudes)
        self.variance_freq = np.var(magnitudes)
        self.std_freq = np.std(magnitudes)

        # Feature dictionary derived from the attributes above
        self.features = self.calculate_features()

    def calculate_features(self):
        """
        Compute the spectrum statistics.

        Returns
        -------
        dict
            Maps a descriptive feature name to its value. The variance uses
            an ``n - 1`` divisor; skewness and kurtosis use ``n``.
        """
        spectrum = self.frequency_spectrum
        count = self.n
        mean_level = self.mean_freq
        centred = spectrum - mean_level

        peak = np.max(spectrum)
        variance = np.sum(centred ** 2) / (count - 1)
        sigma = np.sqrt(variance)

        features = {}
        features['Mean of band Power Spectrum (S_mu)'] = mean_level
        features['Max of band power spectrum (S_MAX)'] = np.max(spectrum)
        features['Sum of total band power (S_SBP)'] = np.sum(spectrum)
        features['Peak of band power (S_Peak)'] = peak
        features['Variance of band power (S_V)'] = variance
        features['Standard Deviation of band power (S_Sigma)'] = sigma
        features['Skewness of band power (S_Skewness)'] = np.sum(centred ** 3) / (count * sigma ** 3)
        features['Kurtosis of band power (S_Kurtosis)'] = np.sum(centred ** 4) / (count * sigma ** 4)
        features['Relative Spectral Peak per Band Power (S_RSPPB)'] = peak / mean_level
        return features

    def __repr__(self):
        """Multi-line report: a header followed by one ``name: value`` line per feature."""
        lines = ["Frequency Domain Feature Extraction Results:\n"]
        lines.extend(f"{feature}: {value:.4f}\n" for feature, value in self.features.items())
        return "".join(lines)
|
||||
|
||||
def ExtractFrequencyFeatures(object):
    """
    Read one CSV file and return its frequency-domain feature dictionary.

    Parameters
    ----------
    object : str or file-like
        Anything ``pandas.read_csv`` accepts. The first row is skipped
        (header row with separator char info).

    Returns
    -------
    dict
        The ``features`` of a ``FrequencyFeatureExtractor`` built from the
        values of the second column.
    """
    table = pd.read_csv(object, skiprows=1)  # Skip the header row separator char info
    second_column = table.iloc[:, 1].values  # Assuming the data is in the second column
    return FrequencyFeatureExtractor(second_column).features
|
||||
|
||||
# Usage Example
|
||||
# extractor = FrequencyFeatureExtractor(pd.read_csv('path_to_your_data.csv', skiprows=1).iloc[:, 1].values)
|
||||
# print(extractor)
|
||||
@@ -36,9 +36,12 @@ class FeatureExtractor:
|
||||
result += f"{feature}: {value:.4f}\n"
|
||||
return result
|
||||
|
||||
def ExtractTimeFeatures(object):
|
||||
def ExtractTimeFeatures(object, absolute):
|
||||
data = pd.read_csv(object, skiprows=1) # Skip the header row separator char info
|
||||
extractor = FeatureExtractor(data.iloc[:, 1].values) # Assuming the data is in the second column
|
||||
if absolute:
|
||||
extractor = FeatureExtractor(np.abs(data.iloc[:, 1].values)) # Assuming the data is in the second column
|
||||
else:
|
||||
extractor = FeatureExtractor(data.iloc[:, 1].values)
|
||||
features = extractor.features
|
||||
return features
|
||||
# Save features to a file
|
||||
|
||||
115
code/src/process_stft.py
Normal file
115
code/src/process_stft.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import os
import pandas as pd
import numpy as np
from scipy.signal import stft
# Window functions are imported from scipy.signal.windows: the
# `scipy.signal.hann` alias was deprecated and is not importable from the
# scipy.signal namespace in current SciPy releases.
from scipy.signal.windows import hann
import glob
import multiprocessing  # Added import for multiprocessing

# Define the base directory where DAMAGE_X folders are located
damage_base_path = 'D:/thesis/data/converted/raw'

# Define output directories for each sensor
output_dirs = {
    'sensor1': os.path.join(damage_base_path, 'sensor1'),
    'sensor2': os.path.join(damage_base_path, 'sensor2')
}

# Create output directories if they don't exist
# (runs at import time, so it also runs in every worker process that
# re-imports this module)
for dir_path in output_dirs.values():
    os.makedirs(dir_path, exist_ok=True)

# Define STFT parameters
window_size = 1024   # samples per STFT segment
hop_size = 512       # step between consecutive segments
window = hann(window_size)
Fs = 1024            # sampling frequency handed to scipy.signal.stft

# Number of damage cases (adjust as needed)
num_damage_cases = 6  # Change to 30 if you have 30 damage cases

# Number of test runs per damage case
num_test_runs = 5
|
||||
|
||||
# Function to perform STFT and return magnitude
|
||||
def compute_stft(vibration_data):
    """
    Return the STFT magnitude of one vibration record.

    The transform uses the module-level settings ``Fs``, ``window``,
    ``window_size`` and ``hop_size``.

    Parameters
    ----------
    vibration_data : array-like
        Samples handed to ``scipy.signal.stft``.

    Returns
    -------
    numpy.ndarray
        ``abs(Zxx)`` transposed, so that frequencies run along the columns.
    """
    overlap = window_size - hop_size
    _, _, spectrum = stft(
        vibration_data,
        fs=Fs,
        window=window,
        nperseg=window_size,
        noverlap=overlap,
    )
    # Transpose to have frequencies as columns
    return np.abs(spectrum).T
|
||||
|
||||
def process_damage_case(damage_num):
|
||||
damage_folder = os.path.join(damage_base_path, f'DAMAGE_{damage_num}')
|
||||
|
||||
# Check if the damage folder exists
|
||||
if not os.path.isdir(damage_folder):
|
||||
print(f"Folder {damage_folder} does not exist. Skipping...")
|
||||
return
|
||||
|
||||
# Process Sensor 1 and Sensor 2 separately
|
||||
for sensor_num in [1, 2]:
|
||||
aggregated_stft = [] # List to hold STFTs from all test runs
|
||||
|
||||
# Iterate over all test runs
|
||||
for test_num in range(1, num_test_runs + 1):
|
||||
# Construct the filename based on sensor number
|
||||
# Sensor 1 corresponds to '_01', Sensor 2 corresponds to '_02'
|
||||
sensor_suffix = f'_0{sensor_num}'
|
||||
file_name = f'DAMAGE_{damage_num}_TEST{test_num}{sensor_suffix}.csv'
|
||||
file_path = os.path.join(damage_folder, file_name)
|
||||
|
||||
# Check if the file exists
|
||||
if not os.path.isfile(file_path):
|
||||
print(f"File {file_path} does not exist. Skipping...")
|
||||
continue
|
||||
|
||||
# Read the CSV file
|
||||
try:
|
||||
df = pd.read_csv(file_path)
|
||||
except Exception as e:
|
||||
print(f"Error reading {file_path}: {e}. Skipping...")
|
||||
continue
|
||||
|
||||
# Ensure the CSV has exactly two columns: 'Timestamp (s)' and 'Sensor X'
|
||||
if df.shape[1] != 2:
|
||||
print(f"Unexpected number of columns in {file_path}. Expected 2, got {df.shape[1]}. Skipping...")
|
||||
continue
|
||||
|
||||
# Extract vibration data (assuming the second column is sensor data)
|
||||
vibration_data = df.iloc[:, 1].values
|
||||
|
||||
# Perform STFT
|
||||
stft_magnitude = compute_stft(vibration_data)
|
||||
|
||||
# Convert STFT result to DataFrame
|
||||
df_stft = pd.DataFrame(
|
||||
stft_magnitude,
|
||||
columns=[f"Freq_{freq:.2f}" for freq in np.linspace(0, Fs/2, stft_magnitude.shape[1])]
|
||||
)
|
||||
|
||||
# Append to the aggregated list
|
||||
aggregated_stft.append(df_stft)
|
||||
|
||||
# Concatenate all STFT DataFrames vertically
|
||||
if aggregated_stft:
|
||||
df_aggregated = pd.concat(aggregated_stft, ignore_index=True)
|
||||
|
||||
# Define output filename
|
||||
output_file = os.path.join(
|
||||
output_dirs[f'sensor{sensor_num}'],
|
||||
f'stft_data{sensor_num}_{damage_num}.csv'
|
||||
)
|
||||
|
||||
# Save the aggregated STFT to CSV
|
||||
df_aggregated.to_csv(output_file, index=False)
|
||||
print(f"Saved aggregated STFT for Sensor {sensor_num}, Damage {damage_num} to {output_file}")
|
||||
else:
|
||||
print(f"No STFT data aggregated for Sensor {sensor_num}, Damage {damage_num}.")
|
||||
|
||||
if __name__ == "__main__":  # Added main guard for multiprocessing
    # One task per damage case, distributed over a default-sized worker pool.
    damage_numbers = range(1, num_damage_cases + 1)
    with multiprocessing.Pool() as worker_pool:
        worker_pool.map(process_damage_case, damage_numbers)
|
||||
133
code/src/verify_stft.py
Normal file
133
code/src/verify_stft.py
Normal file
@@ -0,0 +1,133 @@
|
||||
import os
import pandas as pd
import numpy as np
from scipy.signal import stft
# Window functions are imported from scipy.signal.windows: the
# `scipy.signal.hann` alias was deprecated and is not importable from the
# scipy.signal namespace in current SciPy releases.
from scipy.signal.windows import hann
import glob

# Define the base directory where DAMAGE_X folders are located
damage_base_path = 'D:/thesis/data/converted/raw/'

# Define sensor directories
sensor_dirs = {
    'sensor1': os.path.join(damage_base_path, 'sensor1'),
    'sensor2': os.path.join(damage_base_path, 'sensor2')
}

# Define STFT parameters
window_size = 1024   # samples per STFT segment
hop_size = 512       # step between consecutive segments
window = hann(window_size)
Fs = 1024            # sampling frequency handed to scipy.signal.stft
|
||||
|
||||
def verify_stft(damage_num, test_num, sensor_num):
    """
    Verifies the STFT of an individual test run against the aggregated STFT data.

    The raw CSV of the test run is transformed again with the module-level
    STFT settings (``Fs``, ``window``, ``window_size``, ``hop_size``) and a few
    sampled rows are compared with the matching rows of the aggregated
    per-sensor CSV. All results are reported with ``print``.

    Parameters:
    - damage_num (int): Damage case number.
    - test_num (int): Test run number.
    - sensor_num (int): Sensor number (1 or 2).

    Returns:
    - None. Every failure path prints a message and returns early.
    """
    # Mapping sensor number to suffix
    sensor_suffix = f'_0{sensor_num}'

    # Construct the file name for the individual test run
    individual_file_name = f'DAMAGE_{damage_num}_TEST{test_num}{sensor_suffix}.csv'
    individual_file_path = os.path.join(damage_base_path, f'DAMAGE_{damage_num}', individual_file_name)

    # Check if the individual file exists
    if not os.path.isfile(individual_file_path):
        print(f"File {individual_file_path} does not exist. Skipping verification for this test run.")
        return

    # Read the individual test run CSV
    try:
        df_individual = pd.read_csv(individual_file_path)
    except Exception as e:
        print(f"Error reading {individual_file_path}: {e}. Skipping verification for this test run.")
        return

    # Ensure the CSV has exactly two columns: 'Timestamp (s)' and 'Sensor X'
    if df_individual.shape[1] != 2:
        print(f"Unexpected number of columns in {individual_file_path}. Expected 2, got {df_individual.shape[1]}. Skipping.")
        return

    # Extract vibration data
    vibration_data = df_individual.iloc[:, 1].values

    # Perform STFT
    frequencies, times, Zxx = stft(
        vibration_data,
        fs=Fs,
        window=window,
        nperseg=window_size,
        noverlap=window_size - hop_size
    )

    # Compute magnitude and transpose: rows are time frames, columns are frequencies
    stft_magnitude = np.abs(Zxx).T

    # Number of rows this run contributes to the aggregated CSV. Taken from
    # the data instead of a fixed 513 so records of any length are handled.
    rows_per_run = stft_magnitude.shape[0]

    # Select random row indices to verify (up to 3 rows; fewer if the run is shorter)
    np.random.seed(42)  # For reproducibility
    sample_row_indices = np.random.choice(rows_per_run, size=min(3, rows_per_run), replace=False)

    # Read the aggregated STFT CSV
    aggregated_file_name = f'stft_data{sensor_num}_{damage_num}.csv'
    aggregated_file_path = os.path.join(sensor_dirs[f'sensor{sensor_num}'], aggregated_file_name)

    if not os.path.isfile(aggregated_file_path):
        print(f"Aggregated file {aggregated_file_path} does not exist. Skipping verification for this test run.")
        return

    try:
        df_aggregated = pd.read_csv(aggregated_file_path)
    except Exception as e:
        print(f"Error reading {aggregated_file_path}: {e}. Skipping verification for this test run.")
        return

    # Calculate the starting row index in the aggregated CSV.
    # NOTE(review): assumes every earlier test run is present in the
    # aggregated file and contributed the same number of rows as this one.
    start_row = (test_num - 1) * rows_per_run
    end_row = start_row + rows_per_run  # Exclusive

    # Ensure the aggregated CSV has enough rows
    if df_aggregated.shape[0] < end_row:
        print(f"Aggregated file {aggregated_file_path} does not have enough rows for Test {test_num}. Skipping.")
        return

    # Extract the corresponding STFT block from the aggregated CSV
    df_aggregated_block = df_aggregated.iloc[start_row:end_row].values

    # Compare selected rows
    all_match = True
    for row_idx in sample_row_indices:
        individual_row = stft_magnitude[row_idx]
        aggregated_row = df_aggregated_block[row_idx]

        # Check if the rows are almost equal within a tolerance
        if np.allclose(individual_row, aggregated_row, atol=1e-6):
            verification_status = "MATCH"
        else:
            verification_status = "MISMATCH"
            all_match = False

        # Print the comparison details
        print(f"Comparing Damage {damage_num}, Test {test_num}, Sensor {sensor_num}, Row {row_idx}: {verification_status}")
        print(f"Individual STFT Row {row_idx}: {individual_row[:5]} ... {individual_row[-5:]}")
        print(f"Aggregated STFT Row {row_idx + start_row}: {aggregated_row[:5]} ... {aggregated_row[-5:]}\n")

    # Summary line; it names the file that was actually read (with the
    # zero-padded sensor suffix) and the real number of rows per run.
    if all_match:
        print(f"STFT of {individual_file_name} is verified. On `{aggregated_file_name}` start at rows {start_row} to {end_row} with {rows_per_run} rows.\n")
    else:
        print(f"STFT of {individual_file_name} has discrepancies in `{aggregated_file_name}` start at rows {start_row} to {end_row} with {rows_per_run} rows.\n")
|
||||
|
||||
# Number of damage cases and test runs to verify
num_damage_cases = 6  # Adjust to 30 as per your dataset
num_test_runs = 5

# Run the check for every (damage case, test run, sensor) combination.
# This executes whenever the module is run or imported.
for damage_num in range(1, num_damage_cases + 1):
    for test_num in range(1, num_test_runs + 1):
        for sensor_num in (1, 2):
            verify_stft(damage_num, test_num, sensor_num)
|
||||
Reference in New Issue
Block a user