diff --git a/.gitignore b/.gitignore index e69de29..4c6df9e 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,2 @@ +# Ignore CSV files in the data directory and all its subdirectories +data/**/*.csv diff --git a/data/processed/README.md b/data/processed/README.md new file mode 100644 index 0000000..ceabcd8 --- /dev/null +++ b/data/processed/README.md @@ -0,0 +1,56 @@ +# Processed Data Directory + +## Overview + +This `data/processed` directory contains structured data that has been processed and formatted for analysis. Each subdirectory within `processed` represents a different level of simulated damage, and each contains multiple test files from experiments conducted under that specific damage scenario. + +## Directory Structure + +The directory is organized as follows: + +``` +data +└── processed +├── DAMAGE_1 +│ ├── D1_TEST1.csv +│ ├── D1_TEST2.csv +│ ... +│ └── D1_TEST10.csv +├── DAMAGE_2 +│ ├── D2_TEST1.csv +│ ... +├── DAMAGE_3 +│ ... +├── DAMAGE_4 +│ ... +└── DAMAGE_5 +``` + +Each subdirectory (`DAMAGE_1`, `DAMAGE_2`, etc.) represents a specific damage scenario. Inside each damage directory, there are CSV files named in the format `Dx_TESTy.csv` where `x` is the damage level and `y` is the test number. There are 10 tests for each damage level. + +## File Description + +Each `.csv` file contains the following columns: + +- **Time**: Timestamp of the measurement in the format `MM/DD/YY HH:MM:SS.sssssssss`. +- **Value**: A float value representing the measurement taken at the corresponding timestamp. + +The files begin with a custom header indicating the separator used (`sep=,`), ensuring compatibility with various CSV parsers that support separator hints. + +## Purpose + +The processed data is intended for use in analytical models that assess structural integrity under various simulated damage scenarios. The tests vary by parameters such as the intensity and type of applied stress, allowing for comprehensive analysis across different conditions. 
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

# Sampling interval between consecutive dummy measurements (1/128 s),
# matching the original hard-coded 0.0078125-second step.
SAMPLE_PERIOD_SECONDS = 0.0078125


def _make_dummy_frame(num_rows):
    """Build a DataFrame of ``num_rows`` (Time, Value) dummy samples.

    Time starts at "now" (naive local time, as in the original script) and
    advances by SAMPLE_PERIOD_SECONDS per row; Value is standard-normal noise.
    """
    start_time = datetime.now()
    timestamps = [start_time + timedelta(seconds=i * SAMPLE_PERIOD_SECONDS)
                  for i in range(num_rows)]
    return pd.DataFrame({"Time": timestamps, "Value": np.random.randn(num_rows)})


def _write_csv_with_sep_hint(df, csv_path):
    """Write ``df`` to ``csv_path``, prefixed with the ``sep=,`` hint line.

    The hint line is understood by CSV consumers (e.g. Excel) that support
    separator declarations ahead of the header row.
    """
    with open(csv_path, 'w') as file:
        file.write('sep=,\n')  # Writing the separator hint
        df.to_csv(file, index=False)


def generate_dummy_data(base_path="data", num_damage_levels=5,
                        tests_per_level=10, rows_per_test=10):
    """Create the ``raw``/``processed`` folder tree filled with dummy CSVs.

    Produces ``base_path/processed/DAMAGE_x/Dx_TESTy.csv`` for every damage
    level ``x`` and test number ``y`` (both 1-based), plus an empty
    ``base_path/raw`` directory. Defaults reproduce the original script
    exactly: 5 damage levels x 10 tests x 10 rows under ``data/``.

    Parameters
    ----------
    base_path : str
        Root directory for the generated folder structure.
    num_damage_levels : int
        Number of ``DAMAGE_x`` subdirectories to create.
    tests_per_level : int
        Number of ``Dx_TESTy.csv`` files per damage level.
    rows_per_test : int
        Number of (Time, Value) rows written to each CSV.
    """
    raw_path = os.path.join(base_path, "raw")
    processed_path = os.path.join(base_path, "processed")
    os.makedirs(raw_path, exist_ok=True)
    os.makedirs(processed_path, exist_ok=True)

    for damage in range(1, num_damage_levels + 1):
        damage_path = os.path.join(processed_path, f"DAMAGE_{damage}")
        os.makedirs(damage_path, exist_ok=True)

        for test in range(1, tests_per_level + 1):
            csv_path = os.path.join(damage_path, f"D{damage}_TEST{test}.csv")
            _write_csv_with_sep_hint(_make_dummy_frame(rows_per_test), csv_path)


if __name__ == "__main__":
    # Guarded entry point: importing this module no longer writes 50 files
    # as a side effect; running it as a script behaves as before.
    generate_dummy_data()