feat(data): Initialize dummy data

- Create a Python script that generates dummy CSV files under `data/processed`, organized as one `DAMAGE_<n>` folder per damage level (5 levels), each holding one `D<n>_TEST<m>.csv` file per test (10 tests per level).
- Add a `.gitignore` file to exclude CSV files from Git tracking, enhancing data privacy and reducing repository size.
- Include a `README.md` in the `data` directory to clearly document the directory structure, file content, and their intended use for clarity and better usability.

Closes #7
This commit is contained in:
nuluh
2024-08-14 18:15:08 +07:00
parent feb3c85340
commit 153e8cb109
3 changed files with 99 additions and 0 deletions

41
generate_dummy_data.py Normal file
View File

@@ -0,0 +1,41 @@
import os
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
# Generate a dummy data tree:
#
#   data/raw/                               (created empty)
#   data/processed/DAMAGE_<d>/D<d>_TEST<t>.csv
#
# for d in 1..5 and t in 1..10. Every CSV starts with a "sep=," hint line,
# followed by a "Time,Value" header and `num_rows` rows of random data.

# Base path for the folder structure
base_path = "data"

# Create the folder structure (existing folders are left untouched)
raw_path = os.path.join(base_path, "raw")
processed_path = os.path.join(base_path, "processed")
os.makedirs(raw_path, exist_ok=True)
os.makedirs(processed_path, exist_ok=True)

# Number of data rows written to every CSV file.
num_rows = 10
# Spacing between consecutive timestamps in seconds (0.0078125 == 1/128).
sample_interval = 0.0078125

for damage in range(1, 6):  # 5 damage levels
    damage_folder = f"DAMAGE_{damage}"
    damage_path = os.path.join(processed_path, damage_folder)
    os.makedirs(damage_path, exist_ok=True)

    for test in range(1, 11):  # 10 tests per damage level
        # Filename for the CSV
        csv_filename = f"D{damage}_TEST{test}.csv"
        csv_path = os.path.join(damage_path, csv_filename)

        # Generate dummy data: evenly spaced timestamps starting "now",
        # paired with standard-normal random floats.
        start_time = datetime.now()
        timestamps = [
            start_time + timedelta(seconds=i * sample_interval)
            for i in range(num_rows)
        ]
        values = np.random.randn(num_rows)

        # Create DataFrame
        df = pd.DataFrame({
            "Time": timestamps,
            "Value": values,
        })

        # Save the CSV file with a custom first line.
        # newline="" is required when handing a text file object to
        # DataFrame.to_csv: pandas emits os.linesep itself, and leaving
        # newline translation on would turn "\r\n" into "\r\r\n" on Windows.
        # The encoding is pinned so the output does not depend on the locale.
        with open(csv_path, "w", newline="", encoding="utf-8") as file:
            # Separator hint line; written with os.linesep so that its line
            # ending matches the rows pandas writes below.
            file.write(f"sep=,{os.linesep}")
            df.to_csv(file, index=False)