Files
thesis/generate_dummy_data.py
nuluh 153e8cb109 feat(data): Initialize dummy data
- Create a Python script to generate CSV files in a structured folder hierarchy under `data/processed` with specific damage levels and tests.
- Add a `.gitignore` file to exclude CSV files from Git tracking, enhancing data privacy and reducing repository size.
- Include a `README.md` in the `data` directory to clearly document the directory structure, file content, and their intended use for clarity and better usability.

Closes #7
2024-08-14 23:26:06 +07:00

42 lines
1.3 KiB
Python

import os
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
# Generate a tree of dummy CSV files:
#
#   data/raw/                                   (created, left empty here)
#   data/processed/DAMAGE_<d>/D<d>_TEST<t>.csv  (d = 1..5, t = 1..10)
#
# Each CSV starts with a "sep=," separator-hint line, followed by a
# "Time,Value" header and `num_rows` rows of timestamp / random-float pairs.

# Base path for the folder structure
base_path = "data"

# Create the folder structure
raw_path = os.path.join(base_path, "raw")
processed_path = os.path.join(base_path, "processed")
os.makedirs(raw_path, exist_ok=True)
os.makedirs(processed_path, exist_ok=True)

# Every dummy file has the same number of samples, so define it once
# instead of re-assigning it on every loop iteration.
num_rows = 10

for damage in range(1, 6):  # 5 damage levels
    damage_folder = f"DAMAGE_{damage}"
    damage_path = os.path.join(processed_path, damage_folder)
    os.makedirs(damage_path, exist_ok=True)

    for test in range(1, 11):  # 10 tests per damage level
        # Filename for the CSV
        csv_filename = f"D{damage}_TEST{test}.csv"
        csv_path = os.path.join(damage_path, csv_filename)

        # Generate dummy data: timestamps spaced 0.0078125 s (= 1/128 s)
        # apart, starting at the moment this particular file is generated.
        start_time = datetime.now()
        timestamps = [
            start_time + timedelta(seconds=i * 0.0078125)
            for i in range(num_rows)
        ]
        values = np.random.randn(num_rows)  # Random float values

        # Create DataFrame
        df = pd.DataFrame({
            "Time": timestamps,
            "Value": values,
        })

        # Save the CSV file with a custom first line.
        # newline="" disables newline translation on the handle: pandas
        # already terminates rows with os.linesep, so a translating
        # text-mode handle would turn "\r\n" into "\r\r\n" on Windows and
        # produce spurious blank rows. The hint line is written with
        # os.linesep as well so every line in the file ends the same way.
        with open(csv_path, "w", newline="", encoding="utf-8") as file:
            file.write(f"sep=,{os.linesep}")  # Writing the separator hint
            df.to_csv(file, index=False)