feat(data): Initialize dummy data

- Create a Python script that generates dummy CSV files in a structured folder hierarchy under `data/processed`, organized by damage level (`DAMAGE_1`–`DAMAGE_5`) with ten tests per level.
- Add a `.gitignore` file to exclude CSV files from Git tracking, enhancing data privacy and reducing repository size.
- Include a `README.md` in the `data` directory that documents the directory structure, the file contents, and their intended use, for clarity and better usability.

Closes #7
2024-08-14 18:15:08 +07:00
parent feb3c85340
commit 153e8cb109
3 changed files with 99 additions and 0 deletions
--- a/generate_dummy_data.py
+++ b/generate_dummy_data.py
@@ -0,0 +1,41 @@
+import os
+import pandas as pd
+from datetime import datetime, timedelta
+import numpy as np
+
+# Root of the generated tree: data/raw (left empty here) and data/processed.
+base_path = "data"
+
+# Create the folder structure
+raw_path = os.path.join(base_path, "raw")
+processed_path = os.path.join(base_path, "processed")
+
+os.makedirs(raw_path, exist_ok=True)
+os.makedirs(processed_path, exist_ok=True)
+
+for damage in range(1, 6):  # 5 damage levels -> DAMAGE_1 .. DAMAGE_5
+    damage_folder = f"DAMAGE_{damage}"
+    damage_path = os.path.join(processed_path, damage_folder)
+    os.makedirs(damage_path, exist_ok=True)
+
+    for test in range(1, 11):  # 10 tests per damage level
+        # One file per (damage, test) pair, e.g. D1_TEST1.csv
+        csv_filename = f"D{damage}_TEST{test}.csv"
+        csv_path = os.path.join(damage_path, csv_filename)
+
+        # Dummy signal: timestamps spaced 0.0078125 s (1/128 s) apart
+        num_rows = 10
+        start_time = datetime.now()
+        timestamps = [start_time + timedelta(seconds=i * 0.0078125) for i in range(num_rows)]
+        values = np.random.randn(num_rows)  # standard-normal random floats
+
+        # Two columns: "Time" (datetime) and "Value" (float)
+        df = pd.DataFrame({
+            "Time": timestamps,
+            "Value": values,
+        })
+
+        # newline="" keeps text mode from turning pandas' "\r\n" into "\r\r\n" on Windows
+        with open(csv_path, "w", newline="", encoding="utf-8") as file:
+            file.write(f"sep=,{os.linesep}")  # separator hint; same line ending as to_csv
+            df.to_csv(file, index=False)