- Create a Python script that generates CSV files in a structured folder hierarchy under `data/processed`: one `DAMAGE_<n>` folder per damage level, each containing one `D<n>_TEST<m>.csv` file per test.
- Add a `.gitignore` file to exclude CSV files from Git tracking, enhancing data privacy and reducing repository size.
- Include a `README.md` in the `data` directory that documents the directory structure, the file contents, and their intended use.

Closes #7
42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
# Generate placeholder CSV files in a fixed folder hierarchy.
#
# Running (or importing) this script creates:
#
#   data/raw/
#   data/processed/DAMAGE_<d>/D<d>_TEST<t>.csv
#
# for d in 1..NUM_DAMAGE_LEVELS and t in 1..TESTS_PER_DAMAGE.  Each CSV starts
# with a "sep=," separator hint line, followed by a "Time,Value" header and
# NUM_ROWS rows of timestamps and random floats.

import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

# Base path for the folder structure
base_path = "data"
raw_path = os.path.join(base_path, "raw")
processed_path = os.path.join(base_path, "processed")

NUM_DAMAGE_LEVELS = 5          # DAMAGE_1 .. DAMAGE_5
TESTS_PER_DAMAGE = 10          # TEST1 .. TEST10 inside every damage folder
NUM_ROWS = 10                  # data rows written to each CSV
SAMPLE_PERIOD_S = 0.0078125    # spacing between timestamps (1/128 s)


def write_dummy_csv(csv_path, num_rows=NUM_ROWS, sample_period=SAMPLE_PERIOD_S):
    """Write one dummy CSV file to *csv_path*.

    The file consists of a ``sep=,`` separator hint line, a ``Time,Value``
    header, and ``num_rows`` data rows.  ``Time`` starts at the current local
    time and advances by ``sample_period`` seconds per row; ``Value`` holds
    samples drawn with ``numpy.random.randn``.

    Args:
        csv_path: Destination path; an existing file is overwritten.
        num_rows: Number of data rows to generate.
        sample_period: Seconds between consecutive timestamps.
    """
    start_time = datetime.now()
    timestamps = [
        start_time + timedelta(seconds=i * sample_period) for i in range(num_rows)
    ]
    values = np.random.randn(num_rows)  # Random float values

    df = pd.DataFrame({
        "Time": timestamps,
        "Value": values,
    })

    # newline="" disables newline translation, as pandas requires for file
    # objects passed to to_csv; otherwise rows end in "\r\r\n" on Windows.
    # The hint line is therefore terminated with os.linesep explicitly so it
    # matches the line terminator pandas uses by default for the rows.
    with open(csv_path, "w", newline="", encoding="utf-8") as file:
        file.write("sep=," + os.linesep)  # Writing the separator hint
        df.to_csv(file, index=False)


# Create the folder structure
os.makedirs(raw_path, exist_ok=True)
os.makedirs(processed_path, exist_ok=True)

for damage in range(1, NUM_DAMAGE_LEVELS + 1):
    damage_path = os.path.join(processed_path, f"DAMAGE_{damage}")
    os.makedirs(damage_path, exist_ok=True)

    for test in range(1, TESTS_PER_DAMAGE + 1):
        csv_filename = f"D{damage}_TEST{test}.csv"
        write_dummy_csv(os.path.join(damage_path, csv_filename))