feat(data): Propose new damage file index generation to improve structure and flexibility in DataFrame handling

This commit is contained in:
nuluh
2025-06-16 03:13:07 +07:00
parent 3e652accfb
commit 60ff4e0fa9

View File

@@ -26,70 +26,78 @@ class DamageFilesIndices(TypedDict):
files: List[str] files: List[str]
def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", ext="TXT", first_col_start=1, last_col_offset=25,
                       special_groups=None, group=True):
    """
    Generate a structured list of tuples containing DataFrame references and column indices.

    Parameters:
    -----------
    total_dfs : int, default 30
        Total number of DataFrames to include in the tuples
    group_size : int, default 5
        Number of DataFrames in each group (determines the pattern repeat)
    prefix : str, default "zzzAD"
        Prefix for DataFrame file names
    ext : str, default "TXT"
        File extension appended to each DataFrame name
    first_col_start : int, default 1
        Starting value for the first column index (1-indexed)
    last_col_offset : int, default 25
        Offset to add to first_col_start to get the last column index
    special_groups : list of dict, optional
        List of special groups to insert; each dict should contain:
        - 'df_name': The DataFrame name to use for all tuples in this group (required)
        - 'position': Index in the flat tuple list where this group is spliced in
          (default 0, i.e. the beginning)
        - 'size': Size of this group (default: same as group_size)
    group : bool, default True
        When True, chunk the flat tuple list into sublists of group_size.

    Returns:
    --------
    list
        List of (df_name, [first_col, last_col]) tuples, or a list of such
        sublists of length group_size when ``group`` is True.
    """
    tuples = []

    # Regular entries: the column window cycles with the position inside each
    # group of `group_size` DataFrames.
    for i in range(1, total_dfs + 1):
        position_in_group = ((i - 1) % group_size) + 1
        first_col = first_col_start + position_in_group - 1
        last_col = first_col + last_col_offset
        tuples.append((f"{prefix}{i}.{ext}", [first_col, last_col]))

    # Splice special groups into the flat list at their requested positions.
    if special_groups:
        # NOTE: the loop variable must NOT be named `group` — doing so shadows
        # the boolean `group` parameter and corrupts the grouping decision below
        # (a dict is always truthy, so `group=False` would be ignored).
        for spec in special_groups:
            position = spec.get('position', 0)  # 0 == insert at the beginning
            df_name = spec['df_name']
            size = spec.get('size', group_size)
            special_tuples = []
            for j in range(1, size + 1):
                first_col = first_col_start + j - 1
                last_col = first_col + last_col_offset
                special_tuples.append((df_name, [first_col, last_col]))
            # list.insert takes (index, item) — splice the whole group in with
            # slice assignment instead. This also honors position 0, which the
            # docstring promises but the old `if position > 0` guard dropped.
            tuples[position:position] = special_tuples

    if group:
        # Chunk the flat list into sublists of group_size.
        return [tuples[i:i + group_size] for i in range(0, len(tuples), group_size)]
    return tuples
damage_scenarios = {}
a = file_index_start
b = col + 1
for i in range(1, num_damage + 1):
damage_scenarios[i] = range(a, b)
a += col
b += col
# return damage_scenarios
x = {}
if undamage_file:
try:
x[0] = []
if base_path:
x[0].append(
os.path.normpath(os.path.join(base_path, f"{undamage_file}"))
)
else:
x[0].append(f"{prefix}{undamage_file}")
except Exception as e:
print(Fore.RED + f"Error processing undamaged file: {e}")
sys.exit(1)
else:
print(Fore.RED + "No undamaged file specified, terminating.")
sys.exit(1)
for damage, files in damage_scenarios.items():
x[damage] = [] # Initialize each key with an empty list
for i, file_index in enumerate(files, start=1):
if base_path:
x[damage].append(
os.path.normpath(
os.path.join(base_path, f"{prefix}{file_index}{extension}")
)
)
# if not os.path.exists(file_path):
# print(Fore.RED + f"File {file_path} does not exist.")
# continue
else:
x[damage].append(f"{prefix}{file_index}{extension}")
return x
# file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT") # file_path = os.path.join(base_path, f"zzz{prefix}D{file_index}.TXT")
# df = pd.read_csv(file_path, sep="\t", skiprows=10) # Read with explicit column names # df = pd.read_csv(file_path, sep="\t", skiprows=10) # Read with explicit column names
@@ -189,7 +197,7 @@ class DataProcessor:
y = 0 y = 0
for data_group in self.data: # len(data_group[i]) = 5 for data_group in self.data: # len(data_group[i]) = 5
for j in data_group: # len(j[i]) = for j in data_group: # len(j[i]) =
c: VectorColumnIndex = [] # column vector c_{j} c: VectorColumnIndex = []
x = 0 x = 0
for _ in range(6): # TODO: range(6) should be dynamic and parameterized for _ in range(6): # TODO: range(6) should be dynamic and parameterized
c.append(x + y) c.append(x + y)