feat(src): replace convert.py with src/data_preprocessing.py and fix the prefix parameter of some functions

This commit is contained in:
nuluh
2025-07-02 03:25:18 +07:00
parent 5ba628b678
commit 2504157b29

View File

@@ -25,18 +25,18 @@ class DamageFilesIndices(TypedDict):
damage_index: int
files: List[str]
def complement_pairs(n, prefix, extension, group_size=5, last_col_offset=25):
    """
    Yield the complement (filename, column-pair) tuples for one file.

    The file name is built as ``f"{prefix}{n}.{extension}"``. The file's own
    position inside its group is ``(n - 1) % group_size + 1``; every other
    position ``a`` in ``1 … group_size`` produces one tuple
    ``(filename, [a, a + last_col_offset])``. With the default
    ``group_size=5`` this yields four tuples.

    Parameters
    ----------
    n : int
        1-based file number used both in the file name and to determine
        which position is skipped.
    prefix : str
        Text placed before ``n`` in the file name.
    extension : str
        File extension, without the leading dot.
    group_size : int, default 5
        Number of positions per group.
    last_col_offset : int, default 25
        Value added to ``a`` to obtain the second column index of each pair.

    Yields
    ------
    tuple
        ``(filename, [a, a + last_col_offset])`` for every position ``a``
        other than the file's own position.
    """
    filename = f"{prefix}{n}.{extension}"
    orig_a = (n - 1) % group_size + 1  # 1 … group_size
    for a in range(1, group_size + 1):  # a = 1 … group_size
        if a != orig_a:  # skip the file's own position
            # yield (not return) so the caller receives a lazy generator of tuples
            yield (filename, [a, a + last_col_offset])
def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TXT", first_col_start=1, last_col_offset=25,
special_groups=None, group=True):
def generate_df_tuples(total_dfs, prefix, extension, first_col_start, last_col_offset,
group_size=5, special_groups=None, group=True):
"""
Generate a structured list of tuples containing DataFrame references and column indices.
@@ -46,7 +46,7 @@ def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TX
Total number of DataFrames to include in the tuples
group_size : int, default 5
Number of DataFrames in each group (determines the pattern repeat)
prefix : str, default "df"
prefix : str
Prefix for DataFrame variable names
first_col_start : int
Starting value for the first column index (1-indexed)
@@ -70,7 +70,7 @@ def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", extension="TX
group = []
for i in range(1, 6): # TODO: shouldnt be hardcoded
n = g * 5 + i
bottom_end = i # 1, 2, 3, 4, 5
bottom_end = i # 1, 2, 3, 4, 5
top_end = bottom_end + 25 # 26, 27, 28, 29, 30 # TODO: shouldnt be hardcoded
group.append((f"{prefix}{n}.{extension}", [bottom_end, top_end]))
result.append(group)