feat(data): Propose new damage file index generation to improve structure and flexibility in DataFrame handling
This commit is contained in:
class DamageFilesIndices(TypedDict):
    # NOTE(review): only this field is visible in the diff context; the class
    # may declare more fields elsewhere in the file.
    files: List[str]


def generate_damage_files_index(**kwargs) -> DamageFilesIndices:
    """
    Generate a dictionary of damage scenarios with file indices.

    :param kwargs: Keyword arguments to specify parameters.
        - prefix: Prefix for the file names (default: "zzzAD").
        - extension: File extension (default: ".TXT").
        - num_damage: Number of damage scenarios.
        - file_index_start: Starting index for file names.
        - col: Number of files per damage scenario.
        - base_path: Base path for the files (optional; when falsy, bare
          file names are produced instead of joined paths).
        - undamage_file: Name of the undamaged file with extension (required;
          the function terminates the process if it is missing).
    :return: A dictionary where keys are damage scenario indices (key 0 is the
        undamaged baseline) and values are lists of file paths.
    """
    prefix: str = kwargs.get("prefix", "zzzAD")
    extension: str = kwargs.get("extension", ".TXT")
    num_damage: int = kwargs.get("num_damage")
    file_index_start: int = kwargs.get("file_index_start")
    col: int = kwargs.get("col")
    base_path: str = kwargs.get("base_path")
    undamage_file: str = kwargs.get("undamage_file")

    # Build per-scenario index windows: scenario i covers `col` consecutive
    # file indices starting at file_index_start.
    damage_scenarios = {}
    a = file_index_start
    # FIX: was `b = col + 1`, which is only correct when file_index_start == 1;
    # the first window must end `col` indices after its own start.
    b = file_index_start + col
    for i in range(1, num_damage + 1):
        damage_scenarios[i] = range(a, b)
        a += col
        b += col

    x = {}
    if undamage_file:
        try:
            x[0] = []  # key 0 holds the undamaged baseline file
            if base_path:
                x[0].append(
                    os.path.normpath(os.path.join(base_path, f"{undamage_file}"))
                )
            else:
                x[0].append(f"{prefix}{undamage_file}")
        except Exception as e:
            print(Fore.RED + f"Error processing undamaged file: {e}")
            sys.exit(1)
    else:
        print(Fore.RED + "No undamaged file specified, terminating.")
        sys.exit(1)

    for damage, files in damage_scenarios.items():
        x[damage] = []  # Initialize each key with an empty list
        for file_index in files:
            if base_path:
                x[damage].append(
                    os.path.normpath(
                        os.path.join(base_path, f"{prefix}{file_index}{extension}")
                    )
                )
            else:
                x[damage].append(f"{prefix}{file_index}{extension}")
    return x


def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", ext="TXT", first_col_start=1, last_col_offset=25,
                       special_groups=None, group=True):
    """
    Generate a structured list of tuples containing DataFrame references and column indices.

    Parameters:
    -----------
    total_dfs : int, default 30
        Total number of DataFrames to include in the tuples
    group_size : int, default 5
        Number of DataFrames in each group (determines the pattern repeat)
    prefix : str, default "zzzAD"
        Prefix for the generated file/DataFrame names
    ext : str, default "TXT"
        Extension appended (with a dot) to each generated name
    first_col_start : int, default 1
        Starting value for the first column index (1-indexed)
    last_col_offset : int, default 25
        Offset to add to first_col_start to get the last column index
    special_groups : list of dict, optional
        List of special groups to insert, each dict should contain:
        - 'df_name': The DataFrame name to use for all tuples in this group
        - 'position': Where to insert this group (0 for beginning)
        - 'size': Size of this group (default: same as group_size)
    group : bool, default True
        When True, the flat tuple list is chunked into sublists of group_size

    Returns:
    --------
    list
        List of tuples (df_name, [first_col, last_col]); chunked into
        sublists of group_size when `group` is True
    """
    tuples = []

    # Add regular groups: column indices repeat with period `group_size`.
    for i in range(1, total_dfs + 1):
        # Position within the group (1 to group_size) drives the column window.
        position_in_group = ((i - 1) % group_size) + 1
        first_col = first_col_start + position_in_group - 1
        last_col = first_col + last_col_offset
        df_name = f"{prefix}{i}.{ext}"
        tuples.append((df_name, [first_col, last_col]))

    # Add special groups at specified positions (other than beginning).
    if special_groups:
        # FIX: loop variable was named `group`, shadowing the boolean `group`
        # parameter and corrupting the chunking decision below.
        for sg in special_groups:
            position = sg.get('position', 0)  # default value is 0 if not specified
            if position > 0:
                df_name = sg['df_name']
                size = sg.get('size', group_size)

                # Create the special group tuples.
                special_tuples = []
                for i in range(1, size + 1):
                    first_col = first_col_start + i - 1
                    last_col = first_col + last_col_offset
                    special_tuples.append((df_name, [first_col, last_col]))

                # FIX: was `tuples.insert(special_tuples)` — list.insert needs
                # an index and always raised TypeError here. Slice assignment
                # inserts the whole group at the flat index `position`.
                # NOTE(review): assumes 'position' is a flat tuple index, not a
                # group index — confirm against callers.
                tuples[position:position] = special_tuples

    if group:
        # Chunk the flat list into sublists of group_size.
        grouped_tuples = []
        for i in range(0, len(tuples), group_size):
            grouped_tuples.append(tuples[i:i + group_size])
        return grouped_tuples

    return tuples
||||||
@@ -189,7 +197,7 @@ class DataProcessor:
|
|||||||
y = 0
|
y = 0
|
||||||
for data_group in self.data: # len(data_group[i]) = 5
|
for data_group in self.data: # len(data_group[i]) = 5
|
||||||
for j in data_group: # len(j[i]) =
|
for j in data_group: # len(j[i]) =
|
||||||
c: VectorColumnIndex = [] # column vector c_{j}
|
c: VectorColumnIndex = []
|
||||||
x = 0
|
x = 0
|
||||||
for _ in range(6): # TODO: range(6) should be dynamic and parameterized
|
for _ in range(6): # TODO: range(6) should be dynamic and parameterized
|
||||||
c.append(x + y)
|
c.append(x + y)
|
||||||
|
|||||||
Reference in New Issue
Block a user