def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", ext="TXT",
                       first_col_start=1, last_col_offset=25,
                       special_groups=None, group=True):
    """
    Generate a structured list of (file_name, [first_col, last_col]) tuples.

    Parameters
    ----------
    total_dfs : int, default 30
        Total number of data files to include in the tuples.
    group_size : int, default 5
        Number of files per group (determines how often the column-index
        pattern repeats).
    prefix : str, default "zzzAD"
        Prefix for the generated file names.
    ext : str, default "TXT"
        File extension appended after the file index.
    first_col_start : int, default 1
        Starting value for the first column index (1-indexed).
    last_col_offset : int, default 25
        Offset added to the first column index to get the last column index.
    special_groups : list of dict, optional
        Special groups to insert. Each dict may contain:
        - 'df_name': file name used for every tuple in this group (required)
        - 'position': group index at which to insert; 0 means "beginning"
          and is skipped here (only positions > 0 are inserted). Default 0.
        - 'size': size of this group (default: same as ``group_size``)
    group : bool, default True
        If True, return the tuples chunked into sublists of ``group_size``;
        if False, return one flat list.

    Returns
    -------
    list
        Flat list of ``(name, [first_col, last_col])`` tuples when ``group``
        is False; otherwise a list of sublists of ``group_size`` tuples each.
    """
    tuples = []

    # Regular files: the column indices cycle with period group_size.
    for i in range(1, total_dfs + 1):
        position_in_group = ((i - 1) % group_size) + 1
        first_col = first_col_start + position_in_group - 1
        last_col = first_col + last_col_offset
        tuples.append((f"{prefix}{i}.{ext}", [first_col, last_col]))

    # Insert special groups at their requested positions (other than the
    # beginning). NOTE: the loop variable is deliberately NOT named `group`
    # so it cannot shadow the boolean `group` parameter used below.
    if special_groups:
        for spec in special_groups:
            position = spec.get('position', 0)  # default 0 => not inserted
            if position > 0:
                df_name = spec['df_name']
                size = spec.get('size', group_size)

                special_tuples = []
                for j in range(1, size + 1):
                    first_col = first_col_start + j - 1
                    last_col = first_col + last_col_offset
                    special_tuples.append((df_name, [first_col, last_col]))

                # list.insert() takes (index, item) and cannot splice a list;
                # slice-assign instead. `position` is taken as a group index,
                # so the flat insertion point is position * group_size
                # (assumption from the 'position'/'0 for beginning' docs —
                # TODO confirm against callers).
                insert_at = position * group_size
                tuples[insert_at:insert_at] = special_tuples

    if group:
        # Chunk the flat list into sublists of group_size.
        return [tuples[i:i + group_size]
                for i in range(0, len(tuples), group_size)]

    return tuples
x = 0 for _ in range(6): # TODO: range(6) should be dynamic and parameterized c.append(x + y)