[FEAT] Feat Include Undamaged Node Classification #98
@@ -26,70 +26,78 @@ class DamageFilesIndices(TypedDict):
|
||||
files: List[str]
|
||||
|
||||
|
||||
def generate_damage_files_index(**kwargs) -> "DamageFilesIndices":
    """
    Generate a dictionary of damage scenarios with file indices.

    Key 0 always holds the undamaged (baseline) file; keys 1..num_damage
    each hold ``col`` consecutive damaged-file paths.

    :param kwargs: Keyword arguments to specify parameters.
        - prefix: Prefix for the file names (default: "zzzAD").
        - extension: File extension (default: ".TXT").
        - num_damage: Number of damage scenarios (required).
        - file_index_start: Starting index for file names (required).
        - col: Number of files per damage scenario (required).
        - base_path: Base path for the files (optional).
        - undamage_file: Name of the undamaged file with extension (required;
          the function terminates the process if it is missing).
    :return: A dictionary where keys are damage scenario indices and values
        are lists of file paths.
    """
    prefix: str = kwargs.get("prefix", "zzzAD")
    extension: str = kwargs.get("extension", ".TXT")
    num_damage: int = kwargs.get("num_damage")
    file_index_start: int = kwargs.get("file_index_start")
    col: int = kwargs.get("col")
    base_path: str = kwargs.get("base_path")
    undamage_file: str = kwargs.get("undamage_file")

    # Map each damage scenario to its half-open range of file indices.
    # FIX: the upper bound must be relative to file_index_start — the old
    # code used ``col + 1``, which is only correct when the start index is 1.
    damage_scenarios = {}
    a = file_index_start
    b = file_index_start + col
    for i in range(1, num_damage + 1):
        damage_scenarios[i] = range(a, b)
        a += col
        b += col

    x = {}
    # Scenario 0: the undamaged baseline file.
    # NOTE(review): the no-base_path branch prepends ``prefix`` to the
    # undamaged file name while the base_path branch does not — preserved
    # as-is, but confirm the asymmetry is intentional.
    if undamage_file:
        try:
            x[0] = []
            if base_path:
                x[0].append(
                    os.path.normpath(os.path.join(base_path, f"{undamage_file}"))
                )
            else:
                x[0].append(f"{prefix}{undamage_file}")
        except Exception as e:
            print(Fore.RED + f"Error processing undamaged file: {e}")
            sys.exit(1)
    else:
        print(Fore.RED + "No undamaged file specified, terminating.")
        sys.exit(1)

    # Scenarios 1..num_damage: one path per file index in the range.
    for damage, files in damage_scenarios.items():
        x[damage] = []  # Initialize each key with an empty list
        for file_index in files:
            if base_path:
                x[damage].append(
                    os.path.normpath(
                        os.path.join(base_path, f"{prefix}{file_index}{extension}")
                    )
                )
            else:
                x[damage].append(f"{prefix}{file_index}{extension}")

    return x


def generate_df_tuples(total_dfs=30, group_size=5, prefix="zzzAD", ext="TXT",
                       first_col_start=1, last_col_offset=25,
                       special_groups=None, group=True):
    """
    Generate a structured list of tuples containing DataFrame references and
    column indices.

    Parameters
    ----------
    total_dfs : int, default 30
        Total number of DataFrames to include in the tuples.
    group_size : int, default 5
        Number of DataFrames in each group (determines the pattern repeat).
    prefix : str, default "zzzAD"
        Prefix for DataFrame variable names.
    ext : str, default "TXT"
        Extension suffix used in the generated names.
    first_col_start : int, default 1
        Starting value for the first column index (1-indexed).
    last_col_offset : int, default 25
        Offset added to the first column index to get the last column index.
    special_groups : list of dict, optional
        List of special groups to insert; each dict may contain:
        - 'df_name': the DataFrame name used for all tuples in this group
        - 'position': where to insert this group (0 for beginning; positions
          greater than 0 are handled here)
        - 'size': size of this group (default: same as ``group_size``)
    group : bool, default True
        If True, return the tuples grouped into sublists of ``group_size``.

    Returns
    -------
    list
        List of (df_name, [first_col, last_col]) tuples, or a list of such
        sublists when ``group`` is True.
    """
    tuples = []
    for i in range(1, total_dfs + 1):
        # Position within the repeating group pattern (1..group_size).
        position_in_group = ((i - 1) % group_size) + 1
        # Column indices derived from the position in the group.
        first_col = first_col_start + position_in_group - 1
        last_col = first_col + last_col_offset
        df_name = f"{prefix}{i}.{ext}"
        tuples.append((df_name, [first_col, last_col]))

    # Add special groups at specified positions (other than the beginning).
    if special_groups:
        # FIX: the loop variable previously shadowed the boolean ``group``
        # parameter, corrupting the ``if group:`` check below; and
        # ``tuples.insert(special_tuples)`` was a TypeError — list.insert
        # needs an index. Slice assignment splices the group in place.
        for spec in special_groups:
            position = spec.get('position', 0)  # default value is 0 if not specified
            if position > 0:
                df_name = spec['df_name']
                size = spec.get('size', group_size)
                special_tuples = []
                for j in range(1, size + 1):
                    first_col = first_col_start + j - 1
                    last_col = first_col + last_col_offset
                    special_tuples.append((df_name, [first_col, last_col]))
                # NOTE(review): 'position' is interpreted in group units
                # (group p starts at flat index p * group_size) — confirm
                # against the callers that build special_groups.
                insert_at = position * group_size
                tuples[insert_at:insert_at] = special_tuples

    if group:
        # Group tuples into sublists of group_size.
        grouped_tuples = []
        for k in range(0, len(tuples), group_size):
            grouped_tuples.append(tuples[k:k + group_size])
        return grouped_tuples

    return tuples
|
||||
@@ -189,7 +197,7 @@ class DataProcessor:
|
||||
y = 0
|
||||
for data_group in self.data: # len(data_group[i]) = 5
|
||||
for j in data_group: # len(j[i]) =
|
||||
c: VectorColumnIndex = [] # column vector c_{j}
|
||||
c: VectorColumnIndex = []
|
||||
x = 0
|
||||
for _ in range(6): # TODO: range(6) should be dynamic and parameterized
|
||||
c.append(x + y)
|
||||
|
||||
Reference in New Issue
Block a user