refactor(data): remove redundant column extraction method and simplify dataframe loading

2025-06-14 00:57:54 +07:00
parent e7332252a6
commit 195f8143f0
1 changed files with 1 additions and 25 deletions
--- a/data/QUGS/convert.py
+++ b/data/QUGS/convert.py
@@ -73,26 +73,6 @@ class DataProcessor:
        else:
            self.data = self._load_all_data()
    def _extract_column_names(self, file_path: str) -> List[str]:
        """
        Extracts column names from the header of the given file.
        Assumes the 6th line contains column names.
        :param file_path: Path to the data file.
        :return: List of column names.
        """
        with open(file_path, "r") as f:
            header_lines = [next(f) for _ in range(12)]
        # Extract column names from the 6th line
        channel_line = header_lines[10].strip()
        tokens = re.findall(r'"([^"]+)"', channel_line)
        if not channel_line.startswith('"'):
            first_token = channel_line.split()[0]
            tokens = [first_token] + tokens
        return tokens  # Prepend 'Time' column if applicable
    def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
        """
        Loads a single data file into a pandas DataFrame.
@@ -100,11 +80,7 @@ class DataProcessor:
        :param file_path: Path to the data file.
        :return: DataFrame containing the numerical data.
        """
-        col_names = self._extract_column_names(file_path)
+        df = pd.read_csv(file_path, delim_whitespace=True, skiprows=10, header=0, memory_map=True)
        df = pd.read_csv(
            file_path, delim_whitespace=True, skiprows=11, header=None, memory_map=True
        )
        df.columns = col_names
        return df
    def _load_all_data(self) -> GroupDataset: