refactor(data): remove redundant column extraction method and simplify dataframe loading

This commit is contained in:
nuluh
2025-06-14 00:57:54 +07:00
parent e7332252a6
commit 195f8143f0

View File

@@ -73,26 +73,6 @@ class DataProcessor:
else: else:
self.data = self._load_all_data() self.data = self._load_all_data()
def _extract_column_names(self, file_path: str) -> List[str]:
"""
Extracts column names from the header of the given file.
Assumes the 6th line contains column names.
:param file_path: Path to the data file.
:return: List of column names.
"""
with open(file_path, "r") as f:
header_lines = [next(f) for _ in range(12)]
# Extract column names from the 6th line
channel_line = header_lines[10].strip()
tokens = re.findall(r'"([^"]+)"', channel_line)
if not channel_line.startswith('"'):
first_token = channel_line.split()[0]
tokens = [first_token] + tokens
return tokens # Prepend 'Time' column if applicable
def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario: def _load_dataframe(self, file_path: str) -> OriginalSingleDamageScenario:
""" """
Loads a single data file into a pandas DataFrame. Loads a single data file into a pandas DataFrame.
@@ -100,11 +80,7 @@ class DataProcessor:
:param file_path: Path to the data file. :param file_path: Path to the data file.
:return: DataFrame containing the numerical data. :return: DataFrame containing the numerical data.
""" """
col_names = self._extract_column_names(file_path) df = pd.read_csv(file_path, delim_whitespace=True, skiprows=10, header=0, memory_map=True)
df = pd.read_csv(
file_path, delim_whitespace=True, skiprows=11, header=None, memory_map=True
)
df.columns = col_names
return df return df
def _load_all_data(self) -> GroupDataset: def _load_all_data(self) -> GroupDataset: