Source code for biomechzoo.statistics.lineval
import os
import pandas as pd
import numpy as np
from typing import Any, Dict, Literal
from biomechzoo.utils.zload import zload
[docs]
def lineval(root_folder: str, channel_name: str, output_format: Literal['array', 'wide'] = 'array',
            subject_level: int = 0, condition_level: int = 1) -> pd.DataFrame:
    """
    Extract time-normalized ``line`` arrays from Zoo files.

    This function recursively searches ``root_folder`` for ``.zoo`` files
    and extracts the ``line`` field from the specified channel. Folder
    levels are used to assign subject and condition labels.

    Data must already be time-normalized. The function will raise
    an error as soon as a file with a different signal length is found.

    Directories and files are visited in sorted order so that the row
    order of the returned DataFrame is reproducible across platforms.

    :param root_folder: Root directory containing data.
    :type root_folder: str
    :param channel_name: Name of the channel to extract.
    :type channel_name: str
    :param output_format: Output format.

        - ``'array'``: one column containing the full array (default)
        - ``'wide'``: one column per timepoint (p0, p1, ...)
    :type output_format: Literal['array', 'wide']
    :param subject_level: Folder index used to define subject label
        (0 = first folder below root).
    :type subject_level: int
    :param condition_level: Folder index used to define condition label
        (0 = first folder below root).
    :type condition_level: int
    :raises KeyError: If the specified channel or ``line`` field is missing.
    :raises ValueError: If signals are not equal length (not normalized).
    :raises ValueError: If invalid format is provided.
    :raises IndexError: If folder depth is insufficient for specified levels.
    :return: DataFrame containing extracted line data with subject,
        condition, and trial references. Empty (no columns) when no
        ``.zoo`` file is found under ``root_folder``.
    :rtype: pandas.DataFrame
    """
    if output_format not in ('array', 'wide'):
        raise ValueError("output_format must be 'array' or 'wide'")

    # Deepest folder index that must exist below root for both labels.
    required_depth = max(subject_level, condition_level)

    results = []
    expected_length = None  # length of the first extracted line

    for dirpath, dirnames, files in os.walk(root_folder):
        # os.walk (top-down) honours in-place edits of dirnames; sorting
        # makes the traversal order independent of the filesystem.
        dirnames.sort()
        for file in sorted(files):
            if not file.endswith('.zoo'):
                continue

            file_path = os.path.join(dirpath, file)
            relative_path = os.path.relpath(file_path, root_folder)
            # Drop the filename; what remains are the folders below root.
            folder_parts = relative_path.split(os.sep)[:-1]
            if len(folder_parts) <= required_depth:
                raise IndexError(
                    'Folder depth is insufficient for specified '
                    'subject_level or condition_level.'
                )
            subject = folder_parts[subject_level]
            condition = folder_parts[condition_level]

            data = zload(file_path)
            if channel_name not in data:
                raise KeyError(
                    'Channel {} not found in {}'.format(channel_name, file_path)
                )
            try:
                raw_line = data[channel_name]['line']
            except KeyError as err:
                # Re-raise with enough context to locate the offending file.
                raise KeyError(
                    "Field 'line' not found in channel {} of {}".format(
                        channel_name, file_path)
                ) from err

            # squeeze() turns a single-sample line into a 0-d array, on which
            # len() and indexing fail; atleast_1d restores one dimension.
            line_array = np.atleast_1d(np.asarray(raw_line).squeeze())

            # Strict normalization check, performed per file so the error
            # surfaces at the first mismatch and can name the file.
            n_points = len(line_array)
            if expected_length is None:
                expected_length = n_points
            elif n_points != expected_length:
                raise ValueError(
                    'Line arrays are not equal length. '
                    'Data must be time-normalized before calling lineval(). '
                    '({} has {} points, expected {}.)'.format(
                        file_path, n_points, expected_length)
                )

            row: Dict[str, Any] = {
                'subject': subject,
                'condition': condition,
                'trial': file
            }
            if output_format == 'array':
                row['line'] = line_array
            else:
                # 'wide': one column per timepoint.
                for i, value in enumerate(line_array):
                    row['p{}'.format(i)] = value
            results.append(row)
            print('Line extracted from {}'.format(file_path))

    return pd.DataFrame(results)