Skip to content

process

convert_temperature(temperature_dict, target_unit='c')

Convenience function to convert temperature values.

Parameters:

Name Type Description Default
temperature_dict dict

Dictionary with temperature value(s) and unit.

required
target_unit str

Target unit for temperature. Must be "c", "k", "celsius", or "kelvin" and is case-insensitive.

'c'

Returns:

Type Description
Union[float, list[float]]

Converted temperature value(s).

Source code in hdxms_datasets/process.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def convert_temperature(
    temperature_dict: dict, target_unit: str = "c"
) -> Union[float, list[float]]:
    """
    Convenience function to convert temperature values.

    Args:
        temperature_dict: Dictionary with temperature value(s) and unit. The unit is read from
            the "unit" key (case-insensitive). The temperature is read from the "values" key
            (an iterable of numbers) or, if that is absent, from the "value" key (a scalar).
        target_unit: Target unit for temperature. Must be "c", "k", "celsius", or "kelvin" and is
            case-insensitive.

    Returns:
        Converted temperature value(s): a list when "values" is supplied, otherwise a scalar.

    Raises:
        KeyError: If the "unit" key is missing from `temperature_dict`.
        ValueError: If neither "values" nor "value" is present in `temperature_dict`.
    """

    src_unit = temperature_dict["unit"].lower()
    temp_offset = TEMPERATURE_OFFSETS[src_unit] - TEMPERATURE_OFFSETS[target_unit.lower()]

    # Test for presence with `is not None` rather than truthiness: a scalar temperature of 0
    # (e.g. 0 degrees Celsius) is falsy but perfectly valid, and must not be reported as an
    # invalid dictionary. The same holds for an explicitly supplied empty list of values.
    values = temperature_dict.get("values")
    if values is not None:
        return [v + temp_offset for v in values]

    value = temperature_dict.get("value")
    if value is not None:
        return value + temp_offset

    raise ValueError("Invalid temperature dictionary")

convert_time(time_dict, target_unit='s')

Convenience function to convert time values.

Parameters:

Name Type Description Default
time_dict dict

Dictionary with time value(s) and unit.

required
target_unit Literal['s', 'min', 'h']

Target unit for time.

's'

Returns:

Type Description
Union[float, list[float]]

Converted time value(s).

Source code in hdxms_datasets/process.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def convert_time(
    time_dict: dict, target_unit: Literal["s", "min", "h"] = "s"
) -> Union[float, list[float]]:
    """
    Convenience function to convert time values.

    Args:
        time_dict: Dictionary with time value(s) and unit. The unit is read from the "unit" key.
            The time is read from the "values" key (an iterable of numbers) or, if that is
            absent, from the "value" key (a scalar).
        target_unit: Target unit for time.

    Returns:
        Converted time value(s): a list when "values" is supplied, otherwise a scalar.

    Raises:
        KeyError: If the "unit" key is missing from `time_dict`.
        ValueError: If neither "values" nor "value" is present in `time_dict`.
    """

    src_unit = time_dict["unit"]
    time_factor = TIME_FACTORS[src_unit] / TIME_FACTORS[target_unit]

    # Test for presence with `is not None` rather than truthiness: a time of 0 (for example a
    # zero-exposure time point) is falsy but valid, and must not be reported as an invalid
    # dictionary. The same holds for an explicitly supplied empty list of values.
    values = time_dict.get("values")
    if values is not None:
        return [v * time_factor for v in values]

    value = time_dict.get("value")
    if value is not None:
        return value * time_factor

    raise ValueError("Invalid time dictionary")

filter_peptides(df, state=None, exposure=None, query=None, dropna=True, time_unit='s')

Convenience function to filter a peptides DataFrame.

Parameters:

Name Type Description Default
df DataFrame

Input dataframe.

required
state Optional[str]

Name of protein state to select.

None
exposure Optional[dict]

Exposure value(s) to select. Exposure is given as a dict, with keys "value" or "values" for the exposure value(s), and "unit" for the time unit.

None
query Optional[list[str]]

Additional queries to pass to pandas.DataFrame.query.

None
dropna bool

Drop rows with NaN uptake entries.

True
time_unit str

Time unit for exposure column of supplied dataframe.

's'

Examples:

Filter peptides for a specific protein state and exposure time:

>>> d = {"state": "SecB WT apo", "exposure": {"value": 0.167, "unit": "min"}}
>>> filtered_df = filter_peptides(df, **d)

Returns:

Type Description
DataFrame

Filtered dataframe.

Source code in hdxms_datasets/process.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def filter_peptides(
    df: pd.DataFrame,
    state: Optional[str] = None,
    exposure: Optional[dict] = None,
    query: Optional[list[str]] = None,
    dropna: bool = True,
    time_unit: str = "s",
) -> pd.DataFrame:
    """
    Select rows from a peptides DataFrame.

    The filters are applied in order: state, exposure, extra queries, then NaN removal.
    The input dataframe is not modified; the result has a fresh 0-based index.

    Args:
        df: Input dataframe.
        state: Name of protein state to select; compared against the "state" column.
        exposure: Exposure value(s) to select, given as a dict with key "value" or "values"
            for the exposure value(s) and "unit" for their time unit. The value(s) are
            converted to `time_unit` before matching against the "exposure" column.
        query: Additional queries to pass to [pandas.DataFrame.query][], applied one by one.
        dropna: Drop rows with `NaN` entries in the "uptake" column.
        time_unit: Time unit for exposure column of supplied dataframe.

    Examples:
        Filter peptides for a specific protein state and exposure time:

        >>> d = {"state": "SecB WT apo", "exposure": {"value": 0.167, "unit": "min"}}
        >>> filtered_df = filter_peptides(df, **d)

    Returns:
        Filtered dataframe.
    """

    selected = df

    if state is not None:
        selected = selected[selected["state"] == state]

    if exposure is not None:
        # Bring the requested exposure(s) into the unit used by the dataframe.
        target = convert_time(exposure, time_unit)  # type: ignore
        exposure_col = selected["exposure"]
        if isinstance(target, list):
            mask = exposure_col.isin(target)
        else:
            mask = exposure_col == target
        selected = selected[mask]

    # Each query narrows the selection further; no queries means no narrowing.
    for expression in query or ():
        selected = selected.query(expression)

    if dropna:
        selected = selected.dropna(subset=["uptake"])

    return selected.reset_index(drop=True)

parse_data_files(data_file_spec, data_dir)

Parse data file specifications from a YAML file.

Parameters:

Name Type Description Default
data_file_spec dict

Dictionary with data file specifications.

required
data_dir Path

Path to data directory.

required

Returns:

Type Description
dict[str, DataFile]

Dictionary with parsed data file specifications.

Source code in hdxms_datasets/process.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def parse_data_files(data_file_spec: dict, data_dir: Path) -> dict[str, DataFile]:
    """
    Build `DataFile` objects from a mapping of data file specifications.

    Args:
        data_file_spec: Dictionary mapping a data file name to its specification. Each
            specification must contain a "filename" entry; all other entries are forwarded
            to `DataFile` as keyword arguments.
        data_dir: Path to data directory; each "filename" is resolved relative to it.

    Returns:
        Dictionary mapping each data file name to its `DataFile`.
    """

    # Imported inside the function, as in the original, rather than at module level.
    from hdxms_datasets import DataFile

    def _make_data_file(name, spec):
        # "filename" is consumed to build the path; the rest are DataFile keyword arguments.
        location = Path(data_dir / spec["filename"])
        extra_kwargs = {key: val for key, val in spec.items() if key != "filename"}
        return DataFile(name=name, filepath_or_buffer=location, **extra_kwargs)

    return {name: _make_data_file(name, spec) for name, spec in data_file_spec.items()}