Skip to content

convert

convert_rt(rt_str)

Convert HDExaminer retention time string to float
example: "7.44-7.65" -> 7.545

Lossy conversion

This conversion loses information: the full retention time range is not preserved. This was done so that
the retention time can be stored as a float and thus be aggregated.
Future versions may store the full range in additional rt_min and rt_max columns.

Source code in hdxms_datasets/convert.py
67
68
69
70
71
72
73
74
75
76
77
78
79
def convert_rt(rt_str: str) -> float:
    """Convert HDExaminer retention time string to float
    example: "7.44-7.65" -> 7.545

    A range is reduced to the midpoint of its two bounds. A single value without
    a `-` separator (e.g. `"7.5"`) is returned unchanged as a float.

    !!! warning "Lossy conversion"
        This conversion loses information. The full range is not preserved. This was done such that
        retention time can be stored as float and thus be aggregated.
        Future versions may store the full range with additional `rt_min` and `rt_max` columns.

    Args:
        rt_str: Retention time, either as `"<min>-<max>"` or as a single number.

    Returns:
        The midpoint of the range, or the value itself for single-number input.

    Raises:
        ValueError: If any part of the string cannot be parsed as a float.

    """
    # partition (instead of split + unpack) lets a lone value through without
    # an unpacking error; anything malformed still fails in float().
    vmin, sep, vmax = rt_str.partition("-")
    if not sep:
        return float(vmin)
    return (float(vmin) + float(vmax)) / 2.0

from_dynamx_cluster(dynamx_df)

Convert a DynamX cluster DataFrame to OpenHDX format.

Source code in hdxms_datasets/convert.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def from_dynamx_cluster(dynamx_df: nw.DataFrame) -> nw.DataFrame:
    """
    Translate a DynamX cluster table into the OpenHDX column layout.

    Columns are renamed to their OpenHDX names, a `centroid_mass` column is added
    directly after `charge`, `exposure` is multiplied by 60.0, and the rows are
    sorted by state, exposure, start, end and replicate.

    Args:
        dynamx_df: DataFrame in DynamX cluster format.

    Returns:
        A DataFrame in OpenHDX format.

    """
    # DynamX column name -> OpenHDX column name
    renames = {
        "State": "state",
        "Exposure": "exposure",
        "Start": "start",
        "End": "end",
        "Sequence": "sequence",
        "File": "replicate",
        "z": "charge",
        "Center": "centroid_mz",
        "Inten": "intensity",
        "RT": "rt",
    }

    # Output layout: the renamed columns, with centroid_mass placed right after charge.
    output_columns = [*renames.values()]
    charge_position = output_columns.index("charge")
    output_columns.insert(charge_position + 1, "centroid_mass")

    sort_keys = ["state", "exposure", "start", "end", "replicate"]

    renamed = dynamx_df.rename(renames)
    # `centroid_mass` is an expression defined elsewhere in this module.
    # NOTE(review): the factor 60.0 presumably converts minutes to seconds - confirm.
    extended = renamed.with_columns([centroid_mass, nw.col("exposure") * 60.0])
    return extended.select(output_columns).sort(by=sort_keys)

from_dynamx_state(dynamx_df)

Convert a DynamX state DataFrame to OpenHDX format.

Source code in hdxms_datasets/convert.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def from_dynamx_state(dynamx_df: nw.DataFrame) -> nw.DataFrame:
    """
    Translate a DynamX state table into the OpenHDX column layout.

    Columns are renamed to their OpenHDX names, `exposure` is multiplied by 60.0,
    and the rows are sorted by state, exposure, start and end.

    Args:
        dynamx_df: DataFrame in DynamX state format.

    Returns:
        A DataFrame in OpenHDX format.

    """
    # DynamX column name -> OpenHDX column name
    renames = {
        "State": "state",
        "Exposure": "exposure",
        "Start": "start",
        "End": "end",
        "Sequence": "sequence",
        "Uptake": "uptake",
        "Uptake SD": "uptake_sd",
        "Center": "centroid_mz",
        "RT": "rt",
        "RT SD": "rt_sd",
    }

    # The output keeps exactly the renamed columns, in mapping order.
    output_columns = [*renames.values()]
    sort_keys = ["state", "exposure", "start", "end"]

    renamed = dynamx_df.rename(renames)
    # NOTE(review): the factor 60.0 presumably converts minutes to seconds - confirm.
    rescaled = renamed.with_columns([nw.col("exposure") * 60.0])
    return rescaled.select(output_columns).sort(by=sort_keys)

from_hdexaminer(hd_examiner_df, extra_columns=None)

Convert an HDExaminer DataFrame to OpenHDX format.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| hd_examiner_df | DataFrame | DataFrame in HDExaminer format. | required |
| extra_columns | list[str] \| dict[str, str] \| str \| None | Additional columns to include, either as a list/str of column name(s) or a dictionary mapping original column names to new names. | None |

Returns:

Type Description
DataFrame

A DataFrame in OpenHDX format.

Source code in hdxms_datasets/convert.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def from_hdexaminer(
    hd_examiner_df: nw.DataFrame,
    extra_columns: list[str] | dict[str, str] | str | None = None,
) -> nw.DataFrame:
    """
    Translate an HDExaminer table into the OpenHDX column layout.

    Columns are renamed to their OpenHDX names, a `centroid_mass` column is added
    directly after `charge`, and the `"Actual RT"` strings are converted to a
    float `rt` column via `convert_rt`. Any requested extra columns are appended
    after `rt`. Rows are sorted by state, exposure, start, end and replicate, and
    the result is passed through `cast_exposure`.

    Args:
        hd_examiner_df: DataFrame in HDExaminer format.
        extra_columns: Additional columns to include, either as a list/str of column name(s)
                       or a dictionary mapping original column names to new names.

    Returns:
        A DataFrame in OpenHDX format.

    """
    from hdxms_datasets.loader import BACKEND

    # HDExaminer column name -> OpenHDX column name
    renames = {
        "Protein State": "state",
        "Deut Time": "exposure",
        "Start": "start",
        "End": "end",
        "Sequence": "sequence",
        "Experiment": "replicate",
        "Charge": "charge",
        "Exp Cent": "centroid_mz",
        "Max Inty": "intensity",
    }

    # Core output layout: centroid_mass right after charge, rt at the end.
    output_columns = [*renames.values()]
    charge_position = output_columns.index("charge")
    output_columns.insert(charge_position + 1, "centroid_mass")
    output_columns.append("rt")

    # Extra columns are used here as a mapping of original name -> output name;
    # they join both the rename map and the tail of the output layout.
    extras = _fmt_extra_columns(extra_columns)
    renames.update(extras)
    output_columns.extend(extras.values())

    # Retention times are converted element by element, before the rename.
    rt_floats = list(map(convert_rt, hd_examiner_df["Actual RT"]))
    rt_column = nw.new_series(values=rt_floats, name="rt", backend=BACKEND)

    sort_keys = ["state", "exposure", "start", "end", "replicate"]

    renamed = hd_examiner_df.rename(renames)
    # `centroid_mass` is an expression defined elsewhere in this module.
    extended = renamed.with_columns([centroid_mass, rt_column])
    ordered = extended.select(output_columns).sort(by=sort_keys)

    return cast_exposure(ordered)

from_hxms(hxms_df, extra_columns='sequence')

Convert an HXMS DataFrame to OpenHDX format.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| hxms_df | DataFrame | DataFrame in HXMS format. | required |
| extra_columns | list[str] \| dict[str, str] \| str \| None | Additional columns to include, either as a list/str of column name(s) or a dictionary mapping original column names to new names. | 'sequence' |

Returns:

Type Description
DataFrame

A DataFrame in OpenHDX format.

Source code in hdxms_datasets/convert.py
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
def from_hxms(
    hxms_df: nw.DataFrame,
    extra_columns: list[str] | dict[str, str] | str | None = "sequence",
) -> nw.DataFrame:
    """
    Translate an HXMS table into the OpenHDX column layout.

    The core HXMS columns are renamed to their OpenHDX names, any requested extra
    columns are appended after them, and the rows are sorted by exposure, start,
    end and replicate.

    Args:
        hxms_df: DataFrame in HXMS format.
        extra_columns: Additional columns to include, either as a list/str of column name(s)
            or a dictionary mapping original column names to new names.

    Returns:
        A DataFrame in OpenHDX format.

    """
    # HXMS column name -> OpenHDX column name
    core_renames = {
        "START": "start",
        "END": "end",
        "REP": "replicate",
        "TIME(Sec)": "exposure",
        "UPTAKE": "uptake",
    }

    # Extra columns are used here as a mapping of original name -> output name.
    extras = _fmt_extra_columns(extra_columns)

    # Core columns come first, extras after; extras may override core rename entries.
    output_columns = [*core_renames.values(), *extras.values()]
    renames = {**core_renames, **extras}

    sort_keys = ["exposure", "start", "end", "replicate"]

    renamed = hxms_df.rename(renames)
    return renamed.select(output_columns).sort(by=sort_keys)