Skip to content

formats

DynamX_v3_cluster

Source code in hdxms_datasets/stable/v020/formats.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
class DynamX_v3_cluster:
    """Column layout and converter for the ``DynamX_v3_cluster`` table format.

    ``columns`` lists the expected column names, ``state_name`` and
    ``exposure_name`` name the state and exposure columns, and ``convert``
    delegates to ``from_dynamx_cluster``.
    """

    state_name = "State"
    exposure_name = "Exposure"
    # Not aggregated: the data is expanded as multiple replicates.
    aggregated = False

    # Expected column names; the order is significant for exact matching.
    # fmt: off
    columns = [
        "Protein", "Start", "End", "Sequence", "Modification", "Fragment",
        "MaxUptake", "MHP", "State", "Exposure", "File",
        "z", "RT", "Inten", "Center",
    ]
    # fmt: on

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df`` converted to the standard format."""
        converted = from_dynamx_cluster(df)
        return converted

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/stable/v020/formats.py
160
161
162
163
164
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Return ``df`` converted to the standard format.

    The conversion itself is delegated to ``from_dynamx_cluster``.
    """
    converted = from_dynamx_cluster(df)
    return converted

DynamX_vx_state

There are also DynamX state data files which do not have 'Modification' and 'Fragment' columns. It is not known which DynamX version produces these files.

Source code in hdxms_datasets/stable/v020/formats.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
class DynamX_vx_state:
    """Column layout and converter for DynamX state data files.

    These are DynamX state data files which do not have 'Modification' and
    'Fragment' columns. It is not known which DynamX version produces them.
    """

    state_name = "State"
    exposure_name = "Exposure"
    # Aggregated data, as opposed to data expanded as multiple replicates.
    aggregated = True

    # Expected column names; the order is significant for exact matching.
    # fmt: off
    columns = [
        "Protein", "Start", "End", "Sequence",
        "MaxUptake", "MHP", "State", "Exposure",
        "Center", "Center SD",
        "Uptake", "Uptake SD",
        "RT", "RT SD",
    ]
    # fmt: on

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df`` converted to the standard format."""
        converted = from_dynamx_state(df)
        return converted

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/stable/v020/formats.py
110
111
112
113
114
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Return ``df`` converted to the standard format.

    The conversion itself is delegated to ``from_dynamx_state``.
    """
    converted = from_dynamx_state(df)
    return converted

HDExaminer_v3

Source code in hdxms_datasets/stable/v020/formats.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
class HDExaminer_v3:
    """Column layout and converter for the ``HDExaminer_v3`` table format.

    ``columns`` lists the expected column names, ``state_name`` and
    ``exposure_name`` name the state and exposure columns, and ``convert``
    delegates to ``from_hdexaminer``.
    """

    state_name = "Protein State"
    exposure_name = "Deut Time"
    # Not aggregated: the data is expanded as multiple replicates.
    aggregated = False

    # Expected column names; the order is significant for exact matching.
    # fmt: off
    columns = [
        "Protein State", "Deut Time", "Experiment",
        "Start", "End", "Sequence", "Charge",
        "Search RT", "Actual RT",
        "# Spectra", "Peak Width Da", "m/z Shift Da", "Max Inty",
        "Exp Cent", "Theor Cent", "Score", "Cent Diff",
        "# Deut", "Deut %", "Confidence",
    ]
    # fmt: on

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df`` converted to the standard format."""
        converted = from_hdexaminer(df)
        return converted

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/stable/v020/formats.py
194
195
196
197
198
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Return ``df`` converted to the standard format.

    The conversion itself is delegated to ``from_hdexaminer``.
    """
    converted = from_hdexaminer(df)
    return converted

HDXFormat

Bases: Protocol

Source code in hdxms_datasets/stable/v020/formats.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class HDXFormat(Protocol):
    """Structural interface for a tabular HDX data format description.

    A conforming object exposes the expected column names, the names of the
    state and exposure columns, an ``aggregated`` flag, and a ``convert``
    method.
    """

    columns: list[str]
    state_name: str
    exposure_name: str

    @property
    def aggregated(self) -> bool:
        """Whether the data is aggregated or expanded as multiple replicates."""
        ...

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Convert the DataFrame to a standard format."""
        ...

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/stable/v020/formats.py
53
54
55
56
57
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Convert the DataFrame to a standard format.

    This is an interface stub with no behaviour of its own.
    """
    pass

OpenHDXFormat dataclass

A format where column names are standardized to a common set.

Hence OpenHDXFormat.convert() is a no-op.

Source code in hdxms_datasets/stable/v020/formats.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
@dataclass
class OpenHDXFormat:
    """A format whose column names are standardized to a common set.

    Because the columns are already standardized, ``convert`` is a no-op
    that hands back its input.
    """

    # Whether the data is aggregated or expanded as multiple replicates.
    # There is no default, so it must be supplied at construction time.
    aggregated: bool

    # Un-annotated on purpose: these stay plain class attributes rather than
    # becoming dataclass fields.
    state_name = "state"
    exposure_name = "exposure"
    columns = STANDARD_COLUMNS + OPTIONAL_COLUMNS

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df`` unchanged."""
        unchanged = df
        return unchanged

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/stable/v020/formats.py
76
77
78
79
80
81
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Return ``df`` unchanged; no conversion is performed."""
    unchanged = df
    return unchanged

identify_format(df, *, exact=True)

Identify which HDXFormat the given DataFrame's columns match. If there is no match, return an OpenHDXFormat instance with aggregated set to True if 'replicate' is not in the columns.

Parameters:

Name Type Description Default
df

The DataFrame whose column names are checked.

required
exact bool

If True, order must match; otherwise, uses set equality.

True

Returns:

Type Description
Optional[HDXFormat]

The matching HDXFormat, or an OpenHDXFormat instance if no known format matches.

Source code in hdxms_datasets/stable/v020/formats.py
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
def identify_format(df: nw.DataFrame, *, exact: bool = True) -> Optional[HDXFormat]:
    """
    Identify which known HDX format the columns of ``df`` match.

    Each entry of ``HDX_FORMATS`` is checked in order and the first one whose
    ``columns`` match is returned. If none matches, an ``OpenHDXFormat``
    instance is returned instead, with ``aggregated`` set to True when
    'replicate' is *not* among the columns.

    Args:
        df: The DataFrame whose column names are checked.
        exact: If True, the column names must equal the format's columns
            including order; otherwise, set equality is used.

    Returns:
        The matching entry of ``HDX_FORMATS``, or an ``OpenHDXFormat``
        instance if there is no match. None is never returned, although the
        annotation allows it.
    """
    cols = df.columns
    # The set of column names is loop-invariant; build it once and only when
    # it is actually needed.
    col_set = None if exact else set(cols)

    for fmt in HDX_FORMATS:
        template = fmt.columns
        if exact:
            if cols == template:
                return fmt
        elif col_set == set(template):
            return fmt

    # If there is no match, fall back to OpenHDXFormat. The absence of a
    # 'replicate' column is taken to mean the data is aggregated.
    aggregated = "replicate" not in cols
    return OpenHDXFormat(aggregated=aggregated)