Skip to content

formats

DynamX_v3_cluster

Source code in hdxms_datasets/formats.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
class DynamX_v3_cluster:
    """Format definition named for DynamX v3 cluster data files.

    Holds the expected column headers (in order), the names of the columns
    carrying the protein state and the exposure, and a flag telling whether
    rows are aggregated (they are not for this format).
    """

    state_name = "State"
    exposure_name = "Exposure"
    aggregated = False

    # Expected header, in file order.
    columns = [
        "Protein", "Start", "End", "Sequence",
        "Modification", "Fragment",
        "MaxUptake", "MHP",
        "State", "Exposure",
        "File", "z", "RT", "Inten", "Center",
    ]

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df`` converted to the standard format.

        The work is delegated to ``from_dynamx_cluster``.
        """
        converted = from_dynamx_cluster(df)
        return converted

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/formats.py
129
130
131
132
133
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Return ``df`` converted to the standard format.

    The work is delegated to ``from_dynamx_cluster``.
    """
    converted = from_dynamx_cluster(df)
    return converted

DynamX_vx_state

There are also DynamX state data files which do not have 'Modification' and 'Fragment' columns. It is not clear which DynamX version produces these files.

Source code in hdxms_datasets/formats.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
class DynamX_vx_state:
    """Format definition for DynamX state data files lacking two columns.

    Some DynamX state data files come without the 'Modification' and
    'Fragment' columns; this class describes that layout. Which DynamX
    version produces these files is not known.
    """

    state_name = "State"
    exposure_name = "Exposure"
    aggregated = True

    # Expected header, in file order.
    columns = [
        "Protein", "Start", "End", "Sequence",
        "MaxUptake", "MHP",
        "State", "Exposure",
        "Center", "Center SD",
        "Uptake", "Uptake SD",
        "RT", "RT SD",
    ]

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df`` converted to the standard format.

        The work is delegated to ``from_dynamx_state``.
        """
        converted = from_dynamx_state(df)
        return converted

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/formats.py
79
80
81
82
83
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Return ``df`` converted to the standard format.

    The work is delegated to ``from_dynamx_state``.
    """
    converted = from_dynamx_state(df)
    return converted

HDExaminer_v3

Source code in hdxms_datasets/formats.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
class HDExaminer_v3:
    """Format definition named for HDExaminer v3 data files.

    Holds the expected column headers (in order), the names of the columns
    carrying the protein state and the exposure, and a flag telling whether
    rows are aggregated (they are not for this format).
    """

    state_name = "Protein State"
    exposure_name = "Deut Time"
    aggregated = False

    # Expected header, in file order.
    columns = [
        "Protein State", "Deut Time", "Experiment",
        "Start", "End", "Sequence", "Charge",
        "Search RT", "Actual RT",
        "# Spectra", "Peak Width Da", "m/z Shift Da", "Max Inty",
        "Exp Cent", "Theor Cent", "Score", "Cent Diff",
        "# Deut", "Deut %", "Confidence",
    ]

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df`` converted to the standard format.

        The work is delegated to ``from_hdexaminer``.
        """
        converted = from_hdexaminer(df)
        return converted

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/formats.py
163
164
165
166
167
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Return ``df`` converted to the standard format.

    The work is delegated to ``from_hdexaminer``.
    """
    converted = from_hdexaminer(df)
    return converted

HDXFormat

Bases: Protocol

Source code in hdxms_datasets/formats.py
40
41
42
43
44
45
46
47
48
49
50
class HDXFormat(Protocol):
    """Structural interface shared by the HDX file-format definitions.

    An implementation declares its expected column headers, the names of
    its state and exposure columns, an ``aggregated`` flag, and a
    ``convert`` method.
    """

    columns: list[str]
    state_name: str
    exposure_name: str
    # Whether the data is aggregated or expanded as multiple replicates.
    aggregated: bool = False

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df`` converted to the standard format.

        Protocol stub; implementations supply the conversion.
        """
        ...

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/formats.py
46
47
48
49
50
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Return ``df`` converted to the standard format.

    Protocol stub; implementations supply the conversion.
    """
    ...

identify_format(cols, *, exact=True)

Identify which HDXFormat subclass the given column list matches.

Parameters:

Name Type Description Default
cols list[str]

The column names to check.

required
exact bool

If True, order must match; otherwise, uses set equality.

True

Returns:

Type Description
Optional[HDXFormat]

The matching HDXFormat subclass, or None if no match.

Source code in hdxms_datasets/formats.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def identify_format(cols: list[str], *, exact: bool = True) -> Optional[HDXFormat]:
    """
    Identify which HDXFormat subclass the given column list matches.

    Candidates are taken from ``HDX_FORMATS`` in order; the first match wins.

    Args:
        cols: The column names to check. Any iterable of names is accepted
            (for example a tuple); it is copied into a list before comparing.
        exact: If True, names and order must both match; otherwise, uses set
            equality (order and duplicates are ignored).

    Returns:
        The matching HDXFormat subclass, or None if no match.
    """
    # A list never compares equal to a tuple or another sequence type, and
    # array-like containers compare element-wise, so normalise the input once.
    col_list = list(cols)
    # The set form is only needed for order-insensitive matching; build it
    # once here instead of on every loop iteration.
    col_set = None if exact else set(col_list)

    for fmt_class in HDX_FORMATS:
        template = fmt_class.columns
        if exact:
            if col_list == list(template):
                return fmt_class
        elif col_set == set(template):
            return fmt_class
    return None