Skip to content

formats

DynamX_v3_cluster

Source code in hdxms_datasets/formats.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
class DynamX_v3_cluster:
    """Column layout and conversion hook for the DynamX v3 cluster format."""

    # Names of the entries in `columns` used for the state and the exposure.
    state_name = "State"
    exposure_name = "Exposure"
    # Data is expanded as multiple replicates rather than aggregated.
    aggregated = False

    # Expected column names; `identify_format` compares against this list.
    columns = [
        "Protein",
        "Start",
        "End",
        "Sequence",
        "Modification",
        "Fragment",
        "MaxUptake",
        "MHP",
        "State",
        "Exposure",
        "File",
        "z",
        "RT",
        "Inten",
        "Center",
    ]

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Delegate to ``from_dynamx_cluster`` to standardize ``df``."""
        return from_dynamx_cluster(df)

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/formats.py
103
104
105
106
107
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Delegate to ``from_dynamx_cluster`` to standardize ``df``."""
    return from_dynamx_cluster(df)

DynamX_vx_state

This format covers DynamX state data files that lack the 'Modification' and 'Fragment' columns. It is not known which DynamX version produces them.

Source code in hdxms_datasets/formats.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
class DynamX_vx_state:
    """DynamX state data files that lack the 'Modification' and 'Fragment' columns.

    It is not known which DynamX version produces these files.
    """

    # Names of the entries in `columns` used for the state and the exposure.
    state_name = "State"
    exposure_name = "Exposure"
    # Data is aggregated rather than expanded as multiple replicates.
    aggregated = True

    # Expected column names; `identify_format` compares against this list.
    columns = [
        "Protein",
        "Start",
        "End",
        "Sequence",
        "MaxUptake",
        "MHP",
        "State",
        "Exposure",
        "Center",
        "Center SD",
        "Uptake",
        "Uptake SD",
        "RT",
        "RT SD",
    ]

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Delegate to ``from_dynamx_state`` to standardize ``df``."""
        return from_dynamx_state(df)

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/formats.py
45
46
47
48
49
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Delegate to ``from_dynamx_state`` to standardize ``df``."""
    return from_dynamx_state(df)

HDExaminer_v3

Source code in hdxms_datasets/formats.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
class HDExaminer_v3:
    """Column layout and conversion hook for the HDExaminer v3 format."""

    # Names of the entries in `columns` used for the state and the exposure.
    state_name = "Protein State"
    exposure_name = "Deut Time"
    # Data is expanded as multiple replicates rather than aggregated.
    aggregated = False

    # Expected column names; `identify_format` compares against this list.
    columns = [
        "Protein State",
        "Deut Time",
        "Experiment",
        "Start",
        "End",
        "Sequence",
        "Charge",
        "Search RT",
        "Actual RT",
        "# Spectra",
        "Peak Width Da",
        "m/z Shift Da",
        "Max Inty",
        "Exp Cent",
        "Theor Cent",
        "Score",
        "Cent Diff",
        "# Deut",
        "Deut %",
        "Confidence",
    ]

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Delegate to ``from_hdexaminer`` to standardize ``df``."""
        return from_hdexaminer(df)

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/formats.py
137
138
139
140
141
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Delegate to ``from_hdexaminer`` to standardize ``df``."""
    return from_hdexaminer(df)

HDXFormat

Bases: Protocol

Source code in hdxms_datasets/formats.py
 6
 7
 8
 9
10
11
12
13
14
15
16
class HDXFormat(Protocol):
    """Structural interface every HDX data format description must satisfy."""

    columns: list[str]
    state_name: str
    exposure_name: str
    # True when the data is aggregated; False when it is expanded as
    # multiple replicates.
    aggregated: bool = False

    def convert(self, df: nw.DataFrame) -> nw.DataFrame:
        """Convert ``df`` to the standard format (implemented by each format)."""
        ...

convert(df)

Convert the DataFrame to a standard format.

Source code in hdxms_datasets/formats.py
12
13
14
15
16
def convert(self, df: nw.DataFrame) -> nw.DataFrame:
    """Convert ``df`` to the standard format (protocol stub, no body)."""
    ...

identify_format(cols, *, exact=True)

Identify which HDXFormat subclass the given column list matches.

Parameters:

Name Type Description Default
cols list[str]

The column names to check.

required
exact bool

If True, order must match; otherwise, uses set equality.

True

Returns:

Type Description
Optional[HDXFormat]

The matching HDXFormat subclass, or None if no match.

Source code in hdxms_datasets/formats.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def identify_format(cols: list[str], *, exact: bool = True) -> Optional[HDXFormat]:
    """
    Identify which HDXFormat subclass the given column list matches.

    The formats in ``HDX_FORMATS`` are checked in order and the first match
    is returned.

    Args:
        cols: The column names to check. Any iterable of names is accepted
            (list, tuple, index-like object); it is materialized once.
        exact: If True, order must match; otherwise, uses set equality.

    Returns:
        The matching HDXFormat subclass, or None if no match.
    """
    # Normalize up front: comparing a tuple or index-like object directly
    # against a list template never yields a plain True, so exact matching
    # would otherwise only work for genuine lists.
    col_list = list(cols)
    # The set form of the input is loop-invariant; build it at most once.
    col_set = None if exact else set(col_list)

    for fmt_class in HDX_FORMATS:
        template = fmt_class.columns
        if exact:
            if col_list == list(template):
                return fmt_class
        elif col_set == set(template):
            return fmt_class
    return None