Skip to content

plot

find_wrap(peptides, margin=4, step=5, wrap_limit=200)

Find the minimum wrap value for a given list of intervals.

Parameters:

Name Type Description Default
peptides DataFrame

Dataframe with columns 'start' and 'end' representing intervals.

required
margin int

The margin applied to the wrap value. Defaults to 4.

4
step int

The increment step for the wrap value. Defaults to 5.

5
wrap_limit int

The maximum allowed wrap value. Defaults to 200.

200

Returns:

Type Description
int

The minimum wrap value that does not overlap with any intervals.

Source code in hdxms_datasets/plot.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def find_wrap(
    peptides: pl.DataFrame,
    margin: int = 4,
    step: int = 5,
    wrap_limit: int = 200,
) -> int:
    """
    Find the minimum wrap value for a given list of intervals.

    Peptides are assigned to rows by `row_index % wrap`. Candidate wrap values
    `step, 2 * step, ...` are tried in increasing order and the first one for
    which no row contains overlapping peptides is returned. Within a row only
    consecutive peptides are compared: a pair counts as overlapping when
    `end + 1 + margin >= start` of the next peptide in that row.

    Args:
        peptides: Dataframe with columns 'start' and 'end' representing intervals.
        margin: The margin applied to the wrap value. Defaults to 4.
        step: The increment step for the wrap value. Defaults to 5.
        wrap_limit: The maximum allowed wrap value. Defaults to 200.

    Returns:
        The minimum wrap value that does not overlap with any intervals, or
        `wrap_limit` if no candidate up to `wrap_limit` is free of overlaps.

    Raises:
        ValueError: If `step` is smaller than 1 (the search could not advance).
    """
    if step < 1:
        raise ValueError("step must be a positive integer.")

    wrap = step
    while wrap <= wrap_limit:
        # Assign every peptide a row number in [0, wrap).
        peptides_y = peptides.with_columns(
            (pl.int_range(pl.len(), dtype=pl.UInt32) % wrap).alias("y")
        )

        has_overlap = False
        for _, df in peptides_y.group_by("y", maintain_order=True):
            ends = np.array(df["end"])
            starts = np.array(df["start"])
            # Compare each peptide's padded end with the start of the next
            # peptide in the same row.
            if np.any((ends + 1 + margin)[:-1] >= starts[1:]):
                has_overlap = True
                break

        if not has_overlap:
            # Return the value that was actually validated, not the next candidate.
            return wrap
        wrap += step

    return wrap_limit  # Return the maximum wrap limit if no valid wrap found

peptide_rectangles(peptides, wrap=None)

Given a DataFrame with 'start' and 'end' columns, each describing a peptide range, this function computes the corresponding rectangle coordinates for visualization.

Typically used for Altair plotting. The rectangles will be stacked vertically based on the wrap parameter. Horizontally, each rectangle spans from start - 0.5 to end + 0.5.

Parameters:

Name Type Description Default
peptides DataFrame

DataFrame containing peptide information with 'start' and 'end' columns.

required
wrap int | None

The number of peptides to stack vertically before wrapping to the next row. If None, the function will compute an optimal wrap value to avoid overlaps.

None

Returns:

Type Description
DataFrame

A DataFrame with columns 'x', 'x2', 'y', and 'y2' representing the rectangle coordinates.

Source code in hdxms_datasets/plot.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def peptide_rectangles(peptides: pl.DataFrame, wrap: int | None = None) -> pl.DataFrame:
    """
    Compute rectangle coordinates for a set of peptides.

    Each row of `peptides` describes a peptide range through its 'start' and 'end'
    columns. The result is typically used for Altair plotting.

    Horizontally, each rectangle spans from `start - 0.5` to `end + 0.5`.
    Vertically, rectangles are stacked according to `wrap`: the top edge `y` is
    `wrap - (idx % wrap)`, where `idx` is the row index of the peptide, and the
    bottom edge `y2` is `y - 1`.

    Args:
        peptides: DataFrame containing peptide information with 'start' and 'end' columns.
        wrap: The number of peptides to stack vertically before wrapping to the next row.
              If `None`, a wrap value is computed with `find_wrap(peptides, step=1)`.

    Returns:
        A DataFrame with the columns 'idx', 'start' and 'end' plus the rectangle
        coordinates 'x', 'x2', 'y', and 'y2'.

    """
    if wrap is None:
        wrap = find_wrap(peptides, step=1)

    # Only the interval columns are needed; the row index drives the stacking.
    indexed = peptides.select("start", "end").with_row_index("idx")

    left_edge = (pl.col("start") - 0.5).alias("x")
    right_edge = (pl.col("end") + 0.5).alias("x2")
    top_edge = (wrap - (pl.col("idx") % wrap)).alias("y")

    with_tops = indexed.with_columns([left_edge, right_edge, top_edge])
    # 'y2' is derived from 'y', so it has to be added in a second pass.
    return with_tops.with_columns((pl.col("y") - 1).alias("y2"))

plot_peptides(peptides, value='value', value_sd=None, colormap='viridis', domain=None, bad_color='#8c8c8c', N=256, label=None, width='container', height=350, wrap=None, fill_nan=True)

Create an altair chart visualizing peptides as colored rectangles.

Parameters:

Name Type Description Default
peptides DataFrame

DataFrame containing peptide information with 'start', 'end', and value columns.

required
value str

The column name in peptides to use for coloring the rectangles.

'value'
value_sd str | None

Optional column name for standard deviation of value, used in tooltips.

None
colormap str | Colormap

Colormap to use for coloring the rectangles. Can be a string or a Colormap object.

'viridis'
domain tuple[float | None, float | None] | None

Tuple specifying the (min, max) values for the colormap. If None, uses min and max of value.

None
bad_color str

Color to use for invalid or NaN values.

'#8c8c8c'
N int

Number of discrete colors to generate from the colormap.

256
label str | None

Label for the color legend. If None, uses a title-cased version of value.

None
width str | int

Width of the chart. Can be an integer or 'container' for responsive width.

'container'
height str | int

Height of the chart in pixels.

350
wrap int | None

Number of peptides to stack vertically before wrapping to the next row. If None, computes an optimal wrap value.

None
fill_nan bool

Whether to fill NaN values in peptides with None to avoid serialization issues.

True

Returns:

Type Description
Chart

An Altair Chart object visualizing the peptides.

Source code in hdxms_datasets/plot.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def plot_peptides(
    peptides: pl.DataFrame,
    value: str = "value",
    value_sd: str | None = None,
    colormap: str | Colormap = "viridis",
    domain: tuple[float | None, float | None] | None = None,
    bad_color: str = "#8c8c8c",
    N: int = 256,
    label: str | None = None,
    width: str | int = "container",
    height: str | int = 350,
    wrap: int | None = None,
    fill_nan: bool = True,
) -> alt.Chart:
    """
    Create an altair chart visualizing peptides as colored rectangles.

    Args:
        peptides: DataFrame containing peptide information with 'start', 'end', and `value` columns.
            The tooltip also references a 'sequence' field.
        value: The column name in `peptides` to use for coloring the rectangles.
        value_sd: Optional column name for standard deviation of `value`, used in tooltips.
            If not given, `f"{value}_sd"` is used when that column exists.
        colormap: Colormap to use for coloring the rectangles. Can be a string or a Colormap object.
        domain: Tuple specifying the (min, max) values for the colormap. Entries that are `None`
            (or a `None` domain) fall back to the min / max of `value`.
        bad_color: Color to use for invalid or NaN values.
        N: Number of discrete colors to generate from the colormap.
        label: Label for the color legend. If `None`, uses a title-cased version of `value`.
        width: Width of the chart. Can be an integer or 'container' for responsive width.
        height: Height of the chart in pixels.
        wrap: Number of peptides to stack vertically before wrapping to the next row. If `None`, computes an optimal wrap value.
        fill_nan: Whether to fill NaN values in `peptides` with None to avoid serialization issues.

    Returns:
        An Altair Chart object visualizing the peptides.

    Raises:
        ValueError: If the peptides are not unique by their 'start' and 'end' columns.

    """

    if not unique_peptides(peptides):
        raise ValueError("Peptides must be unique by 'start' and 'end' columns.")

    if fill_nan:
        # nan values can cause problems in serialization
        peptides = peptides.fill_nan(None)

    value_sd = value_sd or f"{value}_sd"
    colormap = Colormap(colormap) if isinstance(colormap, str) else colormap
    if domain is None:
        domain = (None, None)
    vmin = domain[0] if domain[0] is not None else peptides[value].min()  # type: ignore
    vmax = domain[1] if domain[1] is not None else peptides[value].max()  # type: ignore

    scale = alt.Scale(domain=(vmin, vmax), range=colormap.to_altair(N=N))  # type: ignore
    label = label or value.replace("_", " ").title()

    # Loop variables are deliberately not called `value` so the column-name
    # parameter is never shadowed.
    if value_sd in peptides.columns:
        tooltip_value = [
            f"{v:.2f} \u00b1 {v_sd:.2f}"  # type: ignore
            if v is not None and v_sd is not None
            else "NaN"
            for v, v_sd in zip(peptides[value], peptides[value_sd])
        ]
    else:
        tooltip_value = [f"{v:.2f}" if v is not None else "" for v in peptides[value]]

    rectangles = peptide_rectangles(peptides, wrap=wrap)
    # Attach the tooltip text before joining: it is aligned with `peptides` by
    # position, so it must not depend on the row order produced by the join.
    peptide_source = peptides.with_columns(
        pl.Series("tooltip_value", tooltip_value)
    ).join(rectangles, on=["start", "end"], how="left")

    invalid = {"color": {"value": bad_color}}
    peptide_chart = (
        alt.Chart(peptide_source)
        .mark_rect(
            stroke="black",
        )
        .encode(
            x=alt.X("x:Q", title="Residue Number"),
            y=alt.Y("y:Q", title="", axis=alt.Axis(ticks=False, domain=False, labels=False)),
            x2=alt.X2("x2:Q"),
            y2=alt.Y2("y2:Q"),
            tooltip=[
                alt.Tooltip("idx:Q", title="Index"),
                alt.Tooltip("start:Q", title="Start"),
                alt.Tooltip("end:Q", title="End"),
                alt.Tooltip("sequence:N", title="Sequence"),
                alt.Tooltip("tooltip_value:N", title=label),
            ],
            color=alt.Color(f"{value}:Q", scale=scale, title=label),
        )
        .configure_scale(invalid=invalid)
    )

    return peptide_chart.properties(height=height, width=width)

unique_peptides(df)

Checks if all peptides in the DataFrame are unique. Needs to have columns 'start' and 'end' marking peptide intervals (inclusive).

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing peptide information.

required

Returns:

Type Description
bool

True if all peptides are unique, otherwise False.

Source code in hdxms_datasets/plot.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
def unique_peptides(df: pl.DataFrame) -> bool:
    """
    Report whether every peptide in the DataFrame occurs only once.

    Peptides are identified by their 'start' and 'end' columns, which mark the
    peptide intervals (inclusive).

    Args:
        df: DataFrame containing peptide information.

    Returns:
        `True` if all peptides are unique, otherwise `False`.

    """
    # Dropping duplicate (start, end) pairs leaves the row count unchanged
    # only when there were no duplicates to begin with.
    deduplicated = df.unique(subset=["start", "end"])
    return len(deduplicated) == len(df)