view

`StructureView`

Source code in hdxms_datasets/view.py

class StructureView:
    """Notebook view of a structure with peptide-level colouring helpers.

    Wraps an ``ipymolstar.PDBeMolstar`` widget (stored as ``self.view``). The
    colouring and highlight helpers build PDBeMolstar query dictionaries, assign
    them to the widget, and return ``self`` so calls can be chained.
    """

    def __init__(
        self,
        structure: Structure,
        mapping: StructureMapping = StructureMapping(),
        hide_water=True,
        **kwargs: dict,
    ):
        """
        Initialize the PDBeMolstar visualization namespace.

        Can use a `StructureMapping`, which relates peptides to the structure.

        Args:
            structure: The structure to visualize.
            mapping: Optional structure mapping information.
            hide_water: Forwarded to ``PDBeMolstar`` as its ``hide_water`` option.
            **kwargs: Additional keyword arguments for customization; forwarded
                unchanged to ``PDBeMolstar``.
        """
        # NOTE(review): the default `StructureMapping()` is evaluated once, when
        # the function is defined, so every view created without `mapping` shares
        # that one instance. This is safe only while it is never mutated in place.
        self.structure = structure
        self.mapping = mapping

        # Imported lazily so the widget dependency is only needed when a view is built.
        from ipymolstar import PDBeMolstar

        self.view = PDBeMolstar(
            custom_data=self.structure.pdbemolstar_custom_data(),
            hide_water=hide_water,
            **kwargs,
        )

    def show(self):
        """Return the underlying PDBeMolstar widget."""
        return self.view

    def highlight(self, resi: int) -> StructureView:
        """
        Highlights a residue in the structure.

        Args:
            resi: Residue number to highlight.

        Returns:
            The updated StructureView object.
        """
        param = self.get_query_param(resi)
        data = self._augment_chain([param])
        self.view.highlight = {"data": data}

        return self

    def color_peptide(
        self,
        start: int,
        end: int,
        color: str = "red",
        non_selected_color: str = "lightgray",
    ) -> StructureView:
        """
        Color a peptide by start and end residue numbers.

        Args:
            start: Start residue number.
            end: End residue number.
            color: Color for the peptide.
            non_selected_color: Color for non-selected regions.

        Returns:
            The updated StructureView object.
        """
        param = self.get_query_param_range(start, end, color=color)
        data = self._augment_chain([param])

        self.view.color_data = {
            "data": data,
            "nonSelectedColor": non_selected_color,
        }
        # Any tooltips set by an earlier call are cleared.
        self.view.tooltips = None

        return self

    def peptide_coverage(
        self,
        peptides: nw.DataFrame,
        color: str = "darkgreen",
        non_selected_color: str = "lightgray",
    ) -> StructureView:
        """
        Plots peptide coverage on the structure.

        Args:
            peptides: Peptides object or DataFrame containing peptide data.
            color: Color for the covered regions.
            non_selected_color: Color for non-covered regions.

        Returns:
            The updated StructureView object.
        """
        # `contiguous_peptides` yields (start, end) pairs; each becomes one range query.
        intervals = contiguous_peptides(peptides)

        data = [self.get_query_param_range(start, end, color=color) for start, end in intervals]
        self.view.color_data = {
            "data": self._augment_chain(data),
            "nonSelectedColor": non_selected_color,
        }
        self.view.tooltips = None
        return self

    def non_overlapping_peptides(
        self,
        peptides: nw.DataFrame,
        colors: list[str] | None = None,
        non_selected_color: str = "lightgray",
    ) -> StructureView:
        """Selects a set of non-overlapping peptides to display on the structure. Starts with the first
        peptide and successively adds peptides that do not overlap with already selected peptides.

        Args:
            peptides: Peptides object or DataFrame containing peptide data.
            colors: List of colors to cycle through for different peptides.
            non_selected_color: Color for non-covered regions.

        Returns:
            The updated StructureView object.

        """
        # Resolves to the module-level helper, not this method.
        intervals = non_overlapping_peptides(peptides)

        if colors is None:
            colors = ["#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02"]

        cdata = []
        tdata = []
        # `cycle` repeats the palette when there are more peptides than colors.
        for (start, end), color in zip(intervals, itertools.cycle(colors)):
            cdata.append(self.get_query_param_range(start, end, color=color))
            # NOTE(review): `.unique().first()` is called on the native column;
            # this looks polars-specific - confirm other backends are not expected.
            df_f = peptides.filter((nw.col("start") == start) & (nw.col("end") == end)).to_native()
            sequence = df_f["sequence"].unique().first()
            tdata.append(self.get_query_param_range(start, end, tooltip=f"Peptide: {sequence}"))

        self.view.color_data = {
            "data": self._augment_chain(cdata),
            "nonSelectedColor": non_selected_color,
        }
        self.view.tooltips = {"data": self._augment_chain(tdata)}
        return self

    def peptide_redundancy(
        self,
        peptides: nw.DataFrame,
        colors: list[str] | None = None,
        clip: Optional[int] = None,
        non_selected_color: str = "lightgray",
    ) -> StructureView:
        """Colors residues by peptide redundancy.

        Args:
            peptides: Peptides DataFrame containing peptide data.
            colors: List of colors to use for different redundancy levels.
            clip: Optional maximum redundancy value for clipping.
            non_selected_color: Color for non-covered regions.

        Returns:
            The updated StructureView object.

        """
        # Resolves to the module-level helper, not this method.
        r_number, redundancy = peptide_redundancy(peptides)

        if colors is None:
            colors = ["#C6DBEF", "#9ECAE1", "#6BAED6", "#4292C6", "#2171B5", "#08519C", "#08306B"]

        top_index = len(colors) - 1
        if clip:
            vals = ((redundancy.clip(None, clip) / clip) * top_index).astype(int)
        else:
            # `or 1` avoids 0/0 -> NaN (and a NaN-to-int cast) when nothing is
            # covered; zero-redundancy residues are skipped below regardless.
            peak = redundancy.max() or 1
            vals = ((redundancy / peak) * top_index).astype(int)

        data = []
        tooltips = []
        for rn, rv, rv_clip in zip(r_number, redundancy, vals):
            # Every residue gets a tooltip, including uncovered ones.
            tooltips.append(self.get_query_param(int(rn), tooltip=f"Redundancy: {rv} peptides"))

            # Uncovered residues keep the non-selected color.
            if rv == 0:
                continue

            color_elem = self.get_query_param(int(rn), color=colors[rv_clip])
            data.append(color_elem)

        self.view.color_data = {
            "data": self._augment_chain(data),
            "nonSelectedColor": non_selected_color,
        }
        self.view.tooltips = {"data": self._augment_chain(tooltips)}
        return self

    def set_mapping(self, mapping: StructureMapping) -> StructureView:
        """Replace the structure mapping used by later calls and return ``self``."""
        self.mapping = mapping
        return self

    def get_query_param(self, resi: int, **kwargs):
        """Build a single-residue query dict.

        The residue number is translated with ``self.mapping.map`` and stored
        under ``self.residue_name``; ``kwargs`` (e.g. ``color``, ``tooltip``) are
        merged into the same dict.
        """
        resi = self.mapping.map(resi)

        # TODO entity support
        c_dict = {
            self.residue_name: int(resi),
            **kwargs,
        }

        return c_dict

    def get_query_param_range(self, start: int, end: int, **kwargs):
        """Build a residue-range query dict.

        Both bounds are translated with ``self.mapping.map`` and stored under the
        ``start_``/``end_`` prefixed ``self.residue_name`` keys; ``kwargs`` are
        merged into the same dict.
        """
        start = self.mapping.map(start)
        end = self.mapping.map(end)

        # TODO entity support
        c_dict = {
            "start_" + self.residue_name: int(start),
            "end_" + self.residue_name: int(end),
            **kwargs,
        }

        return c_dict

    @property
    def residue_name(self) -> str:
        """
        Returns the residue name based on whether auth residue numbers are used.
        """
        return "auth_residue_number" if self.mapping.auth_residue_numbers else "residue_number"

    @property
    def chain_name(self) -> str:
        """
        Returns the chain name based on whether auth chain labels are used.

        Note that 'struct_asym_id' used in PDBeMolstar is equivalent to
        'label_asym_id' in mmCIF.

        """
        return "auth_asym_id" if self.mapping.auth_chain_labels else "struct_asym_id"

    def _augment_chain(
        self,
        data: list[dict[str, ValueType]],
    ) -> list[dict[str, ValueType]]:
        """Augment a list of data with chain information.

        When ``self.mapping.chain`` is non-empty, every element is copied once per
        chain with the chain added under ``self.chain_name``; otherwise ``data`` is
        returned unchanged.
        """
        if self.mapping.chain:
            aug_data = []
            for elem, chain in itertools.product(data, self.mapping.chain):
                aug_data.append(elem | {self.chain_name: chain})
        else:
            aug_data = data

        return aug_data

    def _repr_mimebundle_(self, include=None, exclude=None):
        """Delegate rich display to the underlying widget."""
        return self.show()._repr_mimebundle_(include=include, exclude=exclude)

`chain_name` `property`

Returns the chain name based on whether auth chain labels are used.

Note that 'struct_asym_id' used in PDBeMolstar is equivalent to
'label_asym_id' in mmCIF.

`residue_name` `property`

Returns the residue name based on whether auth residue numbers are used.

`__init__(structure, mapping=StructureMapping(), hide_water=True, **kwargs)`

Initialize the PDBeMolstar visualization namespace.
Can use a StructureMapping, which relates peptides to the structure.

Parameters:

Name	Type	Description	Default
`structure`	`Structure`	The structure to visualize.	required
`mapping`	`StructureMapping`	Optional structure mapping information.	`StructureMapping()`
`**kwargs`	`dict`	Additional keyword arguments for customization.	`{}`

Source code in hdxms_datasets/view.py

def __init__(
    self,
    structure: Structure,
    mapping: StructureMapping = StructureMapping(),
    hide_water=True,
    **kwargs: dict,
):
    """
    Set up the PDBeMolstar widget for a structure.

    A `StructureMapping` can be supplied to relate peptides to the structure.

    Args:
        structure: The structure to visualize.
        mapping: Optional structure mapping information.
        hide_water: Forwarded to ``PDBeMolstar`` as its ``hide_water`` option.
        **kwargs: Additional keyword arguments for customization; forwarded
            unchanged to ``PDBeMolstar``.
    """
    self.structure = structure
    self.mapping = mapping

    # Imported lazily so the widget dependency is only needed when a view is built.
    from ipymolstar import PDBeMolstar

    custom_data = self.structure.pdbemolstar_custom_data()
    self.view = PDBeMolstar(custom_data=custom_data, hide_water=hide_water, **kwargs)

`color_peptide(start, end, color='red', non_selected_color='lightgray')`

Color a peptide by start and end residue numbers.

Parameters:

Name	Type	Description	Default
`start`	`int`	Start residue number.	required
`end`	`int`	End residue number.	required
`color`	`str`	Color for the peptide.	`'red'`
`non_selected_color`	`str`	Color for non-selected regions.	`'lightgray'`

Returns:

Type	Description
`StructureView`	The updated StructureView object.

Source code in hdxms_datasets/view.py

def color_peptide(
    self,
    start: int,
    end: int,
    color: str = "red",
    non_selected_color: str = "lightgray",
) -> StructureView:
    """
    Paint one peptide, given by its first and last residue number.

    Args:
        start: Start residue number.
        end: End residue number.
        color: Color for the peptide.
        non_selected_color: Color for non-selected regions.

    Returns:
        The updated StructureView object.
    """
    selection = self.get_query_param_range(start, end, color=color)

    self.view.color_data = {
        "data": self._augment_chain([selection]),
        "nonSelectedColor": non_selected_color,
    }
    # Any tooltips set by an earlier call are cleared.
    self.view.tooltips = None

    return self

`highlight(resi)`

Highlights a residue in the structure.

Parameters:

Name	Type	Description	Default
`resi`	`int`	Residue number to highlight.	required

Returns:

Type	Description
`StructureView`	The updated StructureView object.

Source code in hdxms_datasets/view.py

def highlight(self, resi: int) -> StructureView:
    """
    Highlight a single residue in the structure.

    Args:
        resi: Residue number to highlight.

    Returns:
        The updated StructureView object.
    """
    selection = self._augment_chain([self.get_query_param(resi)])
    self.view.highlight = {"data": selection}
    return self

`non_overlapping_peptides(peptides, colors=None, non_selected_color='lightgray')`

Selects a set of non-overlapping peptides to display on the structure. Starts with the first
peptide and successively adds peptides that do not overlap with already selected peptides.

Parameters:

Name	Type	Description	Default
`peptides`	`DataFrame`	Peptides object or DataFrame containing peptide data.	required
`colors`	`list[str] \| None`	List of colors to cycle through for different peptides.	`None`
`non_selected_color`	`str`	Color for non-covered regions.	`'lightgray'`

Returns:

Type	Description
`StructureView`	The updated StructureView object.

Source code in hdxms_datasets/view.py

def non_overlapping_peptides(
    self,
    peptides: nw.DataFrame,
    colors: list[str] | None = None,
    non_selected_color: str = "lightgray",
) -> StructureView:
    """Selects a set of non-overlapping peptides to display on the structure. Starts with the first
    peptide and successively adds peptides that do not overlap with already selected peptides.

    Each selected peptide gets a color from ``colors`` and a tooltip showing its
    sequence.

    Args:
        peptides: Peptides object or DataFrame containing peptide data.
        colors: List of colors to cycle through for different peptides.
        non_selected_color: Color for non-covered regions.

    Returns:
        The updated StructureView object.

    """
    # In the module this name resolves to the module-level helper, not this method.
    intervals = non_overlapping_peptides(peptides)

    if colors is None:
        colors = ["#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02"]

    cdata = []
    tdata = []
    # `cycle` repeats the palette when there are more peptides than colors.
    for (start, end), color in zip(intervals, itertools.cycle(colors)):
        cdata.append(self.get_query_param_range(start, end, color=color))
        # NOTE(review): `.unique().first()` is called on the native column;
        # this looks polars-specific - confirm other backends are not expected.
        df_f = peptides.filter((nw.col("start") == start) & (nw.col("end") == end)).to_native()
        sequence = df_f["sequence"].unique().first()
        tdata.append(self.get_query_param_range(start, end, tooltip=f"Peptide: {sequence}"))

    self.view.color_data = {
        "data": self._augment_chain(cdata),
        "nonSelectedColor": non_selected_color,
    }
    self.view.tooltips = {"data": self._augment_chain(tdata)}
    return self

`peptide_coverage(peptides, color='darkgreen', non_selected_color='lightgray')`

Plots peptide coverage on the structure.

Parameters:

Name	Type	Description	Default
`peptides`	`DataFrame`	Peptides object or DataFrame containing peptide data.	required
`color`	`str`	Color for the covered regions.	`'darkgreen'`
`non_selected_color`	`str`	Color for non-covered regions.	`'lightgray'`

Returns:

Type	Description
`StructureView`	The updated StructureView object.

Source code in hdxms_datasets/view.py

def peptide_coverage(
    self,
    peptides: nw.DataFrame,
    color: str = "darkgreen",
    non_selected_color: str = "lightgray",
) -> StructureView:
    """
    Plots peptide coverage on the structure.

    Args:
        peptides: Peptides object or DataFrame containing peptide data.
        color: Color for the covered regions.
        non_selected_color: Color for non-covered regions.

    Returns:
        The updated StructureView object.
    """
    # `contiguous_peptides` yields (start, end) pairs; each becomes one range query.
    intervals = contiguous_peptides(peptides)

    data = [self.get_query_param_range(start, end, color=color) for start, end in intervals]
    self.view.color_data = {
        "data": self._augment_chain(data),
        "nonSelectedColor": non_selected_color,
    }
    # Any tooltips set by an earlier call are cleared.
    self.view.tooltips = None
    return self

`peptide_redundancy(peptides, colors=None, clip=None, non_selected_color='lightgray')`

Colors residues by peptide redundancy.

Parameters:

Name	Type	Description	Default
`peptides`	`DataFrame`	Peptides DataFrame containing peptide data.	required
`colors`	`list[str] \| None`	List of colors to use for different redundancy levels.	`None`
`clip`	`Optional[int]`	Optional maximum redundancy value for clipping.	`None`
`non_selected_color`	`str`	Color for non-covered regions.	`'lightgray'`

Returns:

Type	Description
`StructureView`	The updated StructureView object.

Source code in hdxms_datasets/view.py

def peptide_redundancy(
    self,
    peptides: nw.DataFrame,
    colors: list[str] | None = None,
    clip: Optional[int] = None,
    non_selected_color: str = "lightgray",
) -> StructureView:
    """Colors residues by peptide redundancy.

    Redundancy is scaled onto the indices of ``colors``: relative to ``clip``
    when given, otherwise relative to the maximum redundancy. Every residue
    gets a tooltip with its redundancy; residues with zero redundancy are not
    colored.

    Args:
        peptides: Peptides DataFrame containing peptide data.
        colors: List of colors to use for different redundancy levels.
        clip: Optional maximum redundancy value for clipping.
        non_selected_color: Color for non-covered regions.

    Returns:
        The updated StructureView object.

    """
    # In the module this name resolves to the module-level helper, not this method.
    r_number, redundancy = peptide_redundancy(peptides)

    if colors is None:
        colors = ["#C6DBEF", "#9ECAE1", "#6BAED6", "#4292C6", "#2171B5", "#08519C", "#08306B"]

    top_index = len(colors) - 1
    if clip:
        vals = ((redundancy.clip(None, clip) / clip) * top_index).astype(int)
    else:
        # `or 1` avoids 0/0 -> NaN (and a NaN-to-int cast) when nothing is
        # covered; zero-redundancy residues are skipped below regardless.
        peak = redundancy.max() or 1
        vals = ((redundancy / peak) * top_index).astype(int)

    data = []
    tooltips = []
    for rn, rv, rv_clip in zip(r_number, redundancy, vals):
        # Every residue gets a tooltip, including uncovered ones.
        tooltips.append(self.get_query_param(int(rn), tooltip=f"Redundancy: {rv} peptides"))

        # Uncovered residues keep the non-selected color.
        if rv == 0:
            continue

        color_elem = self.get_query_param(int(rn), color=colors[rv_clip])
        data.append(color_elem)

    self.view.color_data = {
        "data": self._augment_chain(data),
        "nonSelectedColor": non_selected_color,
    }
    self.view.tooltips = {"data": self._augment_chain(tooltips)}
    return self

`summarize_peptide_df(df)`

Summarize a peptide DataFrame.

Source code in hdxms_datasets/view.py

def summarize_peptide_df(df: nw.DataFrame) -> str:
    """
    Build a multi-line text summary of a peptide DataFrame.

    Reports the number of unique (start, end) peptides, their mean length,
    the distinct exposures and the row count. When the columns
    ``n_clusters``, ``n_replicates`` or ``n_charges`` are present, their
    means are appended as well.

    """
    exposures = df["exposure"].unique().to_list()
    unique_peptides = df.select(["start", "end"]).unique()

    # Length is inclusive of both end points, hence the +1.
    length = (unique_peptides["end"] - unique_peptides["start"] + 1).alias("length")
    mean_length = unique_peptides.with_columns(length)["length"].mean()

    lines = [
        f"Number of unique peptides: {len(unique_peptides)}",
        f"Mean peptide length: {mean_length:.2f}",
        f"Exposures: {exposures}",
        f"Total number of data points: {len(df)}",
    ]

    # Optional per-row statistics, reported only when the column exists.
    optional_stats = (
        ("n_clusters", "Mean number of clusters per peptide/exposure: "),
        ("n_replicates", "Mean number of replicates per peptide/exposure: "),
        ("n_charges", "Mean number of charge states per peptide/exposure: "),
    )
    for column, prefix in optional_stats:
        if column in df.columns:
            lines.append(f"{prefix}{df[column].mean()}")

    return "\n".join(lines)

view

StructureView

chain_name property

residue_name property

__init__(structure, mapping=StructureMapping(), hide_water=True, **kwargs)

color_peptide(start, end, color='red', non_selected_color='lightgray')

highlight(resi)

non_overlapping_peptides(peptides, colors=None, non_selected_color='lightgray')

peptide_coverage(peptides, color='darkgreen', non_selected_color='lightgray')

peptide_redundancy(peptides, colors=None, clip=None, non_selected_color='lightgray')

summarize_peptide_df(df)

`StructureView`

`chain_name` `property`

`residue_name` `property`

`__init__(structure, mapping=StructureMapping(), hide_water=True, **kwargs)`

`color_peptide(start, end, color='red', non_selected_color='lightgray')`

`highlight(resi)`

`non_overlapping_peptides(peptides, colors=None, non_selected_color='lightgray')`

`peptide_coverage(peptides, color='darkgreen', non_selected_color='lightgray')`

`peptide_redundancy(peptides, colors=None, clip=None, non_selected_color='lightgray')`

`summarize_peptide_df(df)`