Luna Pathology

analysis

Created on April 27, 2021

@author: pashaa@mskcc.org

ml

BaseTorchClassifier

Bases: Module

Source code in src/luna/pathology/analysis/ml.py
class BaseTorchClassifier(nn.Module):
    def __init__(self, **kwargs):
        """Initialize BaseTorchClassifier

        A generic base class for a PyTorch classifier model. This serves as the base class inherited
        for model training and inference.

        Will run on cuda if available, on the device specified by the CUDA_VISIBLE_DEVICES environment variable

        Args:
            kwargs: Keyword arguments passed on to the subclass method
        """

        super(BaseTorchClassifier, self).__init__()

        self.cuda_is_available = torch.cuda.is_available()

        self.setup(**kwargs)

        if self.cuda_is_available:
            self.cuda()

    def setup(self, **kwargs):
        """Set classifier modules

        Template/abstract method where individual modules that make up the forward pass are configured

        Args:
            kwargs: Keyword arguments passed on to the subclass method
        """
        raise NotImplementedError("setup() has not been implemented in the subclass!")
__init__(**kwargs)

Initialize BaseTorchClassifier

A generic base class for a PyTorch classifier model. This serves as the base class inherited for model training and inference.

Will run on cuda if available, on the device specified by the CUDA_VISIBLE_DEVICES environment variable

Parameters:

    kwargs: Keyword arguments passed on to the subclass method. Default: {}
Source code in src/luna/pathology/analysis/ml.py
def __init__(self, **kwargs):
    """Initialize BaseTorchClassifier

    A generic base class for a PyTorch classifier model. This serves as the base class inherited
    for model training and inference.

    Will run on cuda if available, on the device specified by the CUDA_VISIBLE_DEVICES environment variable

    Args:
        kwargs: Keyword arguments passed on to the subclass method
    """

    super(BaseTorchClassifier, self).__init__()

    self.cuda_is_available = torch.cuda.is_available()

    self.setup(**kwargs)

    if self.cuda_is_available:
        self.cuda()
setup(**kwargs)

Set classifier modules

Template/abstract method where individual modules that make up the forward pass are configured

Parameters:

    kwargs: Keyword arguments passed on to the subclass method. Default: {}
Source code in src/luna/pathology/analysis/ml.py
def setup(self, **kwargs):
    """Set classifier modules

    Template/abstract method where individual modules that make up the forward pass are configured

    Args:
        kwargs: Keyword arguments passed on to the subclass method
    """
    raise NotImplementedError("setup() has not been implemented in the subclass!")
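
Example: a minimal sketch of a concrete subclass; the layer sizes and class count are illustrative assumptions, not part of the luna API.

import torch.nn as nn

from luna.pathology.analysis.ml import BaseTorchClassifier


class SimpleClassifier(BaseTorchClassifier):
    def setup(self, n_features: int = 64, n_classes: int = 2, **kwargs):
        # Modules registered here are in place before __init__ moves the
        # model to CUDA (when available)
        self.model = nn.Sequential(
            nn.Linear(n_features, 32),
            nn.ReLU(),
            nn.Linear(32, n_classes),
        )


classifier = SimpleClassifier(n_features=64, n_classes=2)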

BaseTorchTileClassifier

Bases: BaseTorchClassifier

Source code in src/luna/pathology/analysis/ml.py
class BaseTorchTileClassifier(BaseTorchClassifier):
    def forward(self, index, tile_data):
        """Forward pass for base classifier class

        Runs inference on a batch of tile data, returning predictions indexed by tile address

        Args:
            index (list[str]): Tile address indices with length B
            tile_data (torch.tensor): Input tiles of shape (B, *)

        Returns:
            pd.DataFrame: Dataframe of output features
        """
        if self.cuda_is_available:
            tile_data = tile_data.cuda()
        self.eval()
        with torch.no_grad():
            return pd.DataFrame(
                self.predict(tile_data).cpu().numpy(),
                index=index,
            )

    def setup(self, **kwargs):
        """Set classifier modules

        Template/abstract method where individual modules that make up the forward pass are configured

        Args:
            kwargs: Keyword arguments passed on to the subclass method
        """
        raise NotImplementedError("setup() has not been implemented in the subclass!")

    def predict(self, input_tiles: torch.tensor):
        """predict method

        Must be implemented in the subclass to pass the input tensor through the modules specified in setup()

        Args:
            input_tiles (torch.tensor): Input tiles of shape (B, *)

        Returns:
            torch.tensor: 2D tensor with (B, C) where B is the batch dimension and C are output classes or features
        """
        raise NotImplementedError("predict() has not been implemented in the subclass!")
forward(index, tile_data)

Forward pass for base classifier class

Runs inference on a batch of tile data, returning predictions indexed by tile address

Parameters:

    index (list[str]): Tile address indices with length B. Required.
    tile_data (torch.tensor): Input tiles of shape (B, *). Required.

Returns:

    pd.DataFrame: Dataframe of output features

Source code in src/luna/pathology/analysis/ml.py
def forward(self, index, tile_data):
    """Forward pass for base classifier class

    Runs inference on a batch of tile data, returning predictions indexed by tile address

    Args:
        index (list[str]): Tile address indices with length B
        tile_data (torch.tensor): Input tiles of shape (B, *)

    Returns:
        pd.DataFrame: Dataframe of output features
    """
    if self.cuda_is_available:
        tile_data = tile_data.cuda()
    self.eval()
    with torch.no_grad():
        return pd.DataFrame(
            self.predict(tile_data).cpu().numpy(),
            index=index,
        )
predict(input_tiles)

predict method

Must be implemented in the subclass to pass the input tensor through the modules specified in setup()

Parameters:

    input_tiles (torch.tensor): Input tiles of shape (B, *). Required.

Returns:

    torch.tensor: 2D tensor with (B, C) where B is the batch dimension and C are output classes or features

Source code in src/luna/pathology/analysis/ml.py
def predict(self, input_tiles: torch.tensor):
    """predict method

    Must be implemented in the subclass to pass the input tensor through the modules specified in setup()

    Args:
        input_tiles (torch.tensor): Input tiles of shape (B, *)

    Returns:
        torch.tensor: 2D tensor with (B, C) where B is the batch dimension and C are output classes or features
    """
    raise NotImplementedError("predict() has not been implemented in the subclass!")
setup(**kwargs)

Set classifier modules

Template/abstract method where individual modules that make up the forward pass are configured

Parameters:

    kwargs: Keyword arguments passed on to the subclass method. Default: {}
Source code in src/luna/pathology/analysis/ml.py
def setup(self, **kwargs):
    """Set classifier modules

    Template/abstract method where individual modules that make up the forward pass are configured

    Args:
        kwargs: Keyword arguments passed on to the subclass method
    """
    raise NotImplementedError("setup() has not been implemented in the subclass!")
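
Example: a hedged sketch of a concrete tile classifier; the ResNet backbone and class count are illustrative assumptions, not luna defaults.

import torch
import torch.nn as nn
from torchvision.models import resnet18

from luna.pathology.analysis.ml import BaseTorchTileClassifier


class TileScorer(BaseTorchTileClassifier):
    def setup(self, n_classes: int = 2, **kwargs):
        self.backbone = resnet18(weights=None)
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, n_classes)

    def predict(self, input_tiles: torch.Tensor):
        # (B, 3, H, W) -> (B, n_classes), as expected by forward()
        return torch.softmax(self.backbone(input_tiles), dim=1)


model = TileScorer(n_classes=2)
# forward() returns a pd.DataFrame of class scores indexed by tile address
df_scores = model(["x1_y1_z1", "x2_y2_z1"], torch.randn(2, 3, 224, 224))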

BaseTorchTileDataset

Bases: Dataset

Base class for a tile dataset

Implements the usual torch dataset methods, and additionally provides a decoding of the binary tile data. PIL images can be further preprocessed before becoming torch tensors via an abstract preprocess method

Will send the tensors to GPU if available, on the device specified by the CUDA_VISIBLE_DEVICES environment variable

Source code in src/luna/pathology/analysis/ml.py
class BaseTorchTileDataset(Dataset):
    """Base class for a tile dataset

    Implements the usual torch dataset methods, and additionally provides a decoding of the binary tile data.
    PIL images can be further preprocessed before becoming torch tensors via an abstract preprocess method

    Will send the tensors to GPU if available, on the device specified by the CUDA_VISIBLE_DEVICES environment variable
    """

    def __init__(
        self,
        tile_manifest: pd.DataFrame = None,
        tile_urlpath: str = "",
        label_cols: List[str] = [],
        using_ray: bool = False,
        storage_options: dict = {},
        **kwargs,
    ):
        """Initialize BaseTileDataset

        Can accept either a tile dataframe or a path to tile data

        Args:
            tile_manifest (pd.DataFrame): Dataframe of tile data
            tile_urlpath (str): Base URL/path of tile data
            label_cols (list[str]): (Optional) label columns to return as tensors, e.g. for training
            using_ray (bool): (Optional) Perform distributed dataloading with Ray for training
        """

        if tile_manifest is not None:
            self.tile_manifest = tile_manifest
        elif tile_urlpath:
            with open(tile_urlpath, **storage_options) as of:
                self.tile_manifest = pd.read_parquet(of).set_index("address")
        else:
            raise RuntimeError("Must specifiy either tile_manifest or tile_path")

        self.label_cols = label_cols
        self.using_ray = using_ray

        self.setup(**kwargs)

    def __len__(self):
        return len(self.tile_manifest)

    def __repr__(self):
        return f"TileDataset with {len(self.tile_manifest)} tiles, indexed by {self.tile_manifest.index.names}, returning label columns: {self.label_cols}"

    def __getitem__(self, idx: int):
        """Tile accessor

        Loads a tile image from the tile manifest. Always returns the index of the input dataframe row and the tile data.
        If label columns were specified, the 3rd position of the tuple is a tensor of the label data. If Ray is being used for
        model training, then only the image data and the label are returned.

        Args:
            idx (int): Integer index

        Returns:
            (optional str, torch.tensor, optional torch.tensor): tuple of the tile index and corresponding tile as a torch tensor, and metadata labels if specified
        """

        row = self.tile_manifest.iloc[idx]
        img = Image.fromarray(get_tile_array(row))

        if self.using_ray:
            if not (len(self.label_cols)):
                raise ValueError(
                    "If using Ray for training, you must provide a label column"
                )
            return self.preprocess(img), torch.tensor(row[self.label_cols]).squeeze()

        if len(self.label_cols):
            return (
                row.name,
                self.preprocess(img),
                torch.tensor(row[self.label_cols].to_list()),
            )
        else:
            return row.name, self.preprocess(img)

    def setup(self, **kwargs):
        """Set additional attributes for dataset class

        Template/abstract method where a dataset is configured

        Args:
            kwargs: Keyword arguments passed on to the subclass method
        """
        raise NotImplementedError("setup() has not been implemented in the subclass!")

    def preprocess(self, input_tile: Image):
        """Preprocessing method called for each tile patch

        Must be implemented in the subclass to accept a single PIL image and return a torch tensor.

        Args:
            input_tile (Image): Input tile as a PIL image

        Returns:
            torch.tensor: Output tile as preprocessed tensor
        """
        raise NotImplementedError(
            "preprocess() has not been implimented in the subclass!"
        )
__getitem__(idx)

Tile accessor

Loads a tile image from the tile manifest. Always returns the index of the input dataframe row and the tile data. If label columns were specified, the 3rd position of the tuple is a tensor of the label data. If Ray is being used for model training, then only the image data and the label are returned.

Parameters:

    idx (int): Integer index. Required.

Returns:

    (optional str, torch.tensor, optional torch.tensor): tuple of the tile index and corresponding tile as a torch tensor, and metadata labels if specified

Source code in src/luna/pathology/analysis/ml.py
def __getitem__(self, idx: int):
    """Tile accessor

    Loads a tile image from the tile manifest. Always returns the index of the input dataframe row and the tile data.
    If label columns were specified, the 3rd position of the tuple is a tensor of the label data. If Ray is being used for
    model training, then only the image data and the label are returned.

    Args:
        idx (int): Integer index

    Returns:
        (optional str, torch.tensor, optional torch.tensor): tuple of the tile index and corresponding tile as a torch tensor, and metadata labels if specified
    """

    row = self.tile_manifest.iloc[idx]
    img = Image.fromarray(get_tile_array(row))

    if self.using_ray:
        if not (len(self.label_cols)):
            raise ValueError(
                "If using Ray for training, you must provide a label column"
            )
        return self.preprocess(img), torch.tensor(row[self.label_cols]).squeeze()

    if len(self.label_cols):
        return (
            row.name,
            self.preprocess(img),
            torch.tensor(row[self.label_cols].to_list()),
        )
    else:
        return row.name, self.preprocess(img)
__init__(tile_manifest=None, tile_urlpath='', label_cols=[], using_ray=False, storage_options={}, **kwargs)

Initialize BaseTileDataset

Can accept either a tile dataframe or a path to tile data

Parameters:

    tile_manifest (pd.DataFrame): Dataframe of tile data. Default: None
    tile_urlpath (str): Base URL/path of tile data. Default: ''
    label_cols (list[str]): (Optional) label columns to return as tensors, e.g. for training. Default: []
    using_ray (bool): (Optional) Perform distributed dataloading with Ray for training. Default: False
    storage_options (dict): (Optional) storage options to pass to reading functions. Default: {}
Source code in src/luna/pathology/analysis/ml.py
def __init__(
    self,
    tile_manifest: pd.DataFrame = None,
    tile_urlpath: str = "",
    label_cols: List[str] = [],
    using_ray: bool = False,
    storage_options: dict = {},
    **kwargs,
):
    """Initialize BaseTileDataset

    Can accept either a tile dataframe or a path to tile data

    Args:
        tile_manifest (pd.DataFrame): Dataframe of tile data
        tile_urlpath (str): Base URL/path of tile data
        label_cols (list[str]): (Optional) label columns to return as tensors, e.g. for training
        using_ray (bool): (Optional) Perform distributed dataloading with Ray for training
    """

    if tile_manifest is not None:
        self.tile_manifest = tile_manifest
    elif tile_urlpath:
        with open(tile_urlpath, **storage_options) as of:
            self.tile_manifest = pd.read_parquet(of).set_index("address")
    else:
        raise RuntimeError("Must specifiy either tile_manifest or tile_path")

    self.label_cols = label_cols
    self.using_ray = using_ray

    self.setup(**kwargs)
preprocess(input_tile)

Preprocessing method called for each tile patch

Must be implemented in the subclass to accept a single PIL image and return a torch tensor.

Parameters:

    input_tile (Image): Input tile as a PIL image. Required.

Returns:

    torch.tensor: Output tile as preprocessed tensor

Source code in src/luna/pathology/analysis/ml.py
def preprocess(self, input_tile: Image):
    """Preprocessing method called for each tile patch

    Must be implemented in the subclass to accept a single PIL image and return a torch tensor.

    Args:
        input_tile (Image): Input tile as a PIL image

    Returns:
        torch.tensor: Output tile as preprocessed tensor
    """
    raise NotImplementedError(
        "preprocess() has not been implimented in the subclass!"
    )
setup(**kwargs)

Set additional attributes for dataset class

Template/abstract method where a dataset is configured

Parameters:

    kwargs: Keyword arguments passed on to the subclass method. Default: {}
Source code in src/luna/pathology/analysis/ml.py
def setup(self, **kwargs):
    """Set additional attributes for dataset class

    Template/abstract method where a dataset is configured

    Args:
        kwargs: Keyword arguments passed on to the subclass method
    """
    raise NotImplementedError("setup() has not been implemented in the subclass!")
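
Example: a minimal sketch of a concrete tile dataset; the parquet path, label column, and transform are illustrative assumptions.

import torch
from PIL import Image
from torchvision import transforms

from luna.pathology.analysis.ml import BaseTorchTileDataset


class TileDataset(BaseTorchTileDataset):
    def setup(self, **kwargs):
        self.transform = transforms.Compose(
            [transforms.Resize(224), transforms.ToTensor()]
        )

    def preprocess(self, input_tile: Image) -> torch.Tensor:
        # PIL image -> torch tensor
        return self.transform(input_tile)


dataset = TileDataset(tile_urlpath="tiles.parquet", label_cols=["regional_label"])
address, tile_tensor, label = dataset[0]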

HDF5Dataset

Bases: Dataset

General dataset that uses an HDF5 manifest convention

Applies preprocessing steps per instance, returning aggregate batches of data. Useful for training and inference.

Source code in src/luna/pathology/analysis/ml.py
class HDF5Dataset(Dataset):
    """General dataset that uses a HDF5 manifest convention

    Applies preprocessing steps per instance, returning aggregate batches of data. Useful for training and inference.
    """

    def __init__(
        self,
        hdf5_manifest,
        preprocess=nn.Identity(),
        label_cols=[],
        using_ray=False,
        storage_options={},
    ):
        """Initialize HD5FDataset

        Args:
            hdf5_manifest (pd.DataFrame): Dataframe of H5 data
            preprocess (transform): Function to apply to every bit of data
            label_cols (list[str]): (Optional) label columns to return as tensors, e.g. for training
            using_ray (bool): (Optional) Perform distributed dataloading with Ray for training
        """
        self.hdf5_manifest = hdf5_manifest
        self.label_cols = label_cols
        self.using_ray = using_ray
        self.preprocess = preprocess
        self.storage_options = storage_options

    def __len__(self):
        return len(self.hdf5_manifest)

    def set_preprocess(self, preprocess):
        self.preprocess = preprocess

    def __repr__(self):
        return f"HDF5Dataset with {len(self.hdf5_manifest)} tiles, indexed by {self.hdf5_manifest.index.names}, returning label columns: {self.label_cols}"

    def __getitem__(self, idx: int):
        """Tile accessor

        Loads a tile image from the tile manifest. Returns the preprocessed tile data, plus a tensor of the label
        data if label columns were specified, else the dataframe index of the tile. If Ray is being used for
        model training, a label column must be provided.

        Args:
            idx (int): Integer index

        Returns:
            (optional str, torch.tensor, optional torch.tensor): tuple of the tile index and corresponding tile as a torch tensor, and metadata labels if specified, else the index
        """

        row = self.hdf5_manifest.iloc[idx]
        img = get_tile_array(row, self.storage_options)

        if self.using_ray and not (len(self.label_cols)):
            raise ValueError(
                "If using Ray for training, you must provide a label column"
            )
        if len(self.label_cols):
            return self.preprocess(img), torch.tensor(row[self.label_cols]).squeeze()
        else:
            return self.preprocess(img), row.name
__getitem__(idx)

Tile accessor

Loads a tile image from the tile manifest. Returns the preprocessed tile data, plus a tensor of the label data if label columns were specified, else the dataframe index of the tile. If Ray is being used for model training, a label column must be provided.

Parameters:

    idx (int): Integer index. Required.

Returns:

    (optional str, torch.tensor, optional torch.tensor): tuple of the tile index and corresponding tile as a torch tensor, and metadata labels if specified, else the index

Source code in src/luna/pathology/analysis/ml.py
def __getitem__(self, idx: int):
    """Tile accessor

    Loads a tile image from the tile manifest. Returns the preprocessed tile data, plus a tensor of the label
    data if label columns were specified, else the dataframe index of the tile. If Ray is being used for
    model training, a label column must be provided.

    Args:
        idx (int): Integer index

    Returns:
        (optional str, torch.tensor, optional torch.tensor): tuple of the tile index and corresponding tile as a torch tensor, and metadata labels if specified, else the index
    """

    row = self.hdf5_manifest.iloc[idx]
    img = get_tile_array(row, self.storage_options)

    if self.using_ray and not (len(self.label_cols)):
        raise ValueError(
            "If using Ray for training, you must provide a label column"
        )
    if len(self.label_cols):
        return self.preprocess(img), torch.tensor(row[self.label_cols]).squeeze()
    else:
        return self.preprocess(img), row.name
__init__(hdf5_manifest, preprocess=nn.Identity(), label_cols=[], using_ray=False, storage_options={})

Initialize HDF5Dataset

Parameters:

    hdf5_manifest (pd.DataFrame): Dataframe of H5 data. Required.
    preprocess (transform): Function to apply to every bit of data. Default: nn.Identity()
    label_cols (list[str]): (Optional) label columns to return as tensors, e.g. for training. Default: []
    using_ray (bool): (Optional) Perform distributed dataloading with Ray for training. Default: False
    storage_options (dict): (Optional) storage options to pass to reading functions. Default: {}
Source code in src/luna/pathology/analysis/ml.py
def __init__(
    self,
    hdf5_manifest,
    preprocess=nn.Identity(),
    label_cols=[],
    using_ray=False,
    storage_options={},
):
    """Initialize HD5FDataset

    Args:
        hdf5_manifest (pd.DataFrame): Dataframe of H5 data
        preprocess (transform): Function to apply to every bit of data
        label_cols (list[str]): (Optional) label columns to return as tensors, e.g. for training
        using_ray (bool): (Optional) Perform distributed dataloading with Ray for training
    """
    self.hdf5_manifest = hdf5_manifest
    self.label_cols = label_cols
    self.using_ray = using_ray
    self.preprocess = preprocess
    self.storage_options = storage_options
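
Example: a hedged usage sketch wrapping an HDF5 tile manifest in a torch DataLoader; the parquet path and label column are hypothetical.

import pandas as pd
from torch.utils.data import DataLoader

from luna.pathology.analysis.ml import HDF5Dataset

hdf5_manifest = pd.read_parquet("tile_manifest.parquet").set_index("address")
dataset = HDF5Dataset(hdf5_manifest, label_cols=["tumor_label"])
loader = DataLoader(dataset, batch_size=32, num_workers=4)

for tiles, labels in loader:
    ...  # training/inference loop over preprocessed tiles and label tensors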

TorchTransformModel

Source code in src/luna/pathology/analysis/ml.py
class TorchTransformModel:
    def get_preprocess(self, **kwargs):
        """The transform model's preprocessing code

        Args:
            kwargs: Keyword arguments passed on to the subclass method
        """
        raise NotImplementedError(
            "get_preprocess() has not been implimented in the subclass!"
        )

    def transform(self, X: torch.Tensor):
        """Main transformer method, X -> X'

        Args:
            X (torch.Tensor): input tensor

        Returns:
            torch.tensor: transformed output tensor X'
        """
        raise NotImplementedError(
            "transform() has not been implimented in the subclass!"
        )

    pass
get_preprocess(**kwargs)

The transform model's preprocessing code

Parameters:

    kwargs: Keyword arguments passed on to the subclass method. Default: {}
Source code in src/luna/pathology/analysis/ml.py
def get_preprocess(self, **kwargs):
    """The transform model's preprocessing code

    Args:
        kwargs: Keyword arguments passed on to the subclass method
    """
    raise NotImplementedError(
        "get_preprocess() has not been implimented in the subclass!"
    )
transform(X)

Main transformer method, X -> X'

Parameters:

    X (torch.Tensor): input tensor. Required.

Returns:

    torch.tensor: transformed output tensor X'

Source code in src/luna/pathology/analysis/ml.py
def transform(self, X: torch.Tensor):
    """Main transformer method, X -> X'

    Args:
        X (torch.Tensor): input tensor

    Returns:
        torch.tensor: transformed output tensor X'
    """
    raise NotImplementedError(
        "transform() has not been implimented in the subclass!"
    )
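
Example: a minimal sketch of a transform model for feature extraction; the truncated ResNet backbone is an illustrative choice, not a luna default.

import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.models import resnet18

from luna.pathology.analysis.ml import TorchTransformModel


class ResnetFeatureModel(TorchTransformModel):
    def __init__(self):
        backbone = resnet18(weights=None)
        # drop the classification head, keep pooled features
        self.model = nn.Sequential(*list(backbone.children())[:-1])
        self.model.eval()

    def get_preprocess(self, **kwargs):
        return transforms.Compose([transforms.ToTensor()])

    def transform(self, X: torch.Tensor):
        with torch.no_grad():
            return self.model(X).flatten(start_dim=1)  # (B, 512) features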

get_group_stratified_sampler(df_nh, label_col, group_col, num_splits=5, random_seed=42)

Generates samplers for a torch DataLoader that are stratified by a given group set (i.e. a column in a dataframe corresponding to patient identifiers) and balanced between target labels

Parameters:

    df_nh (pd.DataFrame): A non-hierarchical/non-multi-indexed/flat dataframe. Required.
    label_col (str): The column name for the classes to balance across training and validation splits. Required.
    group_col (str): The column name used to stratify the data (i.e. patient ids). Required.
    num_splits (int): (Optional) The number of folds, must be at least 2. Default: 5
    random_seed (int): (Optional) Seed used for shuffling within StratifiedGroupKFold. Default: 42

Returns:

    Tuple[SubsetRandomSampler, SubsetRandomSampler]: a tuple of samplers over the training and validation indices

Source code in src/luna/pathology/analysis/ml.py
def get_group_stratified_sampler(
    df_nh: pd.DataFrame,
    label_col: str,
    group_col: str,
    num_splits: int = 5,
    random_seed: int = 42,
) -> Tuple[SubsetRandomSampler, SubsetRandomSampler]:
    """Generates sampler indices for torch DataLoader object that are
    stratified by a given group set (ie a column in a dataframe
    corresponding to patient identifiers), and balanced between target
    labels

    Args:
        df_nh (pd.DataFrame): A non-hierarchical/non-multi-indexed/flat dataframe
        label_col (str): The column name for the classes to balance across training and validation splits.
        group_col (str): The column name used to stratify the data (ie patient ids).
        num_splits (int): (Optional) The number of folds, must at least be 2.
    Returns:
        Tuple[SubsetRandomSampler, SubsetRandomSampler]: a tuple of samplers over the training and validation indices
    """

    cv = StratifiedGroupKFold(
        n_splits=num_splits, random_state=random_seed, shuffle=True
    )
    classes = df_nh[label_col]
    groups = df_nh[group_col]
    for fold_idx, (train_indices, val_indices) in enumerate(
        cv.split(df_nh, classes, groups)
    ):
        # check integrity. asserts that same group (ie patients) aren't in both
        # train and validation splits
        train_groups, val_groups = groups[train_indices], groups[val_indices]
        assert len(set(train_groups) & set(val_groups)) == 0

    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    return (train_sampler, val_sampler)
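
Example: a hedged usage sketch building patient-stratified DataLoaders; the parquet path and column names are hypothetical.

import pandas as pd
from torch.utils.data import DataLoader

from luna.pathology.analysis.ml import HDF5Dataset, get_group_stratified_sampler

df = pd.read_parquet("tile_manifest.parquet").reset_index()
train_sampler, val_sampler = get_group_stratified_sampler(
    df, label_col="tumor_label", group_col="patient_id"
)

dataset = HDF5Dataset(df, label_cols=["tumor_label"])
train_loader = DataLoader(dataset, batch_size=32, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=32, sampler=val_sampler)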

post_transform_to_2d(input)

Convert input to a 2D numpy array on CPU

Parameters:

    input (torch.tensor): tensor input of shape [B, *] where B is the batch dimension. Required.
Source code in src/luna/pathology/analysis/ml.py
def post_transform_to_2d(input: np.array) -> np.array:
    """Convert input to a 2D numpy array on CPU

    Args:
        input (torch.tensor): tensor input of shape [B, *] where B is the batch dimension
    """
    if isinstance(input, torch.Tensor):
        input = input.cpu().numpy()

    if not len(input.shape) == 2:
        warnings.warn(f"Reshaping model output (was {input.shape}) to 2D")
        input = np.reshape(input, (input.shape[0], -1))

    return input
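
Example: a brief usage sketch flattening a (B, C, 1, 1) model output to (B, C).

import numpy as np

from luna.pathology.analysis.ml import post_transform_to_2d

features = np.random.rand(8, 512, 1, 1)
flat = post_transform_to_2d(features)  # warns and reshapes to (8, 512)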

cli

Created on April 27, 2021

@author: pashaa@mskcc.org

create_wide_shape_features_query

cli(shape_features_urlpath, storage_options={})

Prints wide shape features query for Dremio

Parameters:

    shape_features_urlpath (str): URL/path to shape features parquet file. Required.
    storage_options (dict): storage options to pass to reading functions. Default: {}
Source code in src/luna/pathology/cli/create_wide_shape_features_query.py
def cli(
    shape_features_urlpath: str,
    storage_options: dict = {}
):
    """Prints wide shape features query for Dremio

    Args:
        shape_features_urlpath (str): URL/path to shape features parquet file
        storage_options (dict): storage options to pass to reading functions
    """
    config = get_config(vars())
    query = create_wide_shape_features_query(
        config['shape_features_urlpath'],
        config['storage_options']
    )

    print(query)

create_wide_shape_features_query(shape_features_urlpath, storage_options={})

Gets wide shape features query for Dremio

Parameters:

    shape_features_urlpath (str): URL/path to shape feature parquet file. Required.
    storage_options (dict): storage options to pass to reading functions. Default: {}
Source code in src/luna/pathology/cli/create_wide_shape_features_query.py
def create_wide_shape_features_query(
    shape_features_urlpath: str,
    storage_options: dict = {},
):
    """Gets wide shape features query for dremio

    Args:
        shape_features_urlpath (str): URL/path to shape feature parquet file
        storage_options (dict): storage options to pass to reading functions
    """
    with open(shape_features_urlpath, **storage_options) as of:
        df = pd.read_parquet(of)
    ShapeFeaturesSchema.validate(df)
    df['merged_variable'] = df.Parent + " " + df.Class + " " + df.variable
    return create_query(df['merged_variable'].unique())

dsa_annotation_etl

DsaAnnotationProcessor

Source code in src/luna/pathology/cli/dsa_annotation_etl.py
class DsaAnnotationProcessor:
    def __init__(self, girder, annotation_name, output_urlpath, storage_options):
        self.girder = girder
        self.annotation_name = annotation_name
        self.output_urlpath = output_urlpath
        self.storage_options = storage_options

    def histomics_annotation_table_to_geojson(
        self, df, properties, shape_type_col="type", x_col="x_coords", y_col="y_coords"
    ):
        """Takes a table generated by histomicstk (parse_slide_annotations_into_tables) and creates a geojson"""

        features = []
        df[properties] = df[properties].fillna("None")

        logger.info(f"About to turn {len(df)} geometric annotations into a geojson!")

        for _, row in df.iterrows():
            x, y = deepcopy(row[x_col]), deepcopy(row[y_col])
            if row[shape_type_col] == "polyline":
                x.append(x[0]), y.append(y[0])
                geometry = Polygon(
                    [list(zip(x, y))]
                )  # Polygons are once nested to account for holes

            elif row[shape_type_col] == "point":
                geometry = Point((x[0], y[0]))
            else:
                continue  # don't process non-polyline(regional) or point annotations

            logger.info(f"\tCreated geometry {str(shape(geometry)):.40s}...")
            feature = Feature(
                geometry=geometry, properties={prop: row[prop] for prop in properties}
            )
            features.append(feature)

        feature_collection = FeatureCollection(features)
        logger.info(
            f"Checking geojson, errors with geojson FeatureCollection: {feature_collection.errors()}"
        )

        return feature_collection

    def build_proxy_repr_dsa(self, row):
        """Build a proxy table slice given, primarily, a DSA itemId (slide_item_uuid)"""

        itemId = row.slide_item_uuid
        slide_id = row.slide_id

        logger.info(
            f"Trying to process annotation for slide_id={slide_id}, item_id={itemId}"
        )

        annotation_uuids = get_annotation_uuid(
            self.girder, item_id=itemId, annotation_name=self.annotation_name
        )

        if annotation_uuids is None:
            return None

        # need to loop through annotation uuids since the same annotation name
        # can correspond to multiple uuids (a 'Regional' annotation on the same
        # slide made two days apart)
        df_annotations = []
        for annotation_uuid in annotation_uuids:
            df_annotation = get_annotation_df(self.girder, annotation_uuid)
            df_annotations.append(df_annotation)

        df_annotations = pd.concat(df_annotations)

        # This turns the regional data into a nice geojson
        feature_collection = self.histomics_annotation_table_to_geojson(
            df_annotations,
            ["annotation_girder_id", "element_girder_id", "group", "label"],
            shape_type_col="type",
            x_col="x_coords",
            y_col="y_coords",
        )

        fs, urlpath = fsspec.core.url_to_fs(self.output_urlpath, **self.storage_options)

        slide_geojson_path = str(Path(urlpath) / f"{slide_id}.annotation.geojson")
        with fs.open(slide_geojson_path, "w") as fp:
            json.dump(feature_collection, fp)  # Finally, save it!

        df_annotation_proxy = pd.concat(
            [
                df_annotations,
                pd.DataFrame(
                    [
                        {
                            "slide_item_uuid": itemId,
                            "type": "geojson",
                            "slide_geojson": slide_geojson_path,
                        }
                    ]
                ),
            ]
        )  # Add our geojson as a special type of annotation

        return df_annotation_proxy

    def run(self, row):
        """Run DsaAnnotationProcessor

        Args:
            row (pd.Series): row of a DSA slide table

        Returns:
            pd.DataFrame: annotation metadata
        """

        df = self.build_proxy_repr_dsa(row)

        return df
build_proxy_repr_dsa(row)

Build a proxy table slice given, primarily, a DSA itemId (slide_item_uuid)

Source code in src/luna/pathology/cli/dsa_annotation_etl.py
def build_proxy_repr_dsa(self, row):
    """Build a proxy table slice given, primarily, a DSA itemId (slide_item_uuid)"""

    itemId = row.slide_item_uuid
    slide_id = row.slide_id

    logger.info(
        f"Trying to process annotation for slide_id={slide_id}, item_id={itemId}"
    )

    annotation_uuids = get_annotation_uuid(
        self.girder, item_id=itemId, annotation_name=self.annotation_name
    )

    if annotation_uuids is None:
        return None

    # need to loop through annotation uuids since the same annotation name
    # can correspond to multiple uuids (a 'Regional' annotation on the same
    # slide made two days apart)
    df_annotations = []
    for annotation_uuid in annotation_uuids:
        df_annotation = get_annotation_df(self.girder, annotation_uuid)
        df_annotations.append(df_annotation)

    df_annotations = pd.concat(df_annotations)

    # This turns the regional data into a nice geojson
    feature_collection = self.histomics_annotation_table_to_geojson(
        df_annotations,
        ["annotation_girder_id", "element_girder_id", "group", "label"],
        shape_type_col="type",
        x_col="x_coords",
        y_col="y_coords",
    )

    fs, urlpath = fsspec.core.url_to_fs(self.output_urlpath, **self.storage_options)

    slide_geojson_path = str(Path(urlpath) / f"{slide_id}.annotation.geojson")
    with fs.open(slide_geojson_path, "w") as fp:
        json.dump(feature_collection, fp)  # Finally, save it!

    df_annotation_proxy = pd.concat(
        [
            df_annotations,
            pd.DataFrame(
                [
                    {
                        "slide_item_uuid": itemId,
                        "type": "geojson",
                        "slide_geojson": slide_geojson_path,
                    }
                ]
            ),
        ]
    )  # Add our geojson as a special type of annotation

    return df_annotation_proxy
histomics_annotation_table_to_geojson(df, properties, shape_type_col='type', x_col='x_coords', y_col='y_coords')

Takes a table generated by histomicstk (parse_slide_annotations_into_tables) and creates a geojson

Source code in src/luna/pathology/cli/dsa_annotation_etl.py
def histomics_annotation_table_to_geojson(
    self, df, properties, shape_type_col="type", x_col="x_coords", y_col="y_coords"
):
    """Takes a table generated by histomicstk (parse_slide_annotations_into_tables) and creates a geojson"""

    features = []
    df[properties] = df[properties].fillna("None")

    logger.info(f"About to turn {len(df)} geometric annotations into a geojson!")

    for _, row in df.iterrows():
        x, y = deepcopy(row[x_col]), deepcopy(row[y_col])
        if row[shape_type_col] == "polyline":
            x.append(x[0]), y.append(y[0])
            geometry = Polygon(
                [list(zip(x, y))]
            )  # Polygons are once nested to account for holes

        elif row[shape_type_col] == "point":
            geometry = Point((x[0], y[0]))
        else:
            continue  # don't process non-polyline(regional) or point annotations

        logger.info(f"\tCreated geometry {str(shape(geometry)):.40s}...")
        feature = Feature(
            geometry=geometry, properties={prop: row[prop] for prop in properties}
        )
        features.append(feature)

    feature_collection = FeatureCollection(features)
    logger.info(
        f"Checking geojson, errors with geojson FeatureCollection: {feature_collection.errors()}"
    )

    return feature_collection
run(row)

Run DsaAnnotationProcessor

Parameters:

    row (pd.Series): row of a DSA slide table. Required.

Returns:

    pd.DataFrame: annotation metadata

Source code in src/luna/pathology/cli/dsa_annotation_etl.py
def run(self, row):
    """Run DsaAnnotationProcessor

    Args:
        row (pd.Series): row of a DSA slide table

    Returns:
        pd.DataFrame: annotation metadata
    """

    df = self.build_proxy_repr_dsa(row)

    return df

cli(dsa_endpoint='???', collection_name='???', annotation_name='???', username='${oc.env:DSA_USERNAME}', password='${oc.env:DSA_PASSWORD}', local_config='', output_urlpath='.', storage_options={})

DSA annotation ETL

Parameters:

    dsa_endpoint (str): path to input data. Default: '???'
    collection_name (str): collection name in DSA. Default: '???'
    annotation_name (str): annotation name. Default: '???'
    username (str): DSA username (defaults to environment variable DSA_USERNAME)
    password (str): DSA password (defaults to environment variable DSA_PASSWORD)
    local_config (str): local config yaml url/path. Default: ''
    output_urlpath (str): output/working url/path prefix. Default: '.'
    storage_options (dict): options to pass to reading/writing functions. Default: {}

Returns:

    pd.DataFrame: metadata from function call

Source code in src/luna/pathology/cli/dsa_annotation_etl.py
@timed
@save_metadata
def cli(
    dsa_endpoint: str = "???",
    collection_name: str = "???",
    annotation_name: str = "???",
    username: str = "${oc.env:DSA_USERNAME}",
    password: str = "${oc.env:DSA_PASSWORD}",
    local_config: str = "",
    output_urlpath: str = ".",
    storage_options: dict = {},
):
    """DSA annotation ETL
    Args:
        dsa_endpoint (str): path to input data
        collection_name (str): collection name in DSA
        annotation_name (str): annotation name
        username (str): DSA username (defaults to environment variable DSA_USERNAME)
        password (str): DSA password (defaults to environment variable DSA_PASSWORD)
        local_config (str): local config yaml url/path
        output_urlpath (str): output/working url/path prefix
        storage_options (dict): options to pass to reading/writing functions

    Returns:
        pd.DataFrame: metadata from function call
    """
    config = get_config(vars())

    configure_dask_client()

    df_full_annotation_data = dsa_annotation_etl(
        config["dsa_endpoint"],
        config["collection_name"],
        config["annotation_name"],
        config["username"],
        config["password"],
        config["output_urlpath"],
        config["storage_options"],
    )

    output_fs, output_path = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["storage_options"]
    )

    slide_annotation_dataset_path = str(
        Path(output_path)
        / f"slide_annotation_dataset_{config['collection_name']}_{config['annotation_name']}.parquet"
    )

    if len(df_full_annotation_data) > 0:
        with output_fs.open(slide_annotation_dataset_path, "wb") as of:
            df_full_annotation_data.to_parquet(of)

        properties = {
            "slide_annotation_dataset": slide_annotation_dataset_path,
            "segment_keys": {
                "dsa_collection_uuid": df_full_annotation_data["collection_uuid"][0]
            },
        }
        return properties

dsa_annotation_etl(dsa_endpoint, collection_name, annotation_name, username, password, output_urlpath, storage_options)

DSA annotation ETL

Parameters:

    dsa_endpoint (str): path to input data. Required.
    collection_name (str): collection name in DSA. Required.
    annotation_name (str): annotation name. Required.
    username (str): DSA username. Required.
    password (str): DSA password. Required.
    output_urlpath (str): output/working url/path prefix. Required.
    storage_options (dict): options to pass to reading/writing functions. Required.

Returns:

    pd.DataFrame: slide etl dataframe with annotation columns

Source code in src/luna/pathology/cli/dsa_annotation_etl.py
def dsa_annotation_etl(
    dsa_endpoint: str,
    collection_name: str,
    annotation_name: str,
    username: str,
    password: str,
    output_urlpath: str,
    storage_options: dict,
):
    """DSA annotation ETL

    Args:
        dsa_endpoint (str): path to input data
        collection_name (str): collection name in DSA
        annotation_name (str): annotation name
        username (str): DSA username
        password (str): DSA password
        output_urlpath (str): output/working url/path prefix
        storage_options (dict): options to pass to reading/writing functions

    Returns:
        pd.DataFrame: slide etl dataframe with annotation columns
    """
    client = get_or_create_dask_client()
    # girder = girder_client.GirderClient(apiUrl=dsa_endpoint)
    try:
        girder = girder_client.GirderClient(apiUrl=dsa_endpoint)
        # girder python client doesn't support turning off ssl verify.
        # can be removed once we replace the self-signed cert
        session = requests.Session()
        session.verify = False
        girder._session = session
        girder.authenticate(username, password)

        # check DSA connection
        system_check(girder)

    except Exception as exc:
        logger.error(exc)
        raise RuntimeError("Error connecting to DSA API")

    # dsa_authenticate(girder, username, password)

    collection_uuid = get_collection_uuid(girder, collection_name)

    df_slide_items = get_slide_df(girder, collection_uuid)

    if len(df_slide_items) == 0:
        logger.info("No slides found, exitting!")
        return {}

    # Initialize the DsaAnnotationProcessor
    dap = DsaAnnotationProcessor(
        girder, annotation_name, output_urlpath, storage_options
    )

    logger.info("Dashboard: " + client.dashboard_link)
    df_polygon_data = pd.concat(
        [
            x.result()
            for x in as_completed(
                [client.submit(dap.run, row) for _, row in df_slide_items.iterrows()]
            )
        ]
    )

    # Join the slide level data with the polygon level data, so this is a lot of information!
    df_full_annotation_data = (
        df_slide_items.set_index("slide_item_uuid")
        .join(
            df_polygon_data.set_index("slide_item_uuid"),
            how="right",
            rsuffix="annotation",
        )
        .set_index("slide_id")
    )

    df_full_annotation_data.loc[:, "collection_uuid"] = collection_uuid
    df_full_annotation_data.loc[:, "collection_name"] = collection_name
    df_full_annotation_data.loc[:, "annotation_name"] = annotation_name
    df_full_annotation_data = df_full_annotation_data.drop(columns=["meta"])
    df_full_annotation_data = df_full_annotation_data.rename(
        columns={"group": "group_name"}
    )

    print(df_full_annotation_data)

    # Our dataset is a combination of polyline, point, and geojson annotations!
    logger.info(
        f"""Created {len(df_full_annotation_data.query("type=='geojson'"))} geojsons, {len(df_full_annotation_data.query("type=='point'"))} points, and {len(df_full_annotation_data.query("type=='polyline'"))} polygons"""
    )

    return df_full_annotation_data
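
Example: a hedged usage sketch; the endpoint, collection, annotation name, and credentials are placeholders. The function obtains a Dask client via get_or_create_dask_client().

from luna.pathology.cli.dsa_annotation_etl import dsa_annotation_etl

df_annotations = dsa_annotation_etl(
    dsa_endpoint="http://localhost:8080/api/v1",
    collection_name="my-collection",
    annotation_name="Regional",
    username="user",
    password="pass",
    output_urlpath="./annotations",
    storage_options={},
)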

dsa_upload

__upload_annotation_to_dsa(gc, dsa_endpoint_url, annotation_file_urlpath, collection_name, image_filename, force=False, storage_options={})

Upload annotation to DSA

Upload json annotation file as a new annotation to the image in the DSA collection.

Parameters:

    gc (girder_client.GirderClient): an authenticated girder client. Required.
    dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1. Required.
    annotation_file_urlpath (string): URL/path to a DSA annotation json file. Required.
    collection_name (string): name of the collection in DSA. Required.
    image_filename (string): name of the image file in DSA e.g. 123.svs. Required.
    force (bool): upload even if annotation with same name exists for the slide. Default: False
    storage_options (dict): options to pass to reading functions. Default: {}

Returns:

    dict: item_uuid. None if item doesn't exist

Source code in src/luna/pathology/cli/dsa_upload.py
def __upload_annotation_to_dsa(
    gc: girder_client.GirderClient,
    dsa_endpoint_url: str,
    annotation_file_urlpath: str,
    collection_name: str,
    image_filename: str,
    force: bool = False,
    storage_options: dict = {},
):
    """Upload annotation to DSA

    Upload json annotation file as a new annotation to the image in the DSA collection.

    Args:
        gc (girder_client.GirderClient): an authenticated girder client
        dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1
        annotation_file_urlpath (string): URL/path to a DSA annotation json file
        collection_name (string): name of the collection in DSA
        image_filename (string): name of the image file in DSA e.g. 123.svs
        force (bool): upload even if annotation with same name exists for the slide
        storage_options (dict): options to pass to reading functions

    Returns:
        dict: item_uuid. None if item doesn't exist
    """

    with open(annotation_file_urlpath, **storage_options).open() as annotation_json:
        dsa_annotation = json.load(annotation_json)

    if not force:
        slide_annotation = get_slide_annotation(
            image_filename, dsa_annotation["name"], collection_name, gc
        )
        if slide_annotation:
            logger.info(
                f"Found {slide_annotation[1]['annotation_id']}: slide {image_filename} in collection {collection_name} already has an annotation named {dsa_annotation['name']}"
            )
            return slide_annotation[1]["annotation_id"]

    dsa_uuid = get_item_uuid(gc, image_filename, collection_name)

    if dsa_uuid:
        dsa_uuid = push_annotation_to_dsa_image(
            dsa_uuid,
            annotation_file_urlpath,
            dsa_endpoint_url[:-6],
            gc,
            storage_options,
        )

    return dsa_uuid

cli(dsa_endpoint_url='???', annotation_file_urlpath='', annotation_file_list_urlpath='', collection_name='???', image_filename='', username='${oc.env:DSA_USERNAME}', password='${oc.env:DSA_PASSWORD}', force=False, insecure=False, storage_options={}, local_config='')

Upload annotation to DSA

Upload json annotation file as a new annotation to the image in the DSA collection.

Parameters:

    dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1. Default: '???'
    annotation_file_urlpath (string): URL/path to a DSA annotation json file. Default: ''
    annotation_file_list_urlpath (string): URL/path to a file listing DSA annotation json files, one per line. Default: ''
    collection_name (string): name of the collection in DSA. Default: '???'
    image_filename (string): name of the image file in DSA e.g. 123.svs. If not specified, inferred from annotation_file_urlpath. Default: ''
    username (string): DSA username (defaults to environment variable DSA_USERNAME)
    password (string): DSA password (defaults to environment variable DSA_PASSWORD)
    force (bool): upload even if annotation with same name exists for the slide. Default: False
    insecure (bool): insecure ssl. Default: False
    storage_options (dict): options to pass to reading functions. Default: {}
    local_config (string): local config yaml url/path. Default: ''

Returns:

    dict: metadata

Source code in src/luna/pathology/cli/dsa_upload.py
@timed
@save_metadata
def cli(
    dsa_endpoint_url: str = "???",
    annotation_file_urlpath: str = "",
    annotation_file_list_urlpath: str = "",
    collection_name: str = "???",
    image_filename: str = "",
    username: str = "${oc.env:DSA_USERNAME}",
    password: str = "${oc.env:DSA_PASSWORD}",
    force: bool = False,
    insecure: bool = False,
    storage_options: dict = {},
    local_config: str = "",
):
    """Upload annotation to DSA

    Upload json annotation file as a new annotation to the image in the DSA collection.

    Args:
        dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1
        annotation_file_urlpath (string): URL/path to a DSA annotation json file
        annotation_file_list_urlpath (string): URL/path to a file listing DSA annotation json files, one per line
        collection_name (string): name of the collection in DSA
        image_filename (string): name of the image file in DSA e.g. 123.svs. If not specified, inferred from annotation_file_urlpath
        username (string): DSA username (defaults to environment variable DSA_USERNAME)
        password (string): DSA password (defaults to environment variable DSA_PASSWORD)
        force (bool): upload even if annotation with same name exists for the slide
        insecure (bool): insecure ssl
        storage_options (dict): options to pass to reading functions
        local_config (string): local config yaml url/path

    Returns:
        dict: metadata
    """
    config = get_config(vars())

    if (
        not config["annotation_file_urlpath"]
        and not config["annotation_file_list_urlpath"]
    ):
        raise fire.core.FireError(
            "Specify either annotation_file_urlpath or annotation_file_list_urlpath"
        )

    annotation_file_urlpaths = []
    if config["annotation_file_urlpath"]:
        annotation_file_urlpaths.append(config["annotation_file_urlpath"])
    if config["annotation_file_list_urlpath"]:
        with open(config["annotation_file_list_urlpath"], "r") as of:
            data = of.read()
            annotation_file_urlpaths += data.split("\n")

    uuids = []
    for idx, annotation_file_urlpath in enumerate(annotation_file_urlpaths):
        logger.info(
            f"Uploading {annotation_file_urlpath}: {idx+1}/{len(annotation_file_urlpaths)}"
        )
        image_filename = config["image_filename"]
        if not image_filename:
            image_filename = Path(annotation_file_urlpath).with_suffix(".svs").name
            image_filename = re.sub(".*_", "", image_filename)
            if not image_filename:
                raise ValueError(
                    f"Unable to infer image_filename from {annotation_file_urlpath}"
                )
            logger.info(f"Image filename inferred as {image_filename}")
        dsa_uuid = _upload_annotation_to_dsa(
            config["dsa_endpoint_url"],
            annotation_file_urlpath,
            config["collection_name"],
            image_filename,
            config["username"],
            config["password"],
            config["force"],
            config["insecure"],
            config["storage_options"],
        )
        logger.info(f"Uploaded item to {dsa_uuid}")
        if dsa_uuid:
            uuids.append(dsa_uuid)

    return {"item_uuids": uuids}

upload_annotation_to_dsa(dsa_endpoint_url, slide_manifest, annotation_column, collection_name, image_filename, username, password, force=False, insecure=False, storage_options={})

Upload annotation to DSA

Upload json annotation file as a new annotation to the image in the DSA collection.

Parameters:

    dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1. Required.
    slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl. Required.
    annotation_column (string): annotation column of slide_manifest containing the dsa url. Required.
    collection_name (string): name of the collection in DSA. Required.
    image_filename (string): name of the image file in DSA e.g. 123.svs. If not specified, inferred from annotation_file_urlpath. Required.
    username (string): DSA username (defaults to environment variable DSA_USERNAME). Required.
    password (string): DSA password (defaults to environment variable DSA_PASSWORD). Required.
    force (bool): upload even if annotation with same name exists for the slide. Default: False
    insecure (bool): insecure ssl. Default: False
    storage_options (dict): options to pass to reading functions. Default: {}

Returns:

    DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/dsa_upload.py
def upload_annotation_to_dsa(
    dsa_endpoint_url: str,
    slide_manifest: DataFrame[SlideSchema],
    annotation_column: str,
    collection_name: str,
    image_filename: str,
    username: str,
    password: str,
    force: bool = False,
    insecure: bool = False,
    storage_options: dict = {},
):
    """Upload annotation to DSA

    Upload json annotation file as a new annotation to the image in the DSA collection.

    Args:
        dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        annotation_column (string): annotation column of slide_manifest containing the dsa url
        collection_name (string): name of the collection in DSA
        image_filename (string): name of the image file in DSA e.g. 123.svs. If not specified, inferred from annotation_file_urlpath
        username (string): DSA username (defaults to environment variable DSA_USERNAME)
        password (string): DSA password (defaults to environment variable DSA_PASSWORD)
        force (bool): upload even if annotation with same name exists for the slide
        insecure (bool): insecure ssl
        storage_options (dict): options to pass to reading functions

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """
    uuids = []
    for _, slide in slide_manifest.iterrows():
        uuid = _upload_annotation_to_dsa(
            dsa_endpoint_url,
            slide[annotation_column],
            collection_name,
            image_filename,
            username,
            password,
            force,
            insecure,
            storage_options,
        )
        uuids.append(uuid)
    return slide_manifest.assign(**{annotation_column: uuids})

dsa_viz

__bmp_polygon(input_urlpath, output_urlpath, image_filename, label_map, annotation_name, line_colors=None, fill_colors=None, scale_factor=1, storage_options={}, output_storage_options={})

Build DSA annotation json from a BMP with multiple labels.

Vectorizes and simplifies contours per label.

Parameters:

    input_urlpath (string): url/path to bmp file. Required.
    label_map (dict[int,str]): map of label number to label name. Required.
    annotation_name (string): name of the annotation to be displayed in DSA. Required.
    line_colors (dict[str,str], optional): line color map with {feature name:rgb values}. Default: None
    fill_colors (dict[str,str], optional): fill color map with {feature name:rgba values}. Default: None
    scale_factor (int, optional): scale to match the image on DSA. Default: 1
    storage_options (dict): storage options to pass to read functions. Default: {}
    output_storage_options (dict): storage options to pass to write functions. Default: {}

Returns:

    dict: DSA annotation

Source code in src/luna/pathology/cli/dsa_viz.py
def __bmp_polygon(
    input_urlpath: str,
    output_urlpath: str,
    image_filename: str,
    label_map: Dict[int, str],
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    scale_factor: Optional[int] = 1,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
):
    """Build DSA annotation json from a BMP with multiple labels.

    Vectorizes and simplifies contours per label.

    Args:
        input_urlpath (string): url/path to bmp file
        label_map (dict[int,str]): map of label number to label name
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict[str,str], optional): line color map with {feature name:rgb values}
        fill_colors (dict[str,str], optional): fill color map with {feature name:rgba values}
        scale_factor (int, optional): scale to match the image on DSA.
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions

    Returns:
        dict: DSA annotation
    """
    elements = []
    Image.MAX_IMAGE_PIXELS = 5000000000
    with open(input_urlpath, **storage_options).open() as of:
        annotation = Image.open(of)
    arr = np.array(annotation)

    for label_num, label_name in label_map.items():
        simplified_contours = vectorize_np_array_bitmask_by_pixel_value(
            arr, label_num, scale_factor=scale_factor
        )

        for n, contour in enumerate(simplified_contours):
            element = copy.deepcopy(base_dsa_polygon_element)
            element["label"]["value"] = label_name
            if fill_colors and label_name in fill_colors:
                element["fillColor"] = fill_colors[label_name]
            if line_colors and label_name in line_colors:
                element["lineColor"] = line_colors[label_name]

            coords = contour.tolist()
            for c in coords:
                c.append(0)
            element["points"] = coords
            elements.append(element)

    dsa_annotation = get_dsa_annotation(elements, annotation_name)
    return save_dsa_annotation(
        dsa_annotation,
        output_urlpath,
        image_filename,
        output_storage_options,
    )
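For reference, a minimal sketch of the label and color inputs this function expects; the label numbers, names, and color strings below are hypothetical (DSA takes CSS-style rgb/rgba strings):

label_map = {1: "tumor", 2: "stroma"}  # BMP pixel value -> label name
line_colors = {"tumor": "rgb(255, 0, 0)", "stroma": "rgb(0, 255, 0)"}
fill_colors = {"tumor": "rgba(255, 0, 0, 0.3)", "stroma": "rgba(0, 255, 0, 0.3)"}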

__heatmap(input_urlpath, output_urlpath, image_filename, annotation_name, column, tile_size, scale_factor=None, fill_colors=None, line_colors=None, storage_options={}, output_storage_options={})

Generate heatmap based on the tile scores

Creates a heatmap for the given column, using the viridis color palette to set the fill value: the color ranges from purple to yellow for scores from 0 to 1.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | url/path to parquet with tile scores | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| column | list[string] | columns to visualize e.g. tile_score | required |
| tile_size | int | size of tiles | required |
| scale_factor | int | scale to match the image on DSA | None |
| fill_colors | Optional[dict[str, str]] | fill color map with {feature name:rgba values} | None |
| line_colors | Optional[dict[str, str]] | line color map with {feature name:rgb values} | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |

Returns:

| Type | Description |
| --- | --- |
| dict | DSA annotation |

Source code in src/luna/pathology/cli/dsa_viz.py
def __heatmap(
    input_urlpath: str,
    output_urlpath: str,
    image_filename: str,
    annotation_name: str,
    column: List[str],
    tile_size: int,
    scale_factor: Optional[int] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    line_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
):
    """Generate heatmap based on the tile scores

    Creates a heatmap for the given column, using the color palette `viridis`
    to set a fill value
    - the color ranges from purple to yellow, for scores from 0 to 1.

    Args:
        input_urlpath (string): url/path to parquet with tile scores
        annotation_name (string): name of the annotation to be displayed in DSA
        column (list[string]): columns to visualize e.g. tile_score
        tile_size (int): size of tiles
        scale_factor (int, optional): scale to match the image on DSA.
        fill_colors (Optional[dict[str,str]]): fill color map with {feature name:rgba values}
        line_colors (Optional[dict[str,str]]): line color map with {feature name:rgb values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions

    Returns:
        dict: DSA annotation
    """
    if isinstance(column, str):
        column = [column]

    with open(input_urlpath, **storage_options) as of:
        df = pd.read_parquet(of).reset_index()
    scaled_tile_size = int(tile_size * int(scale_factor if scale_factor else 1))

    elements = []
    for _, row in df.iterrows():
        element = copy.deepcopy(base_dsa_polygon_element)

        # get label specific color and add to elements
        if len(column) == 1:
            label = row[column[0]]
            element["label"]["value"] = str(label)
        else:
            label = pd.to_numeric(row[column]).idxmax()
            element["label"]["value"] = str(label)

        if fill_colors and label in fill_colors:
            element["fillColor"] = fill_colors[label]
        if line_colors and label in line_colors:
            element["lineColor"] = line_colors[label]

        # convert coordinate string to tuple using eval
        x, y = address_to_coord(row["address"])

        pixel_x = x * scaled_tile_size
        pixel_y = y * scaled_tile_size

        coords = [
            [pixel_x, pixel_y],
            [pixel_x + scaled_tile_size, pixel_y],
            [pixel_x + scaled_tile_size, pixel_y + scaled_tile_size],
            [pixel_x, pixel_y + scaled_tile_size],
            [pixel_x, pixel_y],
        ]
        for c in coords:
            c.append(0)
        element["points"] = coords
        elements.append(element)

    if len(column) == 1:
        annotation_name = column[0] + "_" + annotation_name

    dsa_annotation = get_dsa_annotation(elements, annotation_name)
    return save_dsa_annotation(
        dsa_annotation,
        output_urlpath,
        image_filename,
        output_storage_options,
    )
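The tile-to-polygon mapping above is plain arithmetic; a small self-contained sketch of it, using a hypothetical tile grid address and tile size:

# hypothetical inputs: tile grid address (x, y), tile size, scale factor
tile_size, scale_factor = 256, 2
scaled_tile_size = tile_size * scale_factor
x, y = 3, 5

pixel_x = x * scaled_tile_size
pixel_y = y * scaled_tile_size
corners = [
    [pixel_x, pixel_y],
    [pixel_x + scaled_tile_size, pixel_y],
    [pixel_x + scaled_tile_size, pixel_y + scaled_tile_size],
    [pixel_x, pixel_y + scaled_tile_size],
    [pixel_x, pixel_y],  # the ring is closed by repeating the first corner
]
# DSA points are 3-D, so a trailing 0 is appended to each vertex
points = [[cx, cy, 0] for cx, cy in corners]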

__qupath_polygon(input_urlpath, output_urlpath, image_filename, annotation_name, classes_to_include, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={})

Build DSA annotation json from Qupath polygon geojson

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | url/path of Qupath polygon geojson | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| classes_to_include | list | list of classification labels to visualize | required |
| line_colors | map | line color map with {feature name:rgb values} | None |
| fill_colors | map | fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |

Returns:

| Type | Description |
| --- | --- |
| dict | dsa annotation |

Source code in src/luna/pathology/cli/dsa_viz.py
def __qupath_polygon(
    input_urlpath: str,
    output_urlpath: str,
    image_filename: str,
    annotation_name: str,
    classes_to_include: List,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
):
    """Build DSA annotation json from Qupath polygon geojson

    Args:
        input_urlpath (string): url/path of Qupath polygon geojson
        annotation_name (string): name of the annotation to be displayed in DSA
        classes_to_include (list): list of classification labels to visualize
        e.g. ["Tumor", "Stroma", ...]
        line_colors (map, optional): line color map with {feature name:rgb values}
        fill_colors (map, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions

    Returns:
        dict: dsa annotation
    """
    regional_file = open(input_urlpath, "r", **storage_options)
    with regional_file.open() as of:
        pixel_clf_polygons = geojson.load(of)

    feature_iter = iter(pixel_clf_polygons)
    if isinstance(pixel_clf_polygons, geojson.feature.FeatureCollection):
        feature_iter = iter(pixel_clf_polygons.features)

    elements = []
    for polygon in feature_iter:
        props = polygon.properties
        if "classification" not in props:
            continue

        label_name = polygon.properties["classification"]["name"]
        if label_name in classes_to_include:
            element = copy.deepcopy(base_dsa_polygon_element)
            element["label"]["value"] = label_name
            if fill_colors and label_name in fill_colors:
                element["fillColor"] = fill_colors[label_name]
            if line_colors and label_name in line_colors:
                element["lineColor"] = line_colors[label_name]

            coords = polygon["geometry"]["coordinates"]

            # uneven nesting of connected components
            for coord in coords:
                if isinstance(coord[0], list) and isinstance(coord[0][0], (int, float)):
                    # deep-copy the template element per ring so appended
                    # entries do not share (and overwrite) the same
                    # "points" list
                    ring_element = copy.deepcopy(element)
                    for c in coord:
                        c.append(0)
                    ring_element["points"] = coord
                    elements.append(ring_element)
                else:
                    for i in range(len(coord)):
                        connected_component_coords = coord[i]
                        connected_component_element = copy.deepcopy(element)
                        for c in connected_component_coords:
                            c.append(0)

                        connected_component_element[
                            "points"
                        ] = connected_component_coords
                        elements.append(connected_component_element)
    dsa_annotation = get_dsa_annotation(elements, annotation_name)
    return save_dsa_annotation(
        dsa_annotation,
        output_urlpath,
        image_filename,
        output_storage_options,
    )
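For orientation, a minimal sketch of the QuPath geojson feature shape this function consumes; the coordinates and class name below are hypothetical:

feature = {
    "type": "Feature",
    "geometry": {
        "type": "Polygon",
        "coordinates": [[[0, 0], [100, 0], [100, 100], [0, 100], [0, 0]]],
    },
    "properties": {"classification": {"name": "Tumor"}},
}
# only features whose classification name appears in classes_to_include
# (e.g. ["Tumor", "Stroma"]) are converted into DSA elements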

__regional_polygon(input_urlpath, output_urlpath, image_filename, annotation_name, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={})

Build DSA annotation json from regional annotation geojson

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | url/path to regional annotation geojson | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read/write functions | {} |

Returns:

| Type | Description |
| --- | --- |
| dict | DSA annotation |

Source code in src/luna/pathology/cli/dsa_viz.py
def __regional_polygon(
    input_urlpath: str,
    output_urlpath: str,
    image_filename: str,
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
):
    """Build DSA annotation json from regional annotation geojson

    Args:
        input_urlpath (string): url/path to regional annotation geojson
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read/write functions

    Returns:
        dict: DSA annotation
    """
    with open(input_urlpath, **storage_options).open() as regional_file:
        # geojson.load already returns a GeoJSON object; wrapping it in
        # geojson.loads (which expects a string) would fail
        regional_annotation = geojson.load(regional_file)

    elements = []
    for annot in regional_annotation["features"]:
        # get label name and add to element
        element = copy.deepcopy(base_dsa_polygon_element)
        label_name = annot.properties["label_name"]
        element["label"]["value"] = label_name
        if fill_colors and label_name in fill_colors:
            element["fillColor"] = fill_colors[label_name]
        if line_colors and label_name in line_colors:
            element["lineColor"] = line_colors[label_name]

        # add coordinates
        coords = annot["geometry"]["coordinates"]
        # if coordinates have extra nesting, set coordinates to 2d array.
        coords_arr = np.array(coords)
        if coords_arr.ndim == 3 and coords_arr.shape[0] == 1:
            coords = np.squeeze(coords_arr).tolist()

        for c in coords:
            c.append(0)
        element["points"] = coords
        elements.append(element)

    dsa_annotation = get_dsa_annotation(elements, annotation_name)
    return save_dsa_annotation(
        dsa_annotation,
        output_urlpath,
        image_filename,
        output_storage_options,
    )
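The nesting fix above is easiest to see in isolation; a minimal sketch with hypothetical coordinates:

import numpy as np

coords = [[[0, 0], [10, 0], [10, 10], [0, 0]]]  # extra nesting: shape (1, N, 2)
coords_arr = np.array(coords)
if coords_arr.ndim == 3 and coords_arr.shape[0] == 1:
    coords = np.squeeze(coords_arr).tolist()  # -> shape (N, 2)
print(coords)  # [[0, 0], [10, 0], [10, 10], [0, 0]]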

__stardist_cell(input_urlpath, output_urlpath, image_filename, annotation_name, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={})

Build DSA annotation json from TSV classification data generated by stardist

Processes cell classification data generated by Qupath/stardist and adds the center coordinates of the cells as annotation elements.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | url/path to TSV classification data generated by stardist | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read/write functions | {} |

Returns:

| Type | Description |
| --- | --- |
| dict | dsa annotation |

Source code in src/luna/pathology/cli/dsa_viz.py
def __stardist_cell(
    input_urlpath: str,
    output_urlpath: str,
    image_filename: str,
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
):
    """Build DSA annotation json from TSV classification data generated by
    stardist

    Processes cell classification data generated by Qupath/stardist and adds
    the center coordinates of the cells as annotation elements.

    Args:
        input_urlpath (string): url/path to TSV classification data generated by stardist
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read/write functions

    Returns:
        dict: dsa annotation
    """
    # the qupath/stardist cell TSV can be quite large to load all columns
    # into memory (it contains many feature columns),
    # so only load the basic columns that are needed for now
    cols_to_load = [
        "Image",
        "Name",
        "Class",
        "Centroid X µm",
        "Centroid Y µm",
    ]
    df = pd.read_csv(
        input_urlpath,
        sep="\t",
        usecols=cols_to_load,
        index_col=False,
        storage_options=storage_options,
    )

    # do some preprocessing on the tsv -- e.g. stardist sometimes finds
    # cells in glass
    # df = df[df["Parent"] != "Glass"]
    df = df.dropna(subset=["Centroid X µm", "Centroid Y µm"])
    # populate json elements
    elements = []
    for idx, row in df.iterrows():
        elements_entry = copy.deepcopy(base_dsa_point_element)

        # x,y coordinates from stardist are in microns so divide by
        # QUPATH_MAG_FACTOR = 0.5011 (exact 20x mag factor used by qupath
        # specifically)
        x = row["Centroid X µm"] / QUPATH_MAG_FACTOR
        y = row["Centroid Y µm"] / QUPATH_MAG_FACTOR

        # Get cell label and add to element
        label_name = row["Class"]
        elements_entry["label"]["value"] = label_name
        if fill_colors and label_name in fill_colors:
            elements_entry["fillColor"] = fill_colors[label_name]
        if line_colors and label_name in line_colors:
            elements_entry["lineColor"] = line_colors[label_name]

        # add centroid coordinate of cell to element
        center = [x, y, 0]
        elements_entry["center"] = center

        elements.append(elements_entry)

    dsa_annotation = get_dsa_annotation(elements, annotation_name)
    return save_dsa_annotation(
        dsa_annotation,
        output_urlpath,
        image_filename,
        output_storage_options,
    )
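A minimal sketch of the micron-to-pixel conversion applied to each centroid; the centroid values below are hypothetical, and the magnification factor is the module constant noted in the comments above:

QUPATH_MAG_FACTOR = 0.5011  # 20x mag factor used by qupath, from the module
x_um, y_um = 1253.4, 881.9  # hypothetical centroid in microns

center = [x_um / QUPATH_MAG_FACTOR, y_um / QUPATH_MAG_FACTOR, 0]
# "center" is what the DSA point element stores for the cell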

__stardist_polygon(input_urlpath, output_urlpath, image_filename, annotation_name, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={})

Build DSA annotation from stardist geojson classification results

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | URL/path to stardist geojson classification results | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| line_colors | dict[str, str] | user-provided line color map with {feature name:rgb values} | None |
| fill_colors | dict[str, str] | user-provided fill color map with {feature name:rgba values} | None |

Returns:

| Type | Description |
| --- | --- |
| string | annotation file path |

Source code in src/luna/pathology/cli/dsa_viz.py
def __stardist_polygon(
    input_urlpath: str,
    output_urlpath: str,
    image_filename: str,
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
):
    """Build DSA annotation from stardist geojson classification results

    Args:
        input_urlpath (string): URL/path to stardist geojson classification results
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict[str,str]): user-provided line color map with {feature name:rgb values}
        fill_colors (dict[str,str]): user-provided fill color map with {feature name:rgba values}

    Returns:
        string: annotation file path
    """
    # TODO: find better fix
    # can't handle NaNs for vectors, so replace all NaNs for now; see
    # https://stackoverflow.com/questions/17140886/how-to-search-and-replace-text-in-a-file
    with open(input_urlpath, "r", **storage_options).open() as input_file:
        filedata = input_file.read()
    newdata = filedata.replace("NaN", "-1")

    elements = []
    for cell in ijson.items(newdata, "item"):
        label_name = cell["properties"]["classification"]["name"]
        coord_list = list(cell["geometry"]["coordinates"][0])

        # uneven nested list when iterative parsing of json --> make sure
        # to get the list of coords
        # this can come as mixed types as well, so type checking needed
        while (
            isinstance(coord_list, list)
            and isinstance(coord_list[0], list)
            and not isinstance(coord_list[0][0], (int, float, Decimal))
        ):
            coord_list = coord_list[0]

        coords = [[float(coord[0]), float(coord[1]), 0] for coord in coord_list]
        element = copy.deepcopy(base_dsa_polygon_element)

        element["label"]["value"] = str(label_name)
        if fill_colors and label_name in fill_colors:
            element["fillColor"] = fill_colors[label_name]
        if line_colors and label_name in line_colors:
            element["lineColor"] = line_colors[label_name]
        element["points"] = coords

        elements.append(element)

    dsa_annotation = get_dsa_annotation(elements, annotation_name)
    return save_dsa_annotation(
        dsa_annotation,
        output_urlpath,
        image_filename,
        output_storage_options,
    )
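The un-nesting loop above is easiest to see on a toy input; a minimal sketch with a hypothetical over-nested polygon ring:

coord_list = [[[[0, 0], [5, 0], [5, 5], [0, 0]]]]  # over-nested ring
while (
    isinstance(coord_list, list)
    and isinstance(coord_list[0], list)
    and not isinstance(coord_list[0][0], (int, float))
):
    coord_list = coord_list[0]
print(coord_list)  # [[0, 0], [5, 0], [5, 5], [0, 0]]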

__stardist_polygon_tile(object_urlpath, tiles_urlpath, output_urlpath, image_filename, annotation_name_prefix, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={})

Build DSA annotation json from stardist geojson classification and labeled tiles

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| object_urlpath | string | URL/path to stardist geojson classification results | required |
| tiles_urlpath | string | URL/path to tiles manifest parquet | required |
| output_urlpath | string | URL/path prefix to save annotations | required |
| image_filename | string | name of the image file in DSA e.g. 123.svs | required |
| annotation_name_prefix | string | name of the annotation to be displayed in DSA | required |
| line_colors | dict | user-provided line color map with {feature name:rgb values} | None |
| fill_colors | dict | user-provided fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |

Returns:

| Type | Description |
| --- | --- |
| dict | DSA annotations |

Source code in src/luna/pathology/cli/dsa_viz.py
def __stardist_polygon_tile(
    object_urlpath: str,
    tiles_urlpath: str,
    output_urlpath: str,
    image_filename: str,
    annotation_name_prefix: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
):
    """Build DSA annotation json from stardist geojson classification and labeled tiles

    Args:
        object_urlpath (string): URL/path to stardist geojson classification results
        tiles_urlpath (string): URL/path to tiles manifest parquet
        output_urlpath (string): URL/path prefix to save annotations
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name_prefix (string): name of the annotation to be displayed in DSA
        line_colors (dict): user-provided line color map with {feature name:rgb values}
        fill_colors (dict): user-provided fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions

    Returns:
        dict: DSA annotations
    """
    with open(tiles_urlpath, **storage_options) as of:
        tiles_df = pd.read_parquet(of)
    LabeledTileSchema.validate(tiles_df.reset_index())
    logger.info(f"Read tiles manifest with {len(tiles_df)} tiles")

    with open(object_urlpath, **storage_options) as of:
        object_gdf = gpd.read_file(of)

    logger.info(f"Read {len(object_gdf)} stardist objects")

    ann_region_polygons = [
        box(
            row.x_coord,
            row.y_coord,
            row.x_coord + row.xy_extent,
            row.y_coord + row.xy_extent,
        )
        for _, row in tiles_df.iterrows()
    ]
    tiles_gdf = gpd.GeoDataFrame(
        data=tiles_df, geometry=ann_region_polygons, crs="EPSG:4326"
    )

    object_tiles = object_gdf.sjoin(tiles_gdf, how="left", predicate="within")
    logger.info("Spatially joined stardist objects with tiles manifest")
    tile_elements = {}
    for _, row in object_tiles.iterrows():
        tile_label = row["Classification"]
        if pd.isnull(tile_label):
            tile_label = "unclassified"

        if tile_label not in tile_elements.keys():
            tile_elements[tile_label] = []

        label_name = row["classification"]["name"]
        multipolygon = row["geometry"]
        if not isinstance(multipolygon, MultiPolygon):
            multipolygon = MultiPolygon([multipolygon])
        for polygon in list(multipolygon.geoms):
            coord_list = list(polygon.exterior.coords)

            coords = [[float(coord[0]), float(coord[1]), 0] for coord in coord_list]
            element = copy.deepcopy(base_dsa_polygon_element)

            element["label"]["value"] = str(label_name)
            if fill_colors and label_name in fill_colors:
                element["fillColor"] = fill_colors[label_name]
            if line_colors and label_name in line_colors:
                element["lineColor"] = line_colors[label_name]
            element["points"] = coords

            tile_elements[tile_label].append(element)

    metadata = {}
    for tile_label, elements in tile_elements.items():
        dsa_annotation = get_dsa_annotation(
            elements, annotation_name_prefix + "_" + tile_label
        )
        annotation_filepath = save_dsa_annotation(
            dsa_annotation,
            output_urlpath,
            image_filename,
            output_storage_options,
        )
        metadata[tile_label] = annotation_filepath

    return metadata

bitmask_polygon(input_map, output_urlpath, image_filename, annotation_name, line_colors=None, fill_colors=None, scale_factor=1, storage_options={}, output_storage_options={})

Build DSA annotation json from bitmask PNGs

Vectorizes and simplifies contours from the bitmask.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_map | map | map of {label:urlpath_to_bitmask_png} | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| scale_factor | int | scale to match the image on DSA | 1 |
| storage_options | dict | storage options to pass to read/write functions | {} |

Returns:

| Type | Description |
| --- | --- |
| dict | DSA annotation |

Source code in src/luna/pathology/cli/dsa_viz.py
def bitmask_polygon(
    input_map: Dict[str, str],
    output_urlpath: str,
    image_filename: str,
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    scale_factor: Optional[int] = 1,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
):
    """Build DSA annotation json from bitmask PNGs

    Vectorizes and simplifies contours from the bitmask.

    Args:
        input_map (map): map of {label:urlpath_to_bitmask_png}
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        scale_factor (int, optional): scale to match the image on DSA.
        storage_options (dict): storage options to pass to read/write functions

    Returns:
        dict: DSA annotation
    """
    if not check_filepaths_valid(input_map.values(), storage_options):
        raise ValueError("No valid PNG masks found. Exiting..")

    elements = []
    for bitmask_label, bitmask_filepath in input_map.items():
        Image.MAX_IMAGE_PIXELS = 5000000000
        with open(bitmask_filepath, "rb", **storage_options).open() as of:
            annotation = Image.open(of)
            bitmask_np = np.array(annotation)
        simplified_contours = vectorize_np_array_bitmask_by_pixel_value(
            bitmask_np, scale_factor=scale_factor
        )

        for n, contour in enumerate(simplified_contours):
            element = copy.deepcopy(base_dsa_polygon_element)
            label_name = bitmask_label
            element["label"]["value"] = label_name
            if fill_colors and label_name in fill_colors:
                element["fillColor"] = fill_colors[label_name]
            if line_colors and label_name in line_colors:
                element["lineColor"] = line_colors[label_name]

            coords = contour.tolist()
            for c in coords:
                c.append(0)
            element["points"] = coords
            elements.append(element)

    dsa_annotation = get_dsa_annotation(elements, annotation_name)
    return save_dsa_annotation(
        dsa_annotation,
        output_urlpath,
        image_filename,
        output_storage_options,
    )
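A minimal usage sketch, assuming a local mask PNG and output directory; all paths, labels, and colors below are hypothetical:

from luna.pathology.cli.dsa_viz import bitmask_polygon

annotation_path = bitmask_polygon(
    input_map={"tumor": "/data/masks/123_tumor.png"},
    output_urlpath="/data/dsa_annotations",
    image_filename="123.svs",
    annotation_name="tumor_regions",
    fill_colors={"tumor": "rgba(255, 0, 0, 0.3)"},
)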

bitmask_polygon_cli(input_map='???', output_urlpath='???', image_filename='???', annotation_name='???', line_colors=None, fill_colors=None, scale_factor=None, storage_options={}, output_storage_options={}, local_config='')

Build DSA annotation json from bitmask PNGs

Vectorizes and simplifies contours from the bitmask.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_map | map | map of {label:path_to_bitmask_png} | '???' |
| output_urlpath | string | url/path to save the DSA compatible annotation | '???' |
| image_filename | string | name of the image file in DSA e.g. 123.svs | '???' |
| annotation_name | string | name of the annotation to be displayed in DSA | '???' |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| scale_factor | int | scale to match the image on DSA | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |
| local_config | string | local config yaml file | '' |

Returns:

| Type | Description |
| --- | --- |
| dict | annotation file path |

Source code in src/luna/pathology/cli/dsa_viz.py
@timed
@save_metadata
def bitmask_polygon_cli(
    input_map: Dict[str, str] = "???",  # type: ignore
    output_urlpath: str = "???",
    image_filename: str = "???",
    annotation_name: str = "???",
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    scale_factor: Optional[int] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    local_config: str = "",
):
    """Build DSA annotation json from bitmask PNGs

    Vectorizes and simplifies contours from the bitmask.

    Args:
        input_map (map): map of {label:path_to_bitmask_png}
        output_urlpath (string): url/path to save the DSA compatible annotation
        json
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        scale_factor (int, optional): scale to match the image on DSA.
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        local_config (string): local config yaml file

    Returns:
        dict: annotation file path
    """
    config = get_config(vars())
    annotation_filepath = bitmask_polygon(
        config["input_map"],
        config["output_urlpath"],
        config["image_filename"],
        config["annotation_name"],
        config["line_colors"],
        config["fill_colors"],
        config["scale_factor"],
        config["storage_options"],
        config["output_storage_options"],
    )
    return {"dsa_annotation": annotation_filepath}

bmp_polygon(slide_manifest, output_urlpath, label_map, annotation_name, line_colors=None, fill_colors=None, scale_factor=1, storage_options={}, output_storage_options={}, annotation_column='bmp_polygon_url', output_column='bmp_polygon_dsa_url')

Build DSA annotation json from a BMP with multiple labels.

Vectorizes and simplifies contours per label.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| slide_manifest | DataFrame[SlideSchema] | slide manifest from slide_etl | required |
| output_urlpath | string | url/path prefix to save the DSA compatible annotation | required |
| label_map | dict[int, str] | map of label number to label name | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| line_colors | dict[str, str] | line color map with {feature name:rgb values} | None |
| fill_colors | dict[str, str] | fill color map with {feature name:rgba values} | None |
| scale_factor | int | scale to match the image on DSA | 1 |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |
| annotation_column | string | column containing url to BMP polygon | 'bmp_polygon_url' |
| output_column | string | column with result url to add to slide_manifest | 'bmp_polygon_dsa_url' |

Returns:

| Type | Description |
| --- | --- |
| DataFrame[SlideSchema] | slide manifest with the output column added |

Source code in src/luna/pathology/cli/dsa_viz.py
def bmp_polygon(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    label_map: Dict[int, str],
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    scale_factor: Optional[int] = 1,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    annotation_column: str = "bmp_polygon_url",
    output_column: str = "bmp_polygon_dsa_url",
):
    """Build DSA annotation json from a BMP with multiple labels.

    Vectorizes and simplifies contours per label.

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        output_urlpath (string): url/path prefix to save the DSA compatible annotation
        json
        label_map (dict[int,str]): map of label number to label name
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict[str,str], optional): line color map with {feature name:rgb values}
        fill_colors (dict[str,str], optional): fill color map with {feature name:rgba values}
        scale_factor (int, optional): scale to match the image on DSA.
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        annotation_column (string): column containing url to BMP polygon
        output_column (string): column with result url to add to slide_manifest

    Returns:
        DataFrame[SlideSchema]: slide manifest with the output column added
    """
    if annotation_column not in slide_manifest.columns:
        raise ValueError(f"{annotation_column} not found in slide manifest")
    client = get_or_create_dask_client()
    futures = []
    for _, row in slide_manifest.iterrows():
        image_filename = os.path.basename(row["url"])
        future = client.submit(
            __bmp_polygon,
            row[annotation_column],
            output_urlpath,
            image_filename,
            label_map,
            annotation_name,
            line_colors,
            fill_colors,
            scale_factor,
            storage_options,
            output_storage_options,
        )
        futures.append(future)
    progress(futures)
    dsa_annotation_urls = client.gather(futures)
    return slide_manifest.assign(**{output_column: dsa_annotation_urls})
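A minimal usage sketch, assuming a slide manifest that carries at least the url and bmp_polygon_url columns; all paths and labels below are hypothetical:

import pandas as pd
from luna.pathology.cli.dsa_viz import bmp_polygon

slide_manifest = pd.DataFrame(
    {
        "url": ["/data/slides/123.svs"],
        "bmp_polygon_url": ["/data/masks/123.bmp"],
    }
)
updated_manifest = bmp_polygon(
    slide_manifest,
    output_urlpath="/data/dsa_annotations",
    label_map={1: "tumor"},
    annotation_name="tumor_regions",
)
# the returned manifest gains a bmp_polygon_dsa_url column with the saved
# annotation paths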

bmp_polygon_cli(input_urlpath='???', output_urlpath='???', label_map='???', image_filename='???', annotation_name='???', line_colors=None, fill_colors=None, scale_factor=1, storage_options={}, output_storage_options={}, local_config='')

Build DSA annotation json from a BMP with multiple labels.

Vectorizes and simplifies contours per label.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | url/path to bmp file | '???' |
| output_urlpath | string | url/path prefix to save the DSA compatible annotation | '???' |
| label_map | dict[int, str] | map of label number to label name | '???' |
| image_filename | string | name of the image file in DSA e.g. 123.svs | '???' |
| annotation_name | string | name of the annotation to be displayed in DSA | '???' |
| line_colors | dict[str, str] | line color map with {feature name:rgb values} | None |
| fill_colors | dict[str, str] | fill color map with {feature name:rgba values} | None |
| scale_factor | int | scale to match the image on DSA | 1 |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |

Returns:

| Type | Description |
| --- | --- |
| dict | annotation file path |

Source code in src/luna/pathology/cli/dsa_viz.py
@timed
@save_metadata
def bmp_polygon_cli(
    input_urlpath: str = "???",
    output_urlpath: str = "???",
    label_map: Dict[int, str] = "???",  # type: ignore
    image_filename: str = "???",
    annotation_name: str = "???",
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    scale_factor: Optional[int] = 1,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    local_config: str = "",
):
    """Build DSA annotation json from a BMP with multiple labels.

    Vectorizes and simplifies contours per label.

    Args:
        input_urlpath (string): url/path to bmp file
        output_urlpath (string): url/path prefix to save the DSA compatible annotation
        json
        label_map (dict[int,str]): map of label number to label name
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict[str,str], optional): line color map with {feature name:rgb values}
        fill_colors (dict[str,str], optional): fill color map with {feature name:rgba values}
        scale_factor (int, optional): scale to match the image on DSA.
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions

    Returns:
        dict: annotation file path
    """
    config = get_config(vars())
    annotation_filepath = __bmp_polygon(
        config["input_urlpath"],
        config["output_urlpath"],
        config["image_filename"],
        config["label_map"],
        config["annotation_name"],
        config["line_colors"],
        config["fill_colors"],
        config["scale_factor"],
        config["storage_options"],
        config["output_storage_options"],
    )

    return {"dsa_annotation": annotation_filepath}

check_filepaths_valid(urls, storage_options)

Checks if all paths exist.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| urls | list | file URLs/paths | required |
| storage_options | dict | storage options to pass to fsspec | required |

Returns:

| Type | Description |
| --- | --- |
| bool | True if all file paths exist, False otherwise |

Source code in src/luna/pathology/cli/dsa_viz.py
def check_filepaths_valid(urls, storage_options):
    """Checks if all paths exist.

    Args:
        urls (list): file URLs/paths
        storage_options (dict): storage options to pass to fsspec

    Returns:
        bool: True if all file paths exist, False otherwise
    """

    all_files_found = True
    for url in urls:
        fs, urlpath = fsspec.core.url_to_fs(url, **storage_options)
        if not fs.exists(urlpath):
            logger.warning(f"url in config: {url} does not exist")
            all_files_found = False
    return all_files_found
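A minimal usage sketch with hypothetical local paths:

from luna.pathology.cli.dsa_viz import check_filepaths_valid

ok = check_filepaths_valid(
    ["/data/masks/123_tumor.png", "/data/masks/123_stroma.png"],
    storage_options={},
)
if not ok:
    raise ValueError("No valid PNG masks found")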

get_dsa_annotation(elements, annotation_name, description='')

Helper function to get dsa annotation

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| elements | list | list of annotation elements | required |
| annotation_name | string | annotation name for HistomicsUI | required |
| description | string | annotation description | '' |

Returns:

| Type | Description |
| --- | --- |
| dict | DSA annotation |

Source code in src/luna/pathology/cli/dsa_viz.py
def get_dsa_annotation(elements: list, annotation_name: str, description: str = ""):
    """Helper function to get dsa annotation

    Args:
        elements (list): list of annotation elements
        annotation_name (string): annotation name for HistomicsUI
        image_filename (string): name of the image in DSA e.g. 123.svs

    Returns:
        string: annotation file path. None if error in writing the file.
    """
    dsa_annotation = {
        "description": description,
        "elements": elements,
        "name": annotation_name,
    }

    dsa_annotation["elements"] = elements
    dsa_annotation["name"] = annotation_name

    return dsa_annotation
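A minimal sketch of calling this helper; the element below is a hand-rolled polyline and only illustrative (in practice elements are built from base_dsa_polygon_element as in the functions above):

from luna.pathology.cli.dsa_viz import get_dsa_annotation

elements = [
    {
        "type": "polyline",
        "closed": True,
        "points": [[0, 0, 0], [10, 0, 0], [10, 10, 0], [0, 0, 0]],
        "label": {"value": "tumor"},
    }
]
annotation = get_dsa_annotation(elements, "tumor regions")
# -> {"description": "", "elements": [...], "name": "tumor regions"}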

heatmap(slide_manifest, output_urlpath, annotation_name, column, tile_size, scale_factor=None, fill_colors=None, line_colors=None, output_column='', storage_options={}, output_storage_options={})

Generate heatmap based on the tile scores

Creates a heatmap for the given column, using the viridis color palette to set the fill value: the color ranges from purple to yellow for scores from 0 to 1.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| slide_manifest | DataFrame[SlideSchema] | slide manifest from slide_etl | required |
| output_urlpath | string | URL/path prefix to save the DSA compatible annotation | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| column | list[string] | column(s) to visualize e.g. tile_score | required |
| tile_size | int | size of tiles | required |
| scale_factor | int | scale to match the image on DSA | None |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| output_column | string | column with result url to add to slide_manifest | '' |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |

Returns:

| Type | Description |
| --- | --- |
| DataFrame[SlideSchema] | slide manifest with the output column added |

Source code in src/luna/pathology/cli/dsa_viz.py
def heatmap(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    annotation_name: str,
    column: List[str],
    tile_size: int,
    scale_factor: Optional[int] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    line_colors: Optional[Dict[str, str]] = None,
    output_column: str = "",
    storage_options: Dict = {},
    output_storage_options: Dict = {},
):
    """Generate heatmap based on the tile scores

    Creates a heatmap for the given column, using the color palette `viridis`
    to set a fill value
    - the color ranges from purple to yellow, for scores from 0 to 1.

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        output_urlpath (string): URL/path prefix to save the DSA compatible annotation
        json
        annotation_name (string): name of the annotation to be displayed in DSA
        column (list[string]): column(s) to visualize e.g. tile_score
        tile_size (int): size of tiles
        scale_factor (int, optional): scale to match the image on DSA.
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions

    Returns:
        DataFrame[SlideSchema]: slide manifest with the output column added
    """
    if not output_column:
        output_column = f"{annotation_name}_dsa_url"
    if "tiles_url" not in slide_manifest.columns:
        raise ValueError("tiles_url not found in slide manifest")
    client = get_or_create_dask_client()
    futures = []
    for _, row in slide_manifest.iterrows():
        image_filename = os.path.basename(row["url"])
        future = client.submit(
            __heatmap,
            row["tiles_url"],
            output_urlpath,
            image_filename,
            annotation_name,
            column,
            tile_size,
            scale_factor,
            fill_colors,
            line_colors,
            storage_options,
            output_storage_options,
        )

        futures.append(future)
    progress(futures)
    dsa_annotation_urls = client.gather(futures)
    return slide_manifest.assign(**{output_column: dsa_annotation_urls})
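A minimal usage sketch, assuming a slide manifest with url and tiles_url columns; all paths and column names below are hypothetical:

import pandas as pd
from luna.pathology.cli.dsa_viz import heatmap

slide_manifest = pd.DataFrame(
    {
        "url": ["/data/slides/123.svs"],
        "tiles_url": ["/data/tiles/123.parquet"],
    }
)
slide_manifest = heatmap(
    slide_manifest,
    output_urlpath="/data/dsa_annotations",
    annotation_name="tile_scores",
    column=["tile_score"],
    tile_size=256,
)
# the returned manifest gains a tile_scores_dsa_url column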

heatmap_cli(input_urlpath='???', output_urlpath='???', image_filename='???', annotation_name='???', column='???', tile_size='???', scale_factor=1, fill_colors=None, line_colors=None, storage_options={}, output_storage_options={}, local_config='')

Generate heatmap based on the tile scores

Creates a heatmap for the given column, using the viridis color palette to set the fill value: the color ranges from purple to yellow for scores from 0 to 1.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | URL/path to parquet with tile scores | '???' |
| output_urlpath | string | URL/path prefix to save the DSA compatible annotation | '???' |
| image_filename | string | name of the image file in DSA e.g. 123.svs | '???' |
| annotation_name | string | name of the annotation to be displayed in DSA | '???' |
| column | string | column to visualize e.g. tile_score | '???' |
| tile_size | int | size of tiles | '???' |
| scale_factor | int | scale to match the image on DSA | 1 |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |
| local_config | string | local config yaml file | '' |

Returns:

| Type | Description |
| --- | --- |
| dict | annotation file path |

Source code in src/luna/pathology/cli/dsa_viz.py
@timed
@save_metadata
def heatmap_cli(
    input_urlpath: str = "???",
    output_urlpath: str = "???",
    image_filename: str = "???",
    annotation_name: str = "???",
    column: str = "???",
    tile_size: int = "???",  # type: ignore
    scale_factor: Optional[int] = 1,
    fill_colors: Optional[dict[str, str]] = None,
    line_colors: Optional[dict[str, str]] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Generate heatmap based on the tile scores

    Creates a heatmap for the given column, using the color palette `viridis`
    to set a fill value
    - the color ranges from purple to yellow, for scores from 0 to 1.

    Args:
        input_urlpath (string): URL/path to parquet with tile scores
        output_urlpath (string): URL/path prefix to save the DSA compatible annotation
        json
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name (string): name of the annotation to be displayed in DSA
        column (string): column to visualize e.g. tile_score
        tile_size (int): size of tiles
        scale_factor (int, optional): scale to match the image on DSA.
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        local_config (string): local config yaml file

    Returns:
        dict: annotation file path. None if error in writing the file.
    """
    config = get_config(vars())
    annotation_filepath = __heatmap(
        config["input_urlpath"],
        config["output_urlpath"],
        config["image_filename"],
        config["annotation_name"],
        config["column"],
        config["tile_size"],
        config["scale_factor"],
        config["fill_colors"],
        config["line_colors"],
        config["storage_options"],
        config["output_storage_options"],
    )
    return {"dsa_annotation": annotation_filepath}

qupath_polygon(slide_manifest, output_urlpath, image_filename, annotation_name, classes_to_include, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, annotation_column='', output_column='')

Build DSA annotation json from Qupath polygon geojson

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| slide_manifest | DataFrame[SlideSchema] | slide manifest from slide_etl | required |
| output_urlpath | string | URL/path prefix for saving the DSA compatible annotation | required |
| image_filename | string | name of the image file in DSA e.g. 123.svs | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| classes_to_include | list | list of classification labels to visualize | required |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |
| annotation_column | string | column containing url to qupath geojson | '' |
| output_column | string | column with result url to add to slide_manifest | '' |

Returns:

| Type | Description |
| --- | --- |
| DataFrame[SlideSchema] | slide manifest |

Source code in src/luna/pathology/cli/dsa_viz.py
def qupath_polygon(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    image_filename: str,
    annotation_name: str,
    classes_to_include: List,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    annotation_column: str = "",
    output_column: str = "",
):
    """Build DSA annotation json from Qupath polygon geojson

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        output_urlpath (string): URL/path prefix for saving the DSA compatible annotation
        json
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name (string): name of the annotation to be displayed in DSA
        classes_to_include (list): list of classification labels to visualize
        e.g. ["Tumor", "Stroma", ...]
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        annotation_column (string): column containing url to qupath geojson
        output_column (string): column with result url to add to slide_manifest

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """
    if not annotation_column:
        annotation_column = f"{annotation_name}_geojson_url"
    if not output_column:
        output_column = f"{annotation_name}_dsa_url"
    if annotation_column not in slide_manifest.columns:
        raise ValueError(f"{annotation_column} not found in slide manifest")
    client = get_or_create_dask_client()
    futures = []
    for _, row in slide_manifest.iterrows():
        image_filename = os.path.basename(row["url"])
        future = client.submit(
            __qupath_polygon,
            row[annotation_column],
            output_urlpath,
            image_filename,
            annotation_name,
            classes_to_include,
            line_colors,
            fill_colors,
            storage_options,
            output_storage_options,
        )

        futures.append(future)
    progress(futures)
    dsa_annotation_urls = client.gather(futures)
    return slide_manifest.assign(**{output_column: dsa_annotation_urls})

qupath_polygon_cli(input_urlpath='???', output_urlpath='???', image_filename='???', annotation_name='???', classes_to_include='???', line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, local_config='')

Build DSA annotation json from Qupath polygon geojson

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | URL/path of Qupath polygon geojson | '???' |
| output_urlpath | string | URL/path prefix for saving the DSA compatible annotation | '???' |
| image_filename | string | name of the image file in DSA e.g. 123.svs | '???' |
| annotation_name | string | name of the annotation to be displayed in DSA | '???' |
| classes_to_include | list | list of classification labels to visualize | '???' |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |
| local_config | string | local config yaml file | '' |

Returns:

| Type | Description |
| --- | --- |
| dict | annotation file path |

Source code in src/luna/pathology/cli/dsa_viz.py
@timed
@save_metadata
def qupath_polygon_cli(
    input_urlpath: str = "???",
    output_urlpath: str = "???",
    image_filename: str = "???",
    annotation_name: str = "???",
    classes_to_include: list = "???",  # type: ignore
    line_colors: Optional[dict[str, str]] = None,
    fill_colors: Optional[dict[str, str]] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Build DSA annotation json from Qupath polygon geojson

    Args:
        input_urlpath (string): URL/path of Qupath polygon geojson
        output_urlpath (string): URL/path prefix for saving the DSA compatible annotation
        json
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name (string): name of the annotation to be displayed in DSA
        classes_to_include (list): list of classification labels to visualize
        e.g. ["Tumor", "Stroma", ...]
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        local_config (string): local config yaml file

    Returns:
        dict: annotation file path
    """
    config = get_config(vars())
    annotation_filepath = __qupath_polygon(
        config["input_urlpath"],
        config["output_urlpath"],
        config["image_filename"],
        config["annotation_name"],
        config["classes_to_include"],
        config["line_colors"],
        config["fill_colors"],
        config["storage_options"],
        config["output_storage_options"],
    )

    return {"dsa_annotation": annotation_filepath}

regional_polygon(slide_manifest, output_urlpath, annotation_name, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, annotation_column='', output_column='')

Build DSA annotation json from regional annotation geojson

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| slide_manifest | DataFrame[SlideSchema] | slide manifest | required |
| output_urlpath | string | URL/path prefix for saving dsa annotation json | required |
| annotation_name | string | name of the annotation to be displayed in DSA | required |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |
| annotation_column | string | column containing url to regional geojson | '' |
| output_column | string | column with result url to add to slide_manifest | '' |

Returns:

| Type | Description |
| --- | --- |
| DataFrame[SlideSchema] | slide manifest |

Source code in src/luna/pathology/cli/dsa_viz.py
def regional_polygon(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    annotation_column: str = "",
    output_column: str = "",
):
    """Build DSA annotation json from regional annotation geojson

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest
        output_urlpath (string): URL/path prefix for saving dsa annotation json
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        annotation_column (string): column containing url to regional geojson
        output_column (string): column with result url to add to slide_manifest

    Returns:
        DataFrame[SlideSchema]: slide schema
    """

    if not annotation_column:
        annotation_column = f"{annotation_name}_geojson_url"
    if not output_column:
        output_column = f"{annotation_name}_dsa_url"
    if annotation_column not in slide_manifest.columns:
        raise ValueError(f"{annotation_column} not found in slide manifest")
    client = get_or_create_dask_client()
    futures = []
    for _, row in slide_manifest.iterrows():
        image_filename = os.path.basename(row["url"])
        future = client.submit(
            __regional_polygon,
            row[annotation_column],
            output_urlpath,
            image_filename,
            annotation_name,
            line_colors,
            fill_colors,
            storage_options,
            output_storage_options,
        )

        futures.append(future)
    progress(futures)
    dsa_annotation_urls = client.gather(futures)
    return slide_manifest.assign(**{output_column: dsa_annotation_urls})

regional_polygon_cli(input_urlpath='???', output_urlpath='???', image_filename='???', annotation_name='???', line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, local_config='')

Build DSA annotation json from regional annotation geojson

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_urlpath | string | URL/path to regional annotation geojson | '???' |
| output_urlpath | string | URL/path prefix for saving dsa annotation json | '???' |
| image_filename | string | name of the image file in DSA e.g. 123.svs | '???' |
| annotation_name | string | name of the annotation to be displayed in DSA | '???' |
| line_colors | dict | line color map with {feature name:rgb values} | None |
| fill_colors | dict | fill color map with {feature name:rgba values} | None |
| storage_options | dict | storage options to pass to read functions | {} |
| output_storage_options | dict | storage options to pass to write functions | {} |
| local_config | string | local config yaml file | '' |

Returns:

| Type | Description |
| --- | --- |
| dict | annotation file path |

Source code in src/luna/pathology/cli/dsa_viz.py
@timed
@save_metadata
def regional_polygon_cli(
    input_urlpath: str = "???",
    output_urlpath: str = "???",
    image_filename: str = "???",
    annotation_name: str = "???",
    line_colors: Optional[dict[str, str]] = None,
    fill_colors: Optional[dict[str, str]] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Build DSA annotation json from regional annotation geojson

    Args:
        input_urlpath (string): URL/path to regional annotation geojson
        output_urlpath (string): URL/path prefix for saving dsa annotation json
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        local_config (string): local config yaml file

    Returns:
        dict: annotation file path
    """

    config = get_config(vars())

    annotation_filepath = __regional_polygon(
        config["input_urlpath"],
        config["output_urlpath"],
        config["image_filename"],
        config["annotation_name"],
        config["line_colors"],
        config["fill_colors"],
        config["storage_options"],
        config["output_storage_options"],
    )

    return {"dsa_annotation": annotation_filepath}

save_dsa_annotation(dsa_annotation, output_urlpath, image_filename, storage_options={})

Helper function to save annotation elements to a json file.

Parameters:

Name Type Description Default
dsa_annotation dict

DSA annotations

required
output_urlpath string

url/path to a directory to save the annotation file

required
image_filename string

name of the image in DSA e.g. 123.svs

required
storage_options dict

options for storage functions

{}

Returns:

Name Type Description
string

annotation file path, or None if the file could not be written.

Source code in src/luna/pathology/cli/dsa_viz.py
def save_dsa_annotation(
    dsa_annotation: dict,
    output_urlpath: str,
    image_filename: str,
    storage_options: dict = {},
):
    """Helper function to save annotation elements to a json file.

    Args:
        dsa_annotation (dict): DSA annotations
        output_urlpath (string): url/path to a directory to save the annotation file
        image_filename (string): name of the image in DSA e.g. 123.svs
        storage_options (dict): options for storage functions

    Returns:
        string: annotation file path, or None if the file could not be written.
    """

    result = re.search(image_id_regex, image_filename)
    if result:
        image_id = result.group(1)
    else:
        raise InvalidImageIdException(f"Invalid image filename: {image_filename}")

    annotation_name_replaced = dsa_annotation["name"].replace(" ", "_")

    fs, output_urlpath_prefix = fsspec.core.url_to_fs(output_urlpath, **storage_options)
    output_path = (
        Path(output_urlpath_prefix) / f"{annotation_name_replaced}_{image_id}.json"
    )

    if not fs.exists(output_urlpath_prefix):
        fs.mkdir(output_urlpath_prefix)

    with fs.open(output_path, "w") as outfile:
        json.dump(dsa_annotation, outfile)
    logger.info(
        f"Saved {len(dsa_annotation['elements'])} to {fs.unstrip_protocol(str(output_path))}"
    )
    return fs.unstrip_protocol(str(output_path))

stardist_cell(slide_manifest, output_urlpath, annotation_name, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, annotation_column='', output_column='')

Build DSA annotation json from TSV classification data generated by stardist

Processes cell classification data generated by QuPath/stardist and adds the center coordinates of the cells as annotation elements.

Parameters:

Name Type Description Default
slide_manifest DataFrame[SlideSchema]

slide manifest

required
output_urlpath string

URL/path prefix for saving dsa annotation json

required
annotation_name string

name of the annotation to be displayed in DSA

required
line_colors dict

line color map with {feature name:rgb values}

None
fill_colors dict

fill color map with {feature name:rgba values}

None
storage_options dict

storage options to pass to read functions

{}
output_storage_options dict

storage options to pass to write functions

{}
annotation_column string

column containing url to stardist polygon geojson

''
output_column string

column with result url to add to slide_manifest

''

Returns:

Type Description

DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/dsa_viz.py
def stardist_cell(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    annotation_column: str = "",
    output_column: str = "",
):
    """Build DSA annotation json from TSV classification data generated by
    stardist

    Processes cell classification data generated by QuPath/stardist and
    adds the center coordinates of the cells
    as annotation elements.

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest
        output_urlpath (string): URL/path prefix for saving dsa annotation json
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        annotation_column (string): column containing url to stardist polygon geojson
        output_column (string): column with result url to add to slide_manifest

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """
    if not annotation_column:
        annotation_column = f"{annotation_name}_tsv_url"
    if not output_column:
        output_column = f"{annotation_name}_dsa_url"
    if annotation_column not in slide_manifest.columns:
        raise ValueError(f"{annotation_column} not found in slide manifest")
    client = get_or_create_dask_client()
    futures = []
    for _, row in slide_manifest.iterrows():
        image_filename = os.path.basename(row["url"])
        future = client.submit(
            __stardist_cell,
            row[annotation_column],
            output_urlpath,
            image_filename,
            annotation_name,
            line_colors,
            fill_colors,
            storage_options,
            output_storage_options,
        )

        futures.append(future)
    progress(futures)
    dsa_annotation_urls = client.gather(futures)
    return slide_manifest.assign(**{output_column: dsa_annotation_urls})

stardist_cell_cli(input_urlpath='???', output_urlpath='???', image_filename='???', annotation_name='???', line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, local_config='')

Build DSA annotation json from TSV classification data generated by stardist

Processes cell classification data generated by QuPath/stardist and adds the center coordinates of the cells as annotation elements.

Parameters:

Name Type Description Default
input_urlpath string

URL/path to TSV classification data generated by stardist

'???'
output_urlpath string

URL/path prefix for saving dsa annotation json

'???'
image_filename string

name of the image file in DSA e.g. 123.svs

'???'
annotation_name string

name of the annotation to be displayed in DSA

'???'
line_colors dict

line color map with {feature name:rgb values}

None
fill_colors dict

fill color map with {feature name:rgba values}

None
storage_options dict

storage options to pass to read functions

{}
output_storage_options dict

storage options to pass to write functions

{}
local_config string

local config YAML file

''

Returns:

Type Description

dict[str,str]: annotation file path

Source code in src/luna/pathology/cli/dsa_viz.py
@timed
@save_metadata
def stardist_cell_cli(
    input_urlpath: str = "???",
    output_urlpath: str = "???",
    image_filename: str = "???",
    annotation_name: str = "???",
    line_colors: Optional[dict[str, str]] = None,
    fill_colors: Optional[dict[str, str]] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Build DSA annotation json from TSV classification data generated by
    stardist

    Processes cell classification data generated by QuPath/stardist and
    adds the center coordinates of the cells
    as annotation elements.

    Args:
        input_urlpath (string): URL/path to TSV classification data generated by stardist
        output_urlpath (string): URL/path prefix for saving dsa annotation json
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        local_config (string): local config YAML file

    Returns:
        dict[str,str]: annotation file path
    """
    config = get_config(vars())
    annotation_filepath = __stardist_cell(
        config["input_urlpath"],
        config["output_urlpath"],
        config["image_filename"],
        config["annotation_name"],
        config["line_colors"],
        config["fill_colors"],
        config["storage_options"],
        config["output_storage_options"],
    )
    return {"dsa_annotation": annotation_filepath}

stardist_polygon(slide_manifest, output_urlpath, annotation_name, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, annotation_column='', output_column='')

Build DSA annotation json from stardist geojson classification results

Parameters:

Name Type Description Default
slide_manifest DataFrame[SlideSchema]

slide manifest from slide_etl

required
output_urlpath string

URL/path prefix to save annotations

required
annotation_name string

name of the annotation to be displayed in DSA

required
line_colors dict

user-provided line color map with {feature name:rgb values}

None
fill_colors dict

user-provided fill color map with {feature name:rgba values}

None
storage_options dict

storage options to pass to read functions

{}
output_storage_options dict

storage options to pass to write functions

{}
annotation_column string

column containing url to stardist polygon geojson

''
output_column string

column with result url to add to slide_manifest

''

Returns:

Type Description

DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/dsa_viz.py
def stardist_polygon(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    annotation_name: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    annotation_column: str = "",
    output_column: str = "",
):
    """Build DSA annotation json from stardist geojson classification results

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        output_urlpath (string): URL/path prefix to save annotations
        annotation_name (string): name of the annotation to be displayed in DSA
        line_colors (dict): user-provided line color map with {feature name:rgb values}
        fill_colors (dict): user-provided fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        annotation_column (string): column containing url to stardist polygon geojson
        output_column (string): column with result url to add to slide_manifest

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """
    if not annotation_column:
        annotation_column = f"{annotation_name}_geojson_url"
    if not output_column:
        output_column = f"{annotation_name}_dsa_url"

    if annotation_column not in slide_manifest.columns:
        raise ValueError(f"{annotation_column} not found in slide manifest")
    client = get_or_create_dask_client()
    futures = []
    for _, row in slide_manifest.iterrows():
        image_filename = os.path.basename(row["url"])
        future = client.submit(
            __stardist_polygon,
            row[annotation_column],
            output_urlpath,
            image_filename,
            annotation_name,
            line_colors,
            fill_colors,
            storage_options,
            output_storage_options,
        )

        futures.append(future)
    progress(futures)
    dsa_annotation_urls = client.gather(futures)
    for idx, dsa_annotation_url in enumerate(dsa_annotation_urls):
        slide_manifest.at[idx, output_column] = dsa_annotation_url

    return slide_manifest

stardist_polygon_cli(input_urlpath='???', image_filename='???', annotation_name='???', output_urlpath='???', line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, local_config='')

Build DSA annotation json from stardist geojson classification results

Parameters:

Name Type Description Default
input_urlpath string

URL/path to stardist geojson classification results json

'???'
image_filename string

name of the image file in DSA e.g. 123.svs

'???'
annotation_name string

name of the annotation to be displayed in DSA

'???'
output_urlpath string

URL/path prefix to save annotations

'???'
line_colors dict

user-provided line color map with {feature name:rgb values}

None
fill_colors dict

user-provided fill color map with {feature name:rgba values}

None
storage_options dict

storage options to pass to read functions

{}
output_storage_options dict

storage options to pass to write functions

{}
local_config string

local config YAML file

''

Returns:

Type Description

dict[str,str]: annotation file path

Source code in src/luna/pathology/cli/dsa_viz.py
@timed
@save_metadata
def stardist_polygon_cli(
    input_urlpath: str = "???",
    image_filename: str = "???",
    annotation_name: str = "???",
    output_urlpath: str = "???",
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    local_config: str = "",
):
    """Build DSA annotation json from stardist geojson classification results

    Args:
        input_urlpath (string): URL/path to stardist geojson classification results json
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name (string): name of the annotation to be displayed in DSA
        output_urlpath (string): URL/path prefix to save annotations
        line_colors (dict): user-provided line color map with {feature name:rgb values}
        fill_colors (dict): user-provided fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        local_config (string): local config YAML file

    Returns:
        dict[str,str]: annotation file path
    """
    config = get_config(vars())
    annotation_filepath = __stardist_polygon(
        config["input_urlpath"],
        config["output_urlpath"],
        config["image_filename"],
        config["annotation_name"],
        config["line_colors"],
        config["fill_colors"],
        config["storage_options"],
        config["output_storage_options"],
    )
    return {"dsa_annotation": annotation_filepath}

stardist_polygon_tile(slide_manifest, output_urlpath, annotation_name_prefix, line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, annotation_column='', output_column_suffix='')

Build DSA annotation json from stardist geojson classification and labeled tiles

Parameters:

Name Type Description Default
slide_manifest DataFrame[SlideSchema]

slide manifest

required
annotation_name_prefix string

name of the annotation to be displayed in DSA

required
output_urlpath string

URL/path prefix to save annotations

required
line_colors dict

user-provided line color map with {feature name:rgb values}

None
fill_colors dict

user-provided fill color map with {feature name:rgba values}

None
storage_options dict

storage options to pass to read functions

{}
output_storage_options dict

storage options to pass to write functions

{}
annotation_column string

column containing url to stardist polygon geojson

''
output_column_suffix string

column suffix with result url to add to slide_manifest

''

Returns:

Type Description

DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/dsa_viz.py
def stardist_polygon_tile(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    annotation_name_prefix: str,
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: Dict = {},
    output_storage_options: Dict = {},
    annotation_column: str = "",
    output_column_suffix: str = "",
):
    """Build DSA annotation json from stardist geojson classification and labeled tiles

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest
        annotation_name_prefix (string): name of the annotation to be displayed in DSA
        output_urlpath (string): URL/path prefix to save annotations
        line_colors (dict): user-provided line color map with {feature name:rgb values}
        fill_colors (dict): user-provided fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        annotation_column (string): column containing url to stardist polygon geojson
        output_column_suffix (string): column suffix with result url to add to slide_manifest

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """
    if not annotation_column:
        annotation_column = f"{annotation_name_prefix}_geojson_url"
    if not output_column_suffix:
        output_column_suffix = f"{annotation_name_prefix}_dsa_url"
    if annotation_column not in slide_manifest.columns:
        raise ValueError(f"{annotation_column} not found in slide manifest")
    client = get_or_create_dask_client()
    futures = []
    for _, row in slide_manifest.iterrows():
        image_filename = os.path.basename(row["url"])
        future = client.submit(
            __stardist_polygon_tile,
            row[annotation_column],
            row["tiles_url"],
            output_urlpath,
            image_filename,
            annotation_name_prefix,
            line_colors,
            fill_colors,
            storage_options,
            output_storage_options,
        )

        futures.append(future)
    progress(futures)
    dsa_annotation_url_maps = client.gather(futures)
    tile_labels = dsa_annotation_url_maps[0].keys()
    return slide_manifest.assign(
        **{
            f"{tile_label}_{output_column_suffix}": [
                x[tile_label] for x in dsa_annotation_url_maps
            ]
            for tile_label in tile_labels
        }
    )

stardist_polygon_tile_cli(object_urlpath='???', tiles_urlpath='???', image_filename='???', annotation_name_prefix='???', output_urlpath='???', line_colors=None, fill_colors=None, storage_options={}, output_storage_options={}, local_config='')

Build DSA annotation json from stardist geojson classification and labeled tiles

Parameters:

Name Type Description Default
object_urlpath string

URL/path to object geojson classification results

'???'
tiles_urlpath string

URL/path to tiles manifest parquet

'???'
image_filename string

name of the image file in DSA e.g. 123.svs

'???'
annotation_name_prefix string

name of the annotation to be displayed in DSA

'???'
output_urlpath string

URL/path prefix to save annotations

'???'
line_colors dict

user-provided line color map with {feature name:rgb values}

None
fill_colors dict

user-provided fill color map with {feature name:rgba values}

None
storage_options dict

storage options to pass to read functions

{}
output_storage_options dict

storage options to pass to write functions

{}
local_config string

local config YAML file

''

Returns:

Type Description

dict[str,str]: annotation file path

Source code in src/luna/pathology/cli/dsa_viz.py
@timed
@save_metadata
def stardist_polygon_tile_cli(
    object_urlpath: str = "???",
    tiles_urlpath: str = "???",
    image_filename: str = "???",
    annotation_name_prefix: str = "???",
    output_urlpath: str = "???",
    line_colors: Optional[Dict[str, str]] = None,
    fill_colors: Optional[Dict[str, str]] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Build DSA annotation json from stardist geojson classification and labeled tiles

    Args:
        object_urlpath (string): URL/path to object geojson classification results
        tiles_urlpath (string): URL/path to tiles manifest parquet
        image_filename (string): name of the image file in DSA e.g. 123.svs
        annotation_name_prefix (string): name of the annotation to be displayed in DSA
        output_urlpath (string): URL/path prefix to save annotations
        line_colors (dict): user-provided line color map with {feature name:rgb values}
        fill_colors (dict): user-provided fill color map with {feature name:rgba values}
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        local_config (string): local config YAML file

    Returns:
        dict[str,str]: annotation file path
    """
    config = get_config(vars())
    metadata = __stardist_polygon_tile(
        config["object_urlpath"],
        config["tiles_urlpath"],
        config["output_urlpath"],
        config["image_filename"],
        config["annotation_name_prefix"],
        config["line_colors"],
        config["fill_colors"],
        config["storage_options"],
        config["output_storage_options"],
    )
    return metadata

extract_kfunction_statistics

cli(input_cell_objects_urlpath='???', tile_size='???', intensity_label='???', tile_stride='???', radius='???', output_urlpath='.', storage_options={}, output_storage_options={}, local_config='')

Run the k-function using a sliding-window approach, where the k-function is computed locally in smaller windows and aggregated across the entire slide.

Parameters:

Name Type Description Default
input_cell_objects_urlpath str

url/path to cell objects (.parquet)

'???'
tile_size int

size of tiles to use (at the requested magnification)

'???'
tile_stride int

spacing between tiles

'???'
intensity_label str

column of the cell objects to use for intensity calculations (for the I-K function: spatial + scalar-value clustering)

'???'
radius float

the radius to consider

'???'
output_urlpath str

output URL/path prefix

'.'
storage_options dict

storage options for reading the cell objects

{}

Returns:

Type Description

dict: metadata about function call

Source code in src/luna/pathology/cli/extract_kfunction_statistics.py
@timed
@save_metadata
def cli(
    input_cell_objects_urlpath: str = "???",
    tile_size: int = "???",  # type: ignore
    intensity_label: str = "???",
    tile_stride: int = "???",  # type: ignore
    radius: float = "???",  # type: ignore
    output_urlpath: str = ".",
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Run k function using a sliding window approach, where the k-function is computed locally in a smaller window, and aggregated across the entire slide.

    Args:
        input_cell_objects_urlpath (str): url/path to cell objects (.parquet)
        tile_size (int): size of tiles to use (at the requested magnification)
        tile_stride (int): spacing between tiles
        intensity_label (str): column of the cell objects to use for intensity calculations (for the I-K function: spatial + scalar-value clustering)
        radius (float): the radius to consider
        output_urlpath (str): output URL/path prefix
        storage_options (dict): storage options for reading the cell objects

    Returns:
        dict: metadata about function call
    """
    config = get_config(vars())

    configure_dask_client()

    df_stats = extract_kfunction(
        config["input_cell_objects_urlpath"],
        config["tile_size"],
        config["intensity_label"],
        config["tile_stride"],
        config["radius"],
        config["storage_options"],
    )
    fs, output_urlpath_prefix = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )
    output_tile_header = Path(output_urlpath_prefix) / (
        str(Path(config["input_cell_objects_urlpath"]).stem)
        + "_kfunction_supertiles.parquet"
    )
    with fs.open(output_tile_header, "wb") as of:
        df_stats.to_parquet(of)

    properties = {
        "slide_tiles": str(output_tile_header),
    }

    return properties

extract_kfunction(input_cell_objects_urlpath, tile_size, intensity_label, tile_stride, radius, storage_options={})

Run the k-function using a sliding-window approach, where the k-function is computed locally in smaller windows and aggregated across the entire slide.

Parameters:

Name Type Description Default
input_cell_objects_urlpath str

URL/path to cell objects (.parquet)

required
tile_size int

size of tiles to use (at the requested magnification)

required
intensity_label str

column of the cell objects to use for intensity calculations (for the I-K function: spatial + scalar-value clustering)

required
tile_stride int

spacing between tiles

required
radius float

the radius to consider

required
storage_options dict

storage options for reading the cell objects

{}

Returns:

Type Description

pd.DataFrame: k-function feature data

Source code in src/luna/pathology/cli/extract_kfunction_statistics.py
def extract_kfunction(
    input_cell_objects_urlpath: str,
    tile_size: int,
    intensity_label: str,
    tile_stride: int,
    radius: float,
    storage_options: dict = {},
):
    """Run k function using a sliding window approach, where the k-function is computed locally in a smaller window, and aggregated across the entire slide.

    Args:
        input_cell_objects_urlpath (str): URL/path to cell objects (.parquet)
        tile_size (int): size of tiles to use (at the requested magnification)
        intensity_label (str): column of the cell objects to use for intensity calculations (for the I-K function: spatial + scalar-value clustering)
        tile_stride (int): spacing between tiles
        radius (float): the radius to consider
        storage_options (dict): storage options for reading the cell objects

    Returns:
        pd.DataFrame: k-function feature data
    """
    client = get_or_create_dask_client()
    df = pd.read_parquet(input_cell_objects_urlpath, storage_options=storage_options)

    l_address = []
    l_k_function_futures = []
    l_x_coord = []
    l_y_coord = []

    feature_name = (
        f"ikfunction_r{radius}_stain{intensity_label.replace(' ','_').replace(':','')}"
    )

    coords = product(
        range(int(df["x_coord"].min()), int(df["x_coord"].max()), tile_stride),
        range(int(df["y_coord"].min()), int(df["y_coord"].max()), tile_stride),
    )

    logger.info("Submitting tasks...")
    for x, y in coords:
        df_tile = df.query(
            f"x_coord >= {x} and x_coord <= {x+tile_size} and y_coord >={y} and y_coord <= {y+tile_size}"
        )

        if len(df_tile) < 3:
            continue

        future = client.submit(
            Kfunction,
            df_tile[["x_coord", "y_coord"]],
            df_tile[["x_coord", "y_coord"]],
            intensity=np.array(df_tile[intensity_label]),
            radius=radius,
            count=True,
        )

        l_address.append(coord_to_address((x, y), 0))
        l_k_function_futures.append(future)
        l_x_coord.append(x)
        l_y_coord.append(y)
    logger.info("Waiting for all tasks to complete...")
    progress(l_k_function_futures)
    l_k_function = client.gather(l_k_function_futures)

    df_stats = pd.DataFrame(
        {
            "address": l_address,
            "x_coord": l_x_coord,
            "y_coord": l_y_coord,
            "results": l_k_function,
        }
    ).set_index("address")
    df_stats.loc[:, "xy_extent"] = tile_size
    df_stats.loc[:, "tile_size"] = tile_size  # Same, 1 to 1
    df_stats.loc[:, "tile_units"] = "um"  # Same, 1 to 1

    df_stats[feature_name] = df_stats["results"].apply(lambda x: x["intensity"])
    df_stats[feature_name + "_norm"] = (
        df_stats[feature_name] / df_stats[feature_name].max()
    )

    df_stats = df_stats.drop(columns=["results"]).dropna()

    logger.info("Generated k-function feature data:")
    logger.info(df_stats)

    return df_stats

extract_shape_features

cli(slide_mask_urlpath='???', label_cols='???', output_urlpath='???', include_smaller_regions=False, storage_options={}, output_storage_options={}, local_config='')

Extracts shape and spatial features (HIF features) from a slide mask. This CLI extracts two sets of features. The first set is 'whole slide features', where the entire mask label is considered as a single region and features are extracted. These features are useful for determining things like the total area of tissue x.

The second set is 'regional features', where each label is split up according to its connectivity and features are extracted from these smaller regions. These features are useful for determining things like the solidity of the ten largest regions of tissue y. Pixel intensity values from the WSI are unused. To generate connected regions, skimage produces its own label mask in which different values correspond to different regions, which discards the tissue type information from the original mask. The original mask is therefore passed as an intensity image to ensure that each region can be associated with a tissue type.

Parameters:

Name Type Description Default
slide_mask_urlpath str

URL/path to slide mask (*.tif)

'???'
label_cols List[str]

list of labels that correspond to those in slide_mask_urlpath

'???'
output_urlpath str

output URL/path prefix

'???'
include_smaller_regions bool

include the smaller regions (not just the largest)

False
storage_options dict

storage options to pass to read functions

{}
output_storage_options dict

storage options to pass to write functions

{}
local_config str

local config YAML file

''

Returns:

Name Type Description
dict

output CSV path and the number of shapes for which features were generated

Source code in src/luna/pathology/cli/extract_shape_features.py
@timed
@save_metadata
def cli(
    slide_mask_urlpath: str = "???",
    label_cols: List[str] = "???",  # type: ignore
    output_urlpath: str = "???",  # type: ignore
    include_smaller_regions: bool = False,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Extracts shape and spatial features (HIF features) from a slide mask.
    This CLI extracts two sets of features. The first set are 'whole slide features', where
    the entire mask label is considred as a single region and features are extracted. These features
    are useful for determining things like total area of x tissue.

    The second set of features are 'regional features', where each label is split up according to
    their connectivity and features are extracted from these smaller regions.
    These features are useful for determining things like solidity of the top ten largest
    regions of tissue y. Pixel intensity values from the WSI are unused. In order to generate
    connected regions, skimage generates a mask itself where different values coorespond
    to different regions, which removes the tissue type information from the original mask.
    So, the original mask is passed as an intensity image to ensure that each region can be
    associated with a tissue type.

    Args:
        slide_mask_urlpath (str): URL/path to slide mask (*.tif)
        label_cols (List[str]): list of labels that correspond to those in slide_mask_urlpath
        output_urlpath (str): output URL/path prefix
        include_smaller_regions (bool): include the smaller regions (not just the largest)
        storage_options (dict): storage options to pass to read functions
        output_storage_options (dict): storage options to pass to write functions
        local_config (str): local config YAML file

    Returns:
        dict: output CSV path and the number of shapes for which features were generated

    """
    config = get_config(vars())

    with open(config["slide_mask_urlpath"], "rb", **config["storage_options"]) as of:
        mask = tifffile.imread(of)

    mask_values = {k: v + 1 for v, k in enumerate(config["label_cols"])}
    result_df = extract_shape_features(
        mask, mask_values, config["include_smaller_regions"]
    )

    fs, urlpath = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )

    output_fpath = Path(urlpath) / "shape_features.csv"
    with fs.open(output_fpath, "w") as of:
        result_df.to_csv(of)

    properties = {"shape_features": output_fpath, "num_shapes": len(result_df)}

    logger.info(properties)
    return properties

extract_shape_features(mask, mask_values, include_smaller_regions=False, properties=['area', 'bbox', 'bbox_area', 'centroid', 'convex_area', 'convex_image', 'coords', 'eccentricity', 'equivalent_diameter', 'euler_number', 'extent', 'filled_area', 'filled_image', 'image', 'inertia_tensor', 'inertia_tensor_eigvals', 'label', 'local_centroid', 'major_axis_length', 'minor_axis_length', 'moments', 'moments_central', 'moments_hu', 'moments_normalized', 'orientation', 'perimeter', 'slice', 'solidity'])

Extracts shape and spatial features (HIF features) from a slide mask

Args:

mask (np.ndarray): slide mask
mask_values (Dict[str, int]): map from label name to integer value in the mask
include_smaller_regions (bool): include the smaller regions (not just the largest)
properties (List[str]): list of shape properties to extract (defaults to the regionprops-style list shown in the signature)

Returns:

Type Description

pd.DataFrame: shape and spatial features

Source code in src/luna/pathology/cli/extract_shape_features.py
def extract_shape_features(
    mask: np.ndarray,
    mask_values: Dict[str, int],
    include_smaller_regions=False,
    properties: List[str] = [
        "area",
        "bbox",
        "bbox_area",
        "centroid",
        "convex_area",
        "convex_image",
        "coords",
        "eccentricity",
        "equivalent_diameter",
        "euler_number",
        "extent",
        "filled_area",
        "filled_image",
        "image",
        "inertia_tensor",
        "inertia_tensor_eigvals",
        "label",
        "local_centroid",
        "major_axis_length",
        "minor_axis_length",
        "moments",
        "moments_central",
        "moments_hu",
        "moments_normalized",
        "orientation",
        "perimeter",
        "slice",
        "solidity",
    ],
):
    """Extracts shape and spatial features (HIF features) from a slide mask

     Args:
        slide_mask_urlpath (str): url/path to slide mask (*.tif)
        label_cols (List[str]): list of labels that coorespond to those in slide_mask_urlpath

    Returns:
        pd.DataFrame: shape and spatial features
    """

    logger.info(f"Mask shape={mask.shape}")

    logger.info("Extracting regional features based on connectivity")
    whole_slide_features_df = extract_whole_slide_features(
        mask, mask_values, properties
    )
    whole_slide_features_df["Parent"] = "whole_region"
    whole_slide_features_df = whole_slide_features_df.set_index("Class")
    whole_slide_features_df["area_fraction"] = (
        whole_slide_features_df["area"] / whole_slide_features_df["area"].sum()
    )
    whole_slide_features_mdf = pd.melt(
        whole_slide_features_df.reset_index(), id_vars=["Parent", "Class"]
    )

    area_col = whole_slide_features_df.columns.get_loc("area")
    idx0, idx1 = np.triu_indices(len(whole_slide_features_df), 1)
    np.seterr(divide="ignore")
    whole_slide_ratio_df = pd.DataFrame(
        data={
            "Parent": "whole_region",
            "variable": np.array(
                [
                    f"area_log_ratio_to_{row}"
                    for row in whole_slide_features_df.index.values
                ]
            )[idx1],
            "value": np.log(whole_slide_features_df.iloc[idx0, area_col].values)
            - np.log(whole_slide_features_df.iloc[idx1, area_col].values),
        },
        index=whole_slide_features_df.index[idx0],
    )
    whole_slide_ratio_df = whole_slide_ratio_df.reset_index()

    regional_features_df = extract_regional_features(
        mask, mask_values, properties + ["min_intensity", "max_intensity"]
    )
    regional_features_df = regional_features_df.assign(
        Parent=[f"region_{x}" for x in range(len(regional_features_df))]
    )
    regional_features_df = regional_features_df.set_index(["Parent", "Class"])
    regional_features_df["area_fraction"] = (
        regional_features_df["area"] / whole_slide_features_df["area"]
    )
    regional_features_mdf = pd.melt(
        regional_features_df.reset_index(), id_vars=["Parent", "Class"]
    )

    regional_features_df = regional_features_df.reset_index()
    largest_regional_features_df = regional_features_df.loc[
        regional_features_df.groupby("Class")["area"].idxmax()
    ]
    largest_regional_features_df["Parent"] = "largest_region"
    largest_regional_features_df = largest_regional_features_df.set_index("Class")
    largest_regional_features_mdf = pd.melt(
        largest_regional_features_df.reset_index(), id_vars=["Parent", "Class"]
    )

    area_col = largest_regional_features_df.columns.get_loc("area")
    idx0, idx1 = np.triu_indices(len(largest_regional_features_df), 1)
    np.seterr(divide="ignore")
    ratio_df = pd.DataFrame(
        data={
            "Parent": "largest_region",
            "variable": np.array(
                [
                    f"area_log_ratio_to_{row}"
                    for row in largest_regional_features_df.index.values
                ]
            )[idx1],
            "value": np.log(largest_regional_features_df.iloc[idx0, area_col].values)
            - np.log(largest_regional_features_df.iloc[idx1, area_col].values),
        },
        index=largest_regional_features_df.index[idx0],
    )
    ratio_df = ratio_df.reset_index()

    result_df = pd.concat(
        [
            whole_slide_features_mdf,
            whole_slide_ratio_df,
            largest_regional_features_mdf,
            ratio_df,
        ]
    )

    if include_smaller_regions:
        result_df = pd.concat([result_df, regional_features_mdf])

    return result_df

extract_stain_texture

cli(slide_image_urlpath='???', slide_mask_urlpath='???', stain_sample_factor='???', stain_channel='???', tile_size='???', output_urlpath='.', storage_options={}, output_storage_options={}, local_config='')

Compute GLCM texture features on a de-convolved slide image

Parameters:

Name Type Description Default
slide_image_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

'???'
slide_mask_urlpath str

url/path to slide mask (.tif)

'???'
stain_sample_factor float

downsample factor to use for stain vector estimation

'???'
stain_channel int

which channel of the deconvolved image to use for texture analysis

'???'
tile_size int

size of tiles to use (at the requested magnification) (500-1000 recommended)

'???'
output_urlpath str

output/working directory

'.'

Returns:

Name Type Description
dict

metadata about function call

Source code in src/luna/pathology/cli/extract_stain_texture.py
@timed
@save_metadata
def cli(
    slide_image_urlpath: str = "???",
    slide_mask_urlpath: str = "???",
    stain_sample_factor: float = "???",  # type: ignore
    stain_channel: int = "???",  # type: ignore
    tile_size: int = "???",  # type: ignore
    output_urlpath: str = ".",
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Compute GLCM texture features on a de-convolved slide image

    Args:
        slide_image_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        slide_mask_urlpath (str): url/path to slide mask (.tif)
        stain_sample_factor (float): downsample factor to use for stain vector estimation
        stain_channel (int): which channel of the deconvolved image to use for texture analysis
        tile_size (int): size of tiles to use (at the requested magnification) (500-1000 recommended)
        output_urlpath (str): output/working directory

    Returns:
        dict: metadata about function call

    """
    config = get_config(vars())
    df_result = extract_stain_texture(
        config["slide_image_urlpath"],
        config["slide_mask_urlpath"],
        config["stain_sample_factor"],
        config["stain_channel"],
        config["tile_size"],
        config["output_urlpath"],
        config["storage_options"],
        config["output_storage_options"],
    )

    fs, urlpath_prefix = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )
    output_filename = Path(urlpath_prefix) / "stainomics.parquet"
    with fs.open(output_filename, "wb") as of:
        df_result.to_parquet(of, index=False)

    properties = {
        # "num_pixel_observations": n,
        "feature_data": output_filename,
    }

    return properties

extract_stain_texture(slide_image_urlpath, slide_mask_urlpath, stain_sample_factor, stain_channel, tile_size, output_urlpath, storage_options, output_storage_options)

Compute GLCM texture after automatically deconvolving the image into stain channels, using tile-based processing

Runs summary statistics on the distribution of texture values.

Saves the raw per-feature value vectors (.npy) to the output directory.

Parameters:

Name Type Description Default
slide_image_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

required
slide_mask_urlpath str

url/path to slide mask (.tif)

required
stain_sample_factor float

downsample factor to use for stain vector estimation

required
stain_channel int

which channel of the deconvolved image to use for texture analysis

required
tile_size int

size of tiles to use (at the requested magnification) (500-1000 recommended)

required
output_urlpath str

output/working URL/path prefix

required
storage_options dict

storage options to pass to reading functions

required
output_storage_options dict

storage options to pass to writing functions

required

Returns:

Name Type Description
DataFrame

texture feature summary statistics

Source code in src/luna/pathology/cli/extract_stain_texture.py
def extract_stain_texture(
    slide_image_urlpath: str,
    slide_mask_urlpath: str,
    stain_sample_factor: float,
    stain_channel: int,
    tile_size: int,
    output_urlpath: str,
    storage_options: dict,
    output_storage_options: dict,
):
    """Compute GLCM texture after automatically deconvolving the image into stain channels, using tile-based processing

    Runs summary statistics on the distribution of texture values.

    Saves the raw per-feature value vectors (.npy) to the output directory.

    Args:
        slide_image_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        slide_mask_urlpath (str): url/path to slide mask (.tif)
        stain_sample_factor (float): downsample factor to use for stain vector estimation
        stain_channel (int): which channel of the deconvolved image to use for texture analysis
        tile_size (int): size of tiles to use (at the requested magnification) (500-1000 recommended)
        output_urlpath (str): output/working URL/path prefix
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        pd.DataFrame: texture feature summary statistics
    """
    with open(slide_image_urlpath, "rb", **storage_options) as slide_file:
        slide = tiffslide.TiffSlide(slide_file)
        # oslide = openslide.OpenSlide(slide_image_urlpath)

        logger.info(f"Slide dimensions {slide.dimensions}")
        sample_arr = get_downscaled_thumbnail(slide, stain_sample_factor)

    slide_full_generator, slide_full_level = get_full_resolution_generator(
        slide_image_urlpath, tile_size=tile_size, storage_options=storage_options
    )

    mask_full_generator, mask_full_level = get_full_resolution_generator(
        slide_mask_urlpath, tile_size=tile_size, storage_options=storage_options
    )

    stain_vectors = get_stain_vectors_macenko(sample_arr)

    logger.info(f"Stain vectors={stain_vectors}")

    tile_x_count, tile_y_count = slide_full_generator.level_tiles[slide_full_level]
    logger.info("Tiles x %s, Tiles y %s", tile_x_count, tile_y_count)

    # populate address, coordinates
    address_raster = [
        address
        for address in itertools.product(range(tile_x_count), range(tile_y_count))
    ]
    logger.info("Number of tiles in raster: %s", len(address_raster))

    features = defaultdict(list)

    N_tiles = len(address_raster)
    for n_tile, address in tqdm(enumerate(address_raster), file=sys.stdout):
        mask_patch = np.array(mask_full_generator.get_tile(mask_full_level, address))

        if not np.count_nonzero(mask_patch) > 1:
            continue

        image_patch = np.array(slide_full_generator.get_tile(slide_full_level, address))

        texture_values = extract_patch_texture_features(
            image_patch,
            mask_patch,
            stain_vectors,
            stain_channel,
            plot=False,
        )

        if texture_values is not None:
            for key, values in texture_values.items():
                features[key].append(values)
        logger.info(f"Processed Tile [{n_tile} / {N_tiles}] at {address}")
    for key, values in features.items():
        features[key] = np.concatenate(values).flatten()
    logger.debug(features)

    hist_features = {}
    fs, output_urlpath_prefix = fsspec.core.url_to_fs(
        output_urlpath, **output_storage_options
    )
    for key, values in features.items():
        output_path = Path(output_urlpath_prefix) / f"feature_vector_{key}.npy"
        with fs.open(output_path, "wb") as of:
            np.save(of, values)

        if not len(values) > 0:
            continue

        n, (smin, smax), sm, sv, ss, sk = scipy.stats.describe(values)

        if np.min(values) > 0:
            ln_params = scipy.stats.lognorm.fit(values, floc=0)
        else:
            ln_params = (0, 0, 0)

        fx_name_prefix = f"{key}_channel_{stain_channel}"
        hist_features.update(
            {
                f"{fx_name_prefix}_nobs": n,
                f"{fx_name_prefix}_min": smin,
                f"{fx_name_prefix}_max": smax,
                f"{fx_name_prefix}_mean": sm,
                f"{fx_name_prefix}_variance": sv,
                f"{fx_name_prefix}_skewness": ss,
                f"{fx_name_prefix}_kurtosis": sk,
                f"{fx_name_prefix}_lognorm_fit_p0": ln_params[0],
                f"{fx_name_prefix}_lognorm_fit_p2": ln_params[2],
            }
        )

    # The fit may fail sometimes, replace inf with 0
    df_result = (
        pd.DataFrame(data=hist_features, index=[0])
        .replace([np.inf, -np.inf], 0.0)
        .astype(float)
    )
    logger.info(df_result)

    return df_result

extract_tile_shape_features

__extract_tile_shape_features(objects_urlpath, tiles_urlpath, slide_urlpath, output_urlpath, resize_factor=16, detection_probability_threshold=None, slide_id='', statistical_descriptors=StatisticalDescriptors.ALL, cellular_features=CellularFeatures.ALL, property_type=PropertyType.ALL, include_smaller_regions=False, label_cols=None, storage_options={}, output_storage_options={}, properties=['area', 'convex_area', 'eccentricity', 'equivalent_diameter', 'euler_number', 'extent', 'label', 'major_axis_length', 'minor_axis_length', 'perimeter', 'solidity'])

Extracts shape and spatial features (HIF features) from a slide mask.

Args:

objects_urlpath (str): URL/path to object file (geopandas supported formats)
tiles_urlpath (str): URL/path to tiles manifest (parquet)
slide_urlpath (str): URL/path to slide (tiffslide supported formats)
output_urlpath (str): output URL/path
resize_factor (int): factor to downsample slide image
detection_probability_threshold (Optional[float]): detection probability threshold
slide_id (str): slide ID to add to dataframes
statistical_descriptors (StatisticalDescriptors): statistical descriptors to calculate
cellular_features (CellularFeatures): cellular features to include
property_type (PropertyType): properties to include
include_smaller_regions (bool): include smaller regions
label_cols (List[str]): list of score columns to use for the classification; a tile is classified as the column with the max score
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
properties (List[str]): list of whole slide image properties to extract; needs to be parquet compatible (numeric)

Returns:

dict: output paths and the number of features generated

Source code in src/luna/pathology/cli/extract_tile_shape_features.py
def __extract_tile_shape_features(
    objects_urlpath: str,
    tiles_urlpath: str,
    slide_urlpath: str,
    output_urlpath: str,
    resize_factor: int = 16,
    detection_probability_threshold: Optional[float] = None,
    slide_id: str = "",
    statistical_descriptors: StatisticalDescriptors = StatisticalDescriptors.ALL,
    cellular_features: CellularFeatures = CellularFeatures.ALL,
    property_type: PropertyType = PropertyType.ALL,
    include_smaller_regions: bool = False,
    label_cols: Optional[List[str]] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    properties: List[str] = [
        "area",
        "convex_area",
        "eccentricity",
        "equivalent_diameter",
        "euler_number",
        "extent",
        "label",
        "major_axis_length",
        "minor_axis_length",
        "perimeter",
        "solidity",
    ],
):
    """Extracts shape and spatial features (HIF features) from a slide mask.

     Args:
        objects_urlpath (str): URL/path to object file (geopandas supported formats)
        tiles_urlpath (str): URL/path to tiles manifest (parquet)
        slide_urlpath (str): URL/path to slide (tiffslide supported formats)
        output_urlpath (str): output URL/path
        resize_factor (int): factor to downsample slide image
        detection_probability_threshold (Optional[float]): detection
            probability threshold
        slide_id (str): Slide ID to add to dataframes
        statistical_descriptors (StatisticalDescriptors): statistical descriptors to calculate
        cellular_features (CellularFeatures): cellular features to include
        property_type (PropertyType): properties to include
        include_smaller_regions (bool): include smaller regions
        label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        properties (List[str]): list of whole slide image properties to
            extract. Needs to be parquet compatible (numeric).
    Returns:
        dict: output paths and the number of features generated
    """

    ofs, path = fsspec.core.url_to_fs(
        output_urlpath,
        **output_storage_options,
    )

    output_fpath = Path(path) / "shape_features.parquet"

    if ofs.exists(str(output_fpath)):
        logger.info(
            f"Output file already exist: {ofs.unstrip_protocol(str(output_fpath))}"
        )
        return {}

    with open(tiles_urlpath, **storage_options) as of:
        tiles_df = pd.read_parquet(of)

    with open(objects_urlpath, **storage_options) as of:
        object_gdf = gpd.read_file(of)

    with open(slide_urlpath, **storage_options) as of:
        slide = tiffslide.TiffSlide(of)
        slide_width = slide.dimensions[0]
        slide_height = slide.dimensions[1]

    if label_cols:
        tiles_df["Classification"] = tiles_df[label_cols].idxmax(axis=1)
    LabeledTileSchema.validate(tiles_df.reset_index())

    tile_area = tiles_df.iloc[0].tile_size ** 2

    counts = tiles_df.Classification.value_counts()

    combis = itertools.combinations(counts.index, 2)
    joint_entropy = []
    for i, j in combis:
        ent = {}
        ent["Parent"] = "whole_region"
        ent["Class"] = i
        ent["variable"] = f"Joint Entropy to {j}"
        ent["value"] = entropy(counts[[i, j]], base=2)
        joint_entropy.append(ent)

    entropy_df = pd.DataFrame(joint_entropy)

    shannon_entropy = entropy(counts, base=2)
    entropy_df = pd.concat(
        [
            entropy_df,
            pd.DataFrame(
                [
                    {
                        "Parent": "whole_region",
                        "Class": "All",
                        "variable": "Entropy",
                        "value": shannon_entropy,
                    }
                ]
            ),
        ],
        ignore_index=True,
    )

    slide_area = counts * tile_area
    slide_area.index.name = "Parent"

    mask, mask_values = convert_tiles_to_mask(
        tiles_df, slide_width, slide_height, "Classification"
    )

    resized_mask = resize_array(mask, resize_factor)
    shape_features_df = extract_shape_features(
        resized_mask, mask_values, include_smaller_regions, properties
    )

    ann_region_polygons = [
        box(
            row.x_coord,
            row.y_coord,
            row.x_coord + row.xy_extent,
            row.y_coord + row.xy_extent,
        )
        for _, row in tiles_df.iterrows()
    ]
    tiles_gdf = gpd.GeoDataFrame(
        data=tiles_df, geometry=ann_region_polygons, crs="EPSG:4326"
    )

    logger.info("Spatially joining tiles and objects")
    gdf = object_gdf.sjoin(tiles_gdf, how="inner", predicate="within")
    if len(gdf) == 0:
        logger.info("No objects found within tiles")
        return None
    try:
        measurement_keys = list(gdf.measurements.iloc[0].keys())
        gdf = gdf.join(gdf.measurements.apply(lambda x: pd.Series(x)))
    except Exception:
        measurements = gdf.measurements.apply(
            lambda x: pd.DataFrame(json.loads(x)).set_index("name").squeeze()
        )
        measurement_keys = list(measurements.columns.values)
        gdf = gdf.join(measurements)
    gdf = gdf.join(gdf.classification.apply(lambda x: pd.Series(x)))
    gdf = gdf.rename(columns={"name": "Class", "Classification": "Parent"})

    gdf.Parent = gdf.Parent.astype("category")
    gdf.Class = gdf.Class.astype("category")

    if detection_probability_threshold:
        gdf = gdf.query(f"`Detection probability` > {detection_probability_threshold}")

    agg_keys = measurement_keys.copy()
    agg_keys.remove("Detection probability")
    logger.info("Calculating object measurement statistics")
    gb = gdf.groupby(by=["Parent", "Class"])[agg_keys]
    agg_funs = STATISTICAL_DESCRIPTOR_MAP[statistical_descriptors]
    agg_df = gb.agg(agg_funs)
    agg_df.columns = [" ".join(col).strip() for col in agg_df.columns.values]

    cell_density = None
    if "Cell: Area µm^2 sum" in agg_df.columns:
        cell_density = agg_df["Cell: Area µm^2 sum"] / (slide_area / 4)

    if cellular_features != CellularFeatures.ALL:
        agg_df = agg_df.filter(regex=cellular_features)

    if property_type != PropertyType.ALL:
        property_types = PROPERTY_TYPE_MAP[property_type]
        agg_df = agg_df.filter(regex="|".join(property_types))

    agg_df["Object Counts"] = gb.size()
    agg_df["Normalized Cell Density"] = agg_df["Object Counts"] / slide_area
    if cell_density is not None:
        agg_df["Cell Density"] = cell_density

    logger.info(
        "Calculating obj count log ratios between all tile label obj classification groups"
    )
    count_col = agg_df.columns.get_loc("Object Counts")
    idx0, idx1 = np.triu_indices(len(agg_df), 1)
    np.seterr(divide="ignore")
    ratio_df = pd.DataFrame(
        data={
            "variable": np.array(
                [
                    "Object Count Log Ratio to " + " ".join(row).strip()
                    for row in agg_df.index.values
                ]
            )[idx1],
            "value": np.log(agg_df.iloc[idx0, count_col].values)
            - np.log(agg_df.iloc[idx1, count_col].values),
        },
        index=agg_df.index[idx0],
    )

    mdf = pd.melt(agg_df.reset_index(), id_vars=["Parent", "Class"]).dropna()
    mdf = pd.concat([mdf, ratio_df.reset_index(), shape_features_df, entropy_df])

    if slide_id:
        mdf.insert(loc=0, column="slide_id", value=slide_id)

    mdf[["Parent", "Class", "variable"]] = mdf[["Parent", "Class", "variable"]].replace(
        r"_", " ", regex=True
    )

    with ofs.open(output_fpath, "wb") as of:
        mdf.to_parquet(of)

    props = {
        "shape_features_url": ofs.unstrip_protocol(str(output_fpath)),
        "num_features": len(mdf),
    }

    logger.info(props)

    return props

cli(slide_urlpath='???', object_urlpath='???', tiles_urlpath='???', output_urlpath='.', resize_factor=16, detection_probability_threshold=None, statistical_descriptors=StatisticalDescriptors.ALL, cellular_features=CellularFeatures.ALL, property_type=PropertyType.ALL, include_smaller_regions=False, label_cols=None, storage_options={}, output_storage_options={}, local_config='')

Extracts shape and spatial features (HIF features) from a slide mask.

Args:

    slide_urlpath (str): URL/path to slide (tiffslide supported formats)
    object_urlpath (str): URL/path to object file (geopandas supported formats)
    tiles_urlpath (str): URL/path to tiles manifest (parquet)
    output_urlpath (str): URL/path to output parquet file
    resize_factor (int): factor to downsample slide image
    detection_probability_threshold (Optional[float]): detection probability threshold
    statistical_descriptors (str): statistical descriptors to calculate. One of All, Quantiles, Stats, or Density
    cellular_features (str): cellular features to include. One of All, Nucleus, Cell, Cytoplasm, and Membrane
    property_type (str): properties to include. One of All, Geometric, or Stain
    include_smaller_regions (bool): include smaller regions in output
    label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
    storage_options (dict): storage options to pass to reading functions
    output_storage_options (dict): storage options to pass to writing functions
    local_config (str): local config yaml file

Returns:

    dict: output paths and the number of features generated

Source code in src/luna/pathology/cli/extract_tile_shape_features.py
@timed
@save_metadata
def cli(
    slide_urlpath: str = "???",
    object_urlpath: str = "???",
    tiles_urlpath: str = "???",
    output_urlpath: str = ".",
    resize_factor: int = 16,
    detection_probability_threshold: Optional[float] = None,
    statistical_descriptors: str = StatisticalDescriptors.ALL,
    cellular_features: str = CellularFeatures.ALL,
    property_type: str = PropertyType.ALL,
    include_smaller_regions: bool = False,
    label_cols: List[str] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Extracts shape and spatial features (HIF features) from a slide mask.

     Args:
        slide_urlpath (str): URL/path to slide (tiffslide supported formats)
        object_urlpath (str): URL/path to object file (geopandas supported formats)
        tiles_urlpath (str): URL/path to tiles manifest (parquet)
        output_urlpath (str): URL/path to output parquet file
        resize_factor (int): factor to downsample slide image
        detection_probability_threshold (Optional[float]): detection probability threshold
        statistical_descriptors (str): statistical descriptors to calculate. One of All, Quantiles, Stats, or Density
        cellular_features (str): cellular features to include. One of All, Nucleus, Cell, Cytoplasm, and Membrane
        property_type (str): properties to include. One of All, Geometric, or Stain
        include_smaller_regions (bool): include smaller regions in output
        label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        local_config (str): local config yaml file

    Returns:
        dict: output paths and the number of features generated
    """
    config = get_config(vars())

    slide_id = Path(config["slide_urlpath"]).stem

    statistical_descriptors = config["statistical_descriptors"].capitalize()
    cellular_features = config["cellular_features"].capitalize()
    property_type = config["property_type"].capitalize()

    properties = __extract_tile_shape_features(
        config["object_urlpath"],
        config["tiles_urlpath"],
        config["slide_urlpath"],
        config["output_urlpath"],
        config["resize_factor"],
        config["detection_probability_threshold"],
        slide_id,
        statistical_descriptors,
        cellular_features,
        property_type,
        config["include_smaller_regions"],
        config["label_cols"],
        config["storage_options"],
        config["output_storage_options"],
    )
    return properties
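
For reference, a minimal sketch of calling this CLI entrypoint from Python — the file paths below are hypothetical, and any fsspec-compatible URL works with matching storage_options:

from luna.pathology.cli.extract_tile_shape_features import cli

# hypothetical local paths; outputs land under output_urlpath
props = cli(
    slide_urlpath="./slides/10001.svs",
    object_urlpath="./objects/10001.geojson",
    tiles_urlpath="./tiles/10001.tiles.parquet",
    output_urlpath="./features/10001/",
)
print(props["shape_features_url"], props["num_features"])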

extract_tile_shape_features(slide_manifest, output_urlpath, resize_factor=16, detection_probability_threshold=None, statistical_descriptors=StatisticalDescriptors.ALL, cellular_features=CellularFeatures.ALL, property_type=PropertyType.ALL, include_smaller_regions=False, label_cols=None, storage_options={}, output_storage_options={}, objects_column='stardist_geojson_url', annotation_column='tile_shape_features_url', properties=['area', 'convex_area', 'eccentricity', 'equivalent_diameter', 'euler_number', 'extent', 'label', 'major_axis_length', 'minor_axis_length', 'perimeter', 'solidity'])

Extracts shape and spatial features (HIF features) from a slide mask.

Args:

    slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
    output_urlpath (str): output URL/path
    resize_factor (int): factor to downsample slide image
    detection_probability_threshold (Optional[float]): detection probability threshold
    statistical_descriptors (str): statistical descriptors to calculate. One of All, Quantiles, Stats, or Density
    cellular_features (str): cellular features to include. One of All, Nucleus, Cell, Cytoplasm, and Membrane
    property_type (str): properties to include. One of All, Geometric, or Stain
    include_smaller_regions (bool): include smaller regions in output
    label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
    storage_options (dict): storage options to pass to reading functions
    output_storage_options (dict): storage options to pass to writing functions
    local_config (str): local config yaml file
    objects_column (str): slide manifest column name with stardist geoJSON URLs
    annotation_column (str): column to add to slide manifest with url to extracted features
    properties (List[str]): properties to extract

Returns:

    DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/extract_tile_shape_features.py
def extract_tile_shape_features(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    resize_factor: int = 16,
    detection_probability_threshold: Optional[float] = None,
    statistical_descriptors: StatisticalDescriptors = StatisticalDescriptors.ALL,
    cellular_features: CellularFeatures = CellularFeatures.ALL,
    property_type: PropertyType = PropertyType.ALL,
    include_smaller_regions: bool = False,
    label_cols: List[str] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    objects_column="stardist_geojson_url",
    annotation_column="tile_shape_features_url",
    properties: List[str] = [
        "area",
        "convex_area",
        "eccentricity",
        "equivalent_diameter",
        "euler_number",
        "extent",
        "label",
        "major_axis_length",
        "minor_axis_length",
        "perimeter",
        "solidity",
    ],
):
    """Extracts shape and spatial features (HIF features) from a slide mask.

     Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        output_urlpath (str): output URL/path
        resize_factor (int): factor to downsample slide image
        detection_probability_threshold (Optional[float]): detection probability threshold
        statistical_descriptors (str): statistical descriptors to calculate. One of All, Quantiles, Stats, or Density
        cellular_features (str): cellular features to include. One of All, Nucleus, Cell, Cytoplasm, and Membrane
        property_type (str): properties to include. One of All, Geometric, or Stain
        include_smaller_regions (bool): include smaller regions in output
        label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        local_config (str): local config yaml file
        objects_column (str): slide manifest column name with stardist geoJSON URLs
        annotation_column (str): column to add to slide manifest with url to extracted features
        properties (List[str]): properties to extract

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """
    client = get_or_create_dask_client()

    futures = []
    for _, row in slide_manifest.iterrows():
        future = client.submit(
            __extract_tile_shape_features,
            row[objects_column],
            row["tiles_url"],
            row["url"],
            output_urlpath,
            resize_factor,
            detection_probability_threshold,
            row["id"],
            statistical_descriptors,
            cellular_features,
            property_type,
            include_smaller_regions,
            label_cols,
            storage_options,
            output_storage_options,
            properties,
        )
        futures.append(future)

    progress(futures)
    results = client.gather(futures)

    return slide_manifest.assign(
        **{annotation_column: [x["shape_features_url"] for x in results]}
    )
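
A minimal sketch of the manifest-level API, assuming a dask client is configured and a slide_manifest produced by slide_etl whose rows carry the stardist_geojson_url and tiles_url columns (the manifest and paths below are hypothetical):

from luna.pathology.cli.extract_tile_shape_features import extract_tile_shape_features

# slide_manifest: DataFrame[SlideSchema] from slide_etl (hypothetical)
slide_manifest = extract_tile_shape_features(
    slide_manifest,
    output_urlpath="./features/",
)
# each row gains a tile_shape_features_url column pointing at its parquet output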

extract_tile_statistics

cli(tiles_urlpath='???', output_urlpath='???', storage_options={}, output_storage_options={}, local_config='')

Extracts statistics over tiles

Parameters:

    tiles_urlpath (str, default '???'): Tiles parquet file for slide(s). Absolute or relative filepath. Prefix with protocol to read from alternative filesystems
    output_urlpath (str, default '???'): Output prefix. Absolute or relative filepath. Prefix with protocol to write to alternative filesystems
    storage_options (dict, default {}): extra options that make sense for reading from a particular storage connection
    output_storage_options (dict, default {}): extra options that make sense for writing to a particular storage connection
    local_config (str, default ''): local config yaml file
Source code in src/luna/pathology/cli/extract_tile_statistics.py
@timed
@save_metadata
def cli(
    tiles_urlpath: str = "???",
    output_urlpath: str = "???",
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Extracts statistics over tiles

    Args:
        tiles_urlpath (str): Tiles parquet file for slide(s). Absolute or relative filepath. Prefix with protocol to read from alternative filesystems
        output_urlpath (str): Output prefix. Absolute or relative filepath. Prefix with protocol to write to alternative filesystems
        storage_options (dict): extra options that make sense for reading from a particular storage connection
        output_storage_options (dict): extra options that make sense for writing to a particular storage connection
        local_config (str): local config yaml file

    """
    config = get_config(vars())

    df_feature_data = extract_tile_statistics(
        config["tiles_urlpath"],
        config["storage_options"],
    )

    fs, output_path_prefix = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )

    o = urlparse(config["tiles_urlpath"])
    id = Path(o.path).stem

    output_feature_file = Path(output_path_prefix) / f"{id}_tile_stats.parquet"

    logger.info(df_feature_data)
    with fs.open(output_feature_file, "wb") as f:
        df_feature_data.to_parquet(f)

    properties = {"feature_data": str(output_feature_file)}

    return properties

extract_tile_statistics(tiles_urlpath, storage_options)

Extracts statistics over tiles

Parameters:

    tiles_urlpath (str, required): Tiles parquet file for slide(s). Absolute or relative filepath. Prefix with protocol to read from alternative filesystems
    storage_options (dict, required): extra options that make sense for reading from a particular storage connection

Returns:

    pd.DataFrame: one row of statistics computed over the tile columns

Source code in src/luna/pathology/cli/extract_tile_statistics.py
def extract_tile_statistics(
    tiles_urlpath: str,
    storage_options: dict,
):
    """Extracts statistics over tiles

    Args:
        tiles_urlpath (str): Tiles parquet file for slide(s). Absolute or relative filepath. Prefix with protocol to read from alternative filesystems
        storage_options (dict): extra options that make sense for reading from a particular storage connection

    Returns:
        pd.DataFrame: one row of statistics computed over the tile columns
    """

    df = (
        pd.read_parquet(tiles_urlpath, storage_options=storage_options)
        .reset_index()
        .set_index("address")
        .drop(
            columns=["x_coord", "y_coord", "tile_size", "xy_extent", "tile_units"],
            errors="ignore",
        )
    )
    print(df.columns)

    dict_feature_data = {}

    for col in df.columns:
        dict_feature_data.update(
            luna.common.stats.compute_stats_1d(pd.to_numeric(df[col]), col)
        )

    df_feature_data = pd.DataFrame([dict_feature_data])

    return df_feature_data
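
A short sketch of calling extract_tile_statistics directly; the tiles path below is hypothetical:

from luna.pathology.cli.extract_tile_statistics import extract_tile_statistics

df_stats = extract_tile_statistics(
    "./tiles/10001.tiles.parquet",  # hypothetical tiles manifest
    storage_options={},
)
# one row of compute_stats_1d() descriptors per numeric tile column
print(df_stats.T)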

generate_mask

cli(slide_urlpath='???', roi_urlpath='???', output_urlpath='???', annotation_name='???', storage_options={}, output_storage_options={}, local_config='')

Generate a full resolution mask image (.tif) from vector annotations (polygons, shapes)

Inputs:
    input_slide_image: slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
    input_slide_roi: roi containing vector shapes (*.annotations, *.json)
Outputs:
    slide_mask
Example:
    generate_mask ./slides/10001.svs ./halo/10001.job18484.annotations
        -an Tumor
        -o ./masks/10001/

Source code in src/luna/pathology/cli/generate_mask.py
@timed
@save_metadata
def cli(
    slide_urlpath: str = "???",
    roi_urlpath: str = "???",
    output_urlpath: str = "???",
    annotation_name: str = "???",
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Generate a full resolution mask image (.tif) from vector annotations (polygons, shapes)

    \b
    Inputs:
        input_slide_image: slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        input_slide_roi: roi containing vector shapes (*.annotations, *.json)
    \b
    Outputs:
        slide_mask
    \b
    Example:
        generate_mask ./slides/10001.svs ./halo/10001.job18484.annotations
            -an Tumor
            -o ./masks/10001/
    """
    config = get_config(vars())
    df = generate_mask(
        config["slide_urlpath"],
        config["roi_urlpath"],
        config["output_urlpath"],
        config["annotation_name"],
        config["storage_options"],
        config["output_storage_options"],
    )

    fs, output_urlpath_prefix = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )
    output_filename = Path(output_urlpath_prefix) / "mask_data.parquet"
    with fs.open(output_filename, "wb") as of:
        df.to_parquet(of)

    slide_id = Path(config["roi_urlpath"]).stem
    properties = {
        "slide_mask": Path(output_urlpath_prefix) / "mask_full_res.tif",
        "feature_data": output_filename,
        "mask_size": df["mask_size"].tolist(),
        "segment_keys": {"slide_id": slide_id},
    }

    return properties

generate_mask(slide_urlpath, roi_urlpath, output_urlpath, annotation_name, storage_options, output_storage_options)

Generate a full resolution mask image (.tif) from vector annotations (polygons, shapes)

Take into account positive and negative spaces. Essentially rasterizes a polygon file.

Parameters:

    slide_urlpath (str, required): slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...) absolute or relative path. prefix with scheme to use alternative file systems.
    roi_urlpath (str, required): halo or other polygonal annotation file (.xml, .geojson) absolute or relative path. prefix with scheme to use alternative file systems.
    output_urlpath (str, required): output/working absolute or relative path. prefix with scheme to use alternative file systems.
    annotation_name (str, required): name of annotation layer to use
    storage_options (dict, required): storage options that make sense for the file storage used

Returns:

    DataFrame: mask properties

Source code in src/luna/pathology/cli/generate_mask.py
@local_cache_urlpath(
    dir_key_write_mode={
        "output_urlpath": "w",
    }
)
def generate_mask(
    slide_urlpath: str,
    roi_urlpath: str,
    output_urlpath: str,
    annotation_name: str,
    storage_options: dict,
    output_storage_options: dict,
):
    """Generate a full resolution mask image (.tif) from vector annotations (polygons, shapes)

    Take into account positive and negative spaces.  Essentially rasterizes a polygon file.

    Args:
        slide_urlpath (str): slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...) absolute or relative path. prefix with scheme to use alternative file systems.
        roi_urlpath (str):  halo or other polygonal annotation file (.xml, .geojson) absolute or relative path. prefix with scheme to use alternative file systems.
        output_urlpath (str): output/working absolute or relative path. prefix with scheme to use alternative file systems.
        annotation_name (str): name of annotation layer to use
        storage_options (dict): storage options that make sense for the file storage used

    Returns:
        DataFrame: mask properties
    """
    mask_properties = {}

    with open(slide_urlpath, **storage_options) as of:
        slide = tiffslide.TiffSlide(of)
        thumbnail = slide.get_thumbnail((1000, 1000))

    with open(Path(output_urlpath) / "slide_thumbnail.png", "wb") as of:
        thumbnail.save(of, format="PNG")

    wsi_shape = (
        slide.dimensions[1],
        slide.dimensions[0],
    )  # Annotation file has flipped dimensions w.r.t openslide conventions
    logger.info(f"Slide shape={wsi_shape}")

    layer_names = get_layer_names(roi_urlpath, storage_options)
    logger.info(f"Available layer names={layer_names}")

    mask_properties["layer_names"] = list(layer_names)
    mask_properties["mask_size"] = list(wsi_shape)

    mask_arr, xml_region_properties = convert_xml_to_mask(
        roi_urlpath, wsi_shape, annotation_name, storage_options=storage_options
    )

    mask_properties.update(xml_region_properties)

    logger.info(f"Generating mask thumbnail, mask size={mask_arr.shape}")
    mask_thumbnail = openslide.ImageSlide(
        Image.fromarray(
            255 * block_reduce(mask_arr, block_size=(10, 10), func=np.mean, cval=0.0)
        )
    ).get_thumbnail((1000, 1000))

    with open(Path(output_urlpath) / "mask_thumbnail.png", "wb") as of:
        mask_thumbnail.save(of, format="PNG")

    slide_mask_file = Path(output_urlpath) / "mask_full_res.tif"
    with open(slide_mask_file, "wb") as of:
        tifffile.imwrite(of, mask_arr)

    return pd.DataFrame(mask_properties)
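
The Python equivalent of the CLI example above; the paths and layer name are illustrative:

from luna.pathology.cli.generate_mask import generate_mask

df_mask_props = generate_mask(
    slide_urlpath="./slides/10001.svs",
    roi_urlpath="./halo/10001.job18484.annotations",
    output_urlpath="./masks/10001/",
    annotation_name="Tumor",
    storage_options={},
    output_storage_options={},
)
# mask_full_res.tif plus slide/mask thumbnails are written under output_urlpath;
# the returned DataFrame holds the mask properties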

generate_tile_labels

cli(annotation_urlpath='???', tiles_urlpath='???', slide_id='???', output_urlpath='???', storage_options={}, output_storage_options={}, local_config='')

Queries the dataset at input_slide_annotation_dataset for a slide_id matching input_slide_tiles

Adds regional_label, intersection_area columns to slide tiles, where the former is the annotation label, and the latter the fraction of intersecting area between the tile and annotation regions

Parameters:

    annotation_urlpath (str, default '???'): url/path to parquet annotation dataset
    tiles_urlpath (str, default '???'): url/path to a slide-tile manifest file (.tiles.parquet)
    slide_id (str, default '???'): slide ID
    output_urlpath (str, default '???'): output url/path prefix
    storage_options (dict, default {}): options to pass to reading functions
    output_storage_options (dict, default {}): options to pass to writing functions
    local_config (str, default ''): url/path to local config YAML file

Returns:

    dict: metadata

Source code in src/luna/pathology/cli/generate_tile_labels.py
@timed
@save_metadata
def cli(
    annotation_urlpath: str = "???",
    tiles_urlpath: str = "???",
    slide_id: str = "???",
    output_urlpath: str = "???",
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Queries the dataset at input_slide_annotation_dataset for a slide_id matching input_slide_tiles

    Adds regional_label, intersection_area columns to slide tiles, where the former is the annotation label, and the latter the fraction of intersecting area between the tile and annotation regions

    Args:
        annotation_urlpath (str): url/path to parquet annotation dataset
        tiles_urlpath (str): url/path to a slide-tile manifest file (.tiles.parquet)
        slide_id (str): slide ID
        output_urlpath (str): output url/path prefix
        storage_options (dict): options to pass to reading functions
        output_storage_options (dict): options to pass to writing functions
        local_config (str): url/path to local config YAML file
    Returns:
        dict: metadata
    """
    config = get_config(vars())

    df_tiles = generate_tile_labels(
        config["annotation_urlpath"],
        config["tiles_urlpath"],
        config["slide_id"],
        config["storage_options"],
    )

    fs, output_urlpath_prefix = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )
    output_header_file = (
        Path(output_urlpath_prefix)
        / f"{config['slide_id']}.regional_label.tiles.parquet"
    )
    with fs.open(output_header_file, "wb") as of:
        df_tiles.to_parquet(of)

    properties = {
        "slide_tiles": output_header_file,  # "Tiles" are the metadata that describe them
    }

    return properties

generate_tile_labels(annotation_urlpath, tiles_urlpath, slide_id, storage_options={})

Queries the dataset at input_slide_annotation_dataset for a slide_id matching input_slide_tiles

Adds regional_label, intersection_area columns to slide tiles, where the former is the annotation label, and the latter the fraction of intersecting area between the tile and annotation regions

Parameters:

    annotation_urlpath (str, required): url/path to parquet annotation dataset
    tiles_urlpath (str, required): url/path to a slide-tile manifest file (.tiles.parquet)
    slide_id (str, required): slide ID
    storage_options (dict, default {}): options to pass to reading functions

Returns:

    pd.DataFrame: tile dataframe with regional_label and intersection_area columns

Source code in src/luna/pathology/cli/generate_tile_labels.py
def generate_tile_labels(
    annotation_urlpath: str,
    tiles_urlpath: str,
    slide_id: str,
    storage_options: dict = {},
):
    """Queries the dataset at input_slide_annotation_dataset for a slide_id matching input_slide_tiles

    Adds regional_label, intersection_area columns to slide tiles, where the former is the annotation label, and the latter the fraction of intersecting area between the tile and annotation regions

    Args:
        annotation_urlpath (str): url/path to parquet annotation dataset
        tiles_urlpath (str): url/path to a slide-tile manifest file (.tiles.parquet)
        slide_id (str): slide ID
        storage_options (dict): options to pass to reading functions
    Returns:
        pd.DataFrame: tile dataframe with regional_label, and intersection_area columns
    """
    slide_id = str(slide_id)
    logger.info(f"slide_id={slide_id}")

    with open(annotation_urlpath, **storage_options) as of:
        df_annotation = pd.read_parquet(of)

    if slide_id not in df_annotation.index:
        raise RuntimeError("No matching annotations found for slide!")

    df_annotation = df_annotation.loc[[slide_id]].query("type=='geojson'")

    if not len(df_annotation):
        raise RuntimeError("No matching geojson annotations found!")

    slide_geojson, collection_name, annotation_name = (
        df_annotation.slide_geojson.item(),
        df_annotation.collection_name.item(),
        df_annotation.annotation_name.item(),
    )

    print(slide_geojson, collection_name, annotation_name)

    with open(slide_geojson) as f:
        features = json.load(f)["features"]

    d_collections = {}

    for feature in features:
        label = feature["properties"]["label"]

        if label not in d_collections.keys():
            d_collections[label] = []

        d_collections[label].append(shape(feature["geometry"]).buffer(0))

    for label in d_collections.keys():
        d_collections[label] = GeometryCollection(d_collections[label])

    with open(tiles_urlpath, **storage_options) as of:
        df_tiles = pd.read_parquet(of).reset_index().set_index("address")
    l_regional_labels = []
    l_intersection_areas = []

    for _, row in tqdm(df_tiles.iterrows(), total=len(df_tiles)):
        tile_x, tile_y, tile_extent = row.x_coord, row.y_coord, row.xy_extent

        tile_polygon = Polygon(
            [
                (tile_x, tile_y),
                (tile_x, tile_y + tile_extent),
                (tile_x + tile_extent, tile_y + tile_extent),
                (tile_x + tile_extent, tile_y),
            ]
        )

        tile_label = None
        max_overlap = 0.0
        for label in d_collections.keys():
            intersection_area = (
                d_collections[label].intersection(tile_polygon).area / tile_polygon.area
            )
            if intersection_area > max_overlap:
                tile_label, max_overlap = label, intersection_area

        l_regional_labels.append(tile_label)
        l_intersection_areas.append(max_overlap)

    df_tiles["regional_label"] = l_regional_labels
    df_tiles["intersection_area"] = l_intersection_areas

    logger.info(df_tiles.loc[df_tiles.intersection_area > 0])

    return df_tiles
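
A minimal sketch of labeling tiles from an annotation dataset; the dataset and manifest paths are hypothetical:

from luna.pathology.cli.generate_tile_labels import generate_tile_labels

df_tiles = generate_tile_labels(
    annotation_urlpath="./datasets/annotations.parquet",  # hypothetical
    tiles_urlpath="./tiles/10001.tiles.parquet",          # hypothetical
    slide_id="10001",
)
# tiles that overlap an annotation region
print(df_tiles.query("intersection_area > 0")[["regional_label", "intersection_area"]])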

generate_tile_mask

cli(slide_urlpath='???', tiles_urlpath='', label_cols='???', output_urlpath='.', storage_options={}, output_storage_options={})

Converts categorical tile labels to a slide image mask. This mask can be used for feature extraction and spatial analysis.

Args:

    slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
    tiles_urlpath (str): url/path to valid SlideTiles table
    label_cols (List[str]): list of label columns in the input_slide_tiles table to generate the mask with
    output_urlpath (str): output url/path prefix
    storage_options (dict): storage options to pass to reading functions
    output_storage_options (dict): storage options to pass to writing functions

Returns:

    dict: output properties

Source code in src/luna/pathology/cli/generate_tile_mask.py
@timed
@save_metadata
def cli(
    slide_urlpath: str = "???",
    tiles_urlpath: str = "",
    label_cols: List[str] = "???",  # type: ignore
    output_urlpath: str = ".",
    storage_options: dict = {},
    output_storage_options: dict = {},
):
    """Converts categorical tile labels to a slide image mask. This mask can be used for feature extraction and spatial analysis.

     Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        tiles_urlpath (str): url/path to valid SlideTiles table
        label_cols (List[str]): list of label columns in the input_slide_tiles table to generate the mask with
        output_urlpath (str): output url/path prefix
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        dict: output properties

    """
    config = get_config(vars())

    logger.info("Reading SlideTiles")
    with open(config["tiles_urlpath"], "rb", **config["storage_options"]) as of:
        tiles_df = pd.read_parquet(of).reset_index().set_index("address")

    with open(config["slide_urlpath"], **config["storage_options"]) as of:
        slide = tiffslide.TiffSlide(of)
        slide_width = slide.dimensions[0]
        slide_height = slide.dimensions[1]

    mask_arr, mask_values = convert_tiles_to_mask(
        tiles_df,
        slide_width,
        slide_height,
        config["label_cols"],
        config["output_urlpath"],
        config["output_storage_options"],
    )

    fs, output_path = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )

    slide_mask = Path(output_path) / "tile_mask.tif"
    properties = {
        "slide_mask": fs.unstrip_protocol(str(slide_mask)),
        "mask_values": mask_values,
        "mask_size": mask_arr.shape,
    }
    logger.info(properties)
    return properties

convert_tiles_to_mask(tiles_df, slide_width, slide_height, label_cols, output_urlpath='', output_storage_options={})

Converts categorical tile labels to a slide image mask. This mask can be used for feature extraction and spatial analysis.

Args:

    tiles_df (pd.DataFrame): tile manifest dataframe (a valid SlideTiles table)
    slide_width (int): slide width
    slide_height (int): slide height
    label_cols (Union[str, List[str]]): column with labels or list of label columns in the tiles_urlpath table to generate the mask with

Returns:

    np.ndarray, Dict[int, str]: image mask, mask value mapping

Source code in src/luna/pathology/cli/generate_tile_mask.py
@multimethod
@local_cache_urlpath(
    dir_key_write_mode={"output_urlpath": "w"},
)
def convert_tiles_to_mask(
    tiles_df: pd.DataFrame,
    slide_width: int,
    slide_height: int,
    label_cols: Union[str, List[str]],
    output_urlpath: str = "",
    output_storage_options: dict = {},
):
    """Converts categorical tile labels to a slide image mask. This mask can be used for feature extraction and spatial analysis.

     Args:
        tiles_df (pd.DataFrame): tile manifest dataframe (a valid SlideTiles table)
        slide_width (int): slide width
        slide_height (int): slide height
        label_cols (Union[str, List[str]]): column with labels or list of label columns in the tiles_urlpath table to generate the mask with

    Returns:
        np.ndarray, Dict[int, str]: image mask, mask value mapping

    """

    TileSchema.validate(tiles_df.reset_index())

    mask_arr = np.zeros((slide_height, slide_width), dtype=np.int8)

    if isinstance(label_cols, str):
        tiles_df["mask"] = tiles_df[label_cols].astype("category")
    else:
        tiles_df["mask"] = tiles_df[label_cols].idxmax(axis=1)
        tiles_df["mask"] = tiles_df["mask"].astype("category")
    # map each categorical label to a positive integer mask value
    mask_values = dict(zip(tiles_df["mask"], tiles_df["mask"].cat.codes + 1))

    logger.info(f"Mapping label column to mask values: {mask_values}")

    for address, row in tiles_df.iterrows():
        x, y, extent = int(row.x_coord), int(row.y_coord), int(row.xy_extent)

        value = mask_values[row["mask"]]

        # permuted rows and columns due to differences in indexing between openslide and skimage/numpy
        mask_arr[y : y + extent, x : x + extent] = value

        logger.info(f"{address}, {row['mask']}, {value}")

    if output_urlpath:
        slide_mask = Path(output_urlpath) / "tile_mask.tif"
        logger.info(f"Saving output mask to {slide_mask}")
        with open(slide_mask, "wb") as of:
            tifffile.imwrite(of, mask_arr)

    return mask_arr, mask_values
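
A sketch of calling convert_tiles_to_mask on an existing tile manifest; the file paths and the "Tumor"/"Stroma" score columns are hypothetical:

import pandas as pd
import tiffslide
from luna.pathology.cli.generate_tile_mask import convert_tiles_to_mask

# hypothetical inputs: a saved tile manifest and its source slide
tiles_df = (
    pd.read_parquet("./tiles/10001.tiles.parquet")
    .reset_index()
    .set_index("address")
)
with open("./slides/10001.svs", "rb") as f:
    slide = tiffslide.TiffSlide(f)
    slide_width, slide_height = slide.dimensions

# each tile is labeled by its argmax column across the listed label columns
mask_arr, mask_values = convert_tiles_to_mask(
    tiles_df, slide_width, slide_height, ["Tumor", "Stroma"]
)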

generate_tiles

__generate_tiles(slide_urlpath, tile_size, output_urlpath, force, requested_magnification=None, storage_options={}, output_storage_options={})

Rasterize a slide into smaller tiles

Tile addresses and arrays are saved as key-value pairs in (tiles.h5), and the corresponding manifest/header file (tiles.parquet) is also generated

Necessary data for the manifest file are: address, x_coord, y_coord, xy_extent, tile_size, tile_units

Parameters:

    slide_urlpath (str, required): slide url/path
    tile_size (int, required): size of tiles to use (at the requested magnification)
    requested_magnification (float, default None): Magnification scale at which to perform computation

Returns:

    dict: tile manifest properties (tiles_url, total_tiles)

Source code in src/luna/pathology/cli/generate_tiles.py
def __generate_tiles(
    slide_urlpath: str,
    tile_size: int,
    output_urlpath: str,
    force: bool,
    requested_magnification: Optional[int] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
) -> dict:
    """Rasterize a slide into smaller tiles

    Tile addresses and arrays are saved as key-value pairs in (tiles.h5),
    and the corresponding manifest/header file (tiles.parquet) is also generated

    Necessary data for the manifest file are:
    address, x_coord, y_coord, xy_extent, tile_size, tile_units

    Args:
        slide_urlpath (str): slide url/path
        tile_size (int): size of tiles to use (at the requested magnification)
        requested_magnification (float): Magnification scale at which to perform computation

    Returns:
        dict: tile manifest properties (tiles_url, total_tiles)
    """
    slide_id = Path(slide_urlpath).stem
    ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)
    output_file = str(Path(output_path) / f"{slide_id}.tiles.parquet")
    if not force and ofs.exists(output_file):
        logger.info("Output file exists: {ofs.unstrip_protocol(output_file)}")
        return

    with fsspec.open(slide_urlpath, "rb", **storage_options) as f:
        slide = TiffSlide(f)
        logger.info(f"Slide size = [{slide.dimensions[0]},{slide.dimensions[1]}]")

        to_mag_scale_factor = get_scale_factor_at_magnification(
            slide, requested_magnification=requested_magnification
        )

        if not to_mag_scale_factor % 1 == 0:
            logger.error(f"Bad magnfication scale factor = {to_mag_scale_factor}")
            raise ValueError(
                "You chose a combination of requested tile sizes and magnification that resulted in non-integer tile sizes at different scales"
            )

        full_resolution_tile_size = int(tile_size * to_mag_scale_factor)
        logger.info(
            f"Normalized magnification scale factor for {requested_magnification}x is {to_mag_scale_factor}",
        )
        logger.info(
            f"Requested tile size={tile_size}, tile size at full magnification={full_resolution_tile_size}"
        )

    # get DeepZoomGenerator, level
    full_generator, full_level = get_full_resolution_generator(
        slide_urlpath,
        tile_size=full_resolution_tile_size,
        storage_options=storage_options,
    )
    tile_x_count, tile_y_count = full_generator.level_tiles[full_level]
    logger.info(f"tiles x {tile_x_count}, tiles y {tile_y_count}")

    # populate address, coordinates
    tiles = DataFrame[TileSchema](
        [
            Tile(
                address=coord_to_address(address, requested_magnification),
                x_coord=(address[0]) * full_resolution_tile_size,
                y_coord=(address[1]) * full_resolution_tile_size,
                xy_extent=full_resolution_tile_size,
                tile_size=tile_size,
                tile_units="px",
            ).__dict__
            for address in itertools.product(
                range(1, tile_x_count - 1), range(1, tile_y_count - 1)
            )
        ]
    )

    logger.info(f"Number of tiles in raster: {len(tiles)}")
    #    logger.info("Creating lazy tiles")
    #    lazy_tiles = [
    #            [dask.delayed(get_tile_from_slide)(tiles_df(x, y),
    #                                               full_resolution_tile_size,
    #                                               tile_size,
    #                                               slide)
    #             for y in range(1, tile_y_count - 1)]
    #            for x in range(1, tile_x_count - 1)]
    #    sample = lazy_tiles[0][0].compute()
    #
    #    lazy_arrays = da.stack([
    #        da.stack([da.from_delayed(lazy_tile, dtype=sample.dtype, shape=sample.shape)
    #                        for lazy_tile in inner] )
    #        for inner in lazy_tiles
    #        ])
    #    logger.info(f"lazy tiles: {lazy_arrays.shape}")

    with ofs.open(output_file, mode="wb") as of:
        tiles.to_parquet(of)

    properties = {
        "tiles_url": ofs.unstrip_protocol(
            output_file
        ),  # "Tiles" are the metadata that describe them
        "total_tiles": len(tiles),
    }

    return properties
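
To make the magnification arithmetic concrete, a small illustration with hypothetical numbers (a 40x scan tiled at 10x, where get_scale_factor_at_magnification would return 4):

# hypothetical values: requested_magnification=10 on a 40x slide -> scale factor 4
tile_size = 244
to_mag_scale_factor = 4
full_resolution_tile_size = int(tile_size * to_mag_scale_factor)  # 976

# tile address (x=2, y=3) maps to this full-resolution pixel origin
x_coord = 2 * full_resolution_tile_size  # 1952
y_coord = 3 * full_resolution_tile_size  # 2928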

cli(slide_urlpath='???', tile_size='???', requested_magnification=None, storage_options={}, output_storage_options={}, dask_options={}, local_config='', output_urlpath='.', force=False)

Rasterize a slide into smaller tiles, saving tile metadata as rows in a csv file

Necessary data for the manifest file are: address, x_coord, y_coord, xy_extent, tile_size, tile_units

Inputs:
    input_slide_image: slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
Outputs:
    slide_tiles
Example:
    generate_tiles 10001.svs
        -rts 244 -rmg 10
        -o 10001/tiles

Source code in src/luna/pathology/cli/generate_tiles.py
@timed
@save_metadata
def cli(
    slide_urlpath: str = "???",
    tile_size: int = "???",  # type: ignore
    requested_magnification: Optional[int] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    dask_options: dict = {},
    local_config: str = "",
    output_urlpath: str = ".",
    force: bool = False,
) -> dict:
    """Rasterize a slide into smaller tiles, saving tile metadata as rows in a csv file

    Necessary data for the manifest file are:
    address, x_coord, y_coord, xy_extent, tile_size, tile_units

    \b
    Inputs:
        input_slide_image: slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
    Outputs:
        slide_tiles
    \b
    Example:
        generate_tiles 10001.svs
            -rts 244 -rmg 10
            -o 10001/tiles
    """
    config = get_config(vars())

    configure_dask_client(**config["dask_options"])

    properties = __generate_tiles(
        config["slide_urlpath"],
        config["tile_size"],
        config["output_urlpath"],
        config["force"],
        config["requested_magnification"],
        config["storage_options"],
        config["output_storage_options"],
    )

    return properties
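
The Python equivalent of the shell example above, with illustrative paths:

from luna.pathology.cli.generate_tiles import cli

props = cli(
    slide_urlpath="10001.svs",
    tile_size=244,
    requested_magnification=10,
    output_urlpath="10001/tiles",
)
print(props["tiles_url"], props["total_tiles"])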

infer_tile_labels

__infer_tile_labels(tiles_urlpath, slide_id, output_urlpath, force, torch_model_repo_or_dir, model_name, num_cores, batch_size, kwargs, use_gpu, insecure, storage_options, output_storage_options)

Run inference using a model and transform definition (either local or using torch.hub)

Decorates existing slide_tiles with additional columns corresponding to class prediction/scores from the model

Parameters:

    tiles_urlpath (str, required): path to a slide-tile manifest file (.tiles.parquet)
    slide_id (str, required): slide ID
    output_urlpath (str, required): output/working directory
    torch_model_repo_or_dir (str, required): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
    model_name (str, required): torch hub model name (a nn.Module at the repo repo_name)
    num_cores (int, required): Number of cores to use for CPU parallelization
    batch_size (int, required): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
    kwargs (dict, required): additional keywords to pass to model initialization
    use_gpu (bool, required): use GPU if available
    insecure (bool, required): insecure SSL
    storage_options (dict, required): storage options to pass to reading functions
    output_storage_options (dict, required): storage options to pass to writing functions

Returns:

    dict: metadata

Source code in src/luna/pathology/cli/infer_tile_labels.py
def __infer_tile_labels(
    tiles_urlpath: str,
    slide_id: str,
    output_urlpath: str,
    force: bool,
    torch_model_repo_or_dir: str,
    model_name: str,
    num_cores: int,
    batch_size: int,
    kwargs: dict,
    use_gpu: bool,
    insecure: bool,
    storage_options: dict,
    output_storage_options: dict,
):
    """Run inference using a model and transform definition (either local or using torch.hub)

    Decorates existing slide_tiles with additional columns corresponding to class prediction/scores from the model

    Args:
        tiles_urlpath (str): path to a slide-tile manifest file (.tiles.parquet)
        slide_id (str): slide ID
        output_urlpath (str): output/working directory
        torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
        model_name (str): torch hub model name (a nn.Module at the repo repo_name)
        num_cores (int): Number of cores to use for CPU parallelization
        batch_size (int): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
        kwargs (dict): additional keywords to pass to model initialization
        use_gpu (bool): use GPU if available
        insecure (bool): insecure SSL
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        dict: metadata
    """
    if insecure:
        ssl._create_default_https_context = ssl._create_unverified_context

    ofs, output_path_prefix = fsspec.core.url_to_fs(
        output_urlpath,
        **output_storage_options,
    )

    output_file = str(Path(output_path_prefix) / f"{slide_id}.tiles.parquet")

    if not force and ofs.exists(output_file):
        logger.info(f"outputs already exist: {output_file}")
        return

    tiles_df = (
        pd.read_parquet(tiles_urlpath, storage_options=storage_options)
        .reset_index()
        .set_index("address")
    )

    # Get our model and transforms and construct the Tile Dataset and Classifier
    if os.path.exists(torch_model_repo_or_dir):
        source = "local"
    else:
        source = "github"

    logger.info(f"Torch hub source = {source} @ {torch_model_repo_or_dir}")

    # if source == "github":
    # logger.info(f"Available models: {torch.hub.list(torch_model_repo_or_dir, trust_repo=False)}")

    ttm = torch.hub.load(
        torch_model_repo_or_dir,
        model_name,
        source=source,
        **kwargs,
        force_reload=True,
        trust_repo=True,
    )

    if not isinstance(ttm, TorchTransformModel):
        raise RuntimeError(f"Not a valid model, loaded model was of type {type(ttm)}")

    pin_memory = False
    if use_gpu and torch.cuda.is_available():
        pin_memory = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    logger.info(f"Using device = {device}")

    preprocess = ttm.get_preprocess()
    transform = ttm.transform
    ttm.model.to(device)

    ds = HDF5Dataset(tiles_df, preprocess=preprocess, storage_options=storage_options)
    loader = DataLoader(
        ds, num_workers=num_cores, batch_size=batch_size, pin_memory=pin_memory
    )

    # Generate aggregate dataframe
    with torch.no_grad():
        df_scores = pd.concat(
            [
                pd.DataFrame(
                    post_transform_to_2d(transform(data.to(device))), index=index
                )
                for data, index in tqdm(loader, file=sys.stdout)
            ]
        )

    if hasattr(ttm, "column_labels"):
        logger.info(f"Mapping column labels -> {ttm.column_labels}")
        df_scores = df_scores.rename(columns=ttm.column_labels)

    df_output = tiles_df.join(df_scores)
    df_output.columns = df_output.columns.astype(str)
    df_output.index.name = "address"

    logger.info(df_output)

    with ofs.open(output_file, "wb") as of:
        df_output.to_parquet(of)

    # Save our properties and params
    properties = {
        "tiles_url": ofs.unstrip_protocol(output_file),
        "total_tiles": len(df_output),
        "available_labels": list(df_output.columns),
    }

    return properties

cli(slide_urlpath='', tiles_urlpath='', tile_size=None, filter_query='', requested_magnification=None, torch_model_repo_or_dir='???', model_name='???', num_cores=4, batch_size=8, output_urlpath='.', force=False, kwargs={}, use_gpu=False, dask_options={}, insecure=False, storage_options={}, output_storage_options={})

Run inference using a model and transform definition (either local or using torch.hub)

Decorates existing slide_tiles with additional columns corresponding to class prediction/scores from the model

Parameters:

    slide_urlpath (str, default ''): url/path to slide image (virtual slide formats compatible with TiffSlide, .svs, .tif, .scn, ...)
    tiles_urlpath (str, default ''): path to a slide-tile manifest file (.tiles.parquet)
    tile_size (Optional[int], default None): size of tiles to use (at the requested magnification)
    filter_query (str, default ''): pandas query by which to filter tiles based on their various tissue detection scores
    requested_magnification (Optional[int], default None): Magnification scale at which to perform computation
    torch_model_repo_or_dir (str, default '???'): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
    model_name (str, default '???'): torch hub model name (a nn.Module at the repo repo_name)
    num_cores (int, default 4): Number of cores to use for CPU parallelization
    batch_size (int, default 8): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
    output_urlpath (str, default '.'): output/working directory
    force (bool, default False): overwrite outputs if they exist
    kwargs (dict, default {}): additional keywords to pass to model initialization
    use_gpu (bool, default False): use GPU if available
    dask_options (dict, default {}): options to pass to dask client
    insecure (bool, default False): insecure SSL
    storage_options (dict, default {}): storage options to pass to reading functions
    output_storage_options (dict, default {}): storage options to pass to writing functions

Returns:

    dict: metadata

Source code in src/luna/pathology/cli/infer_tile_labels.py
@timed
@save_metadata
def cli(
    slide_urlpath: str = "",
    tiles_urlpath: str = "",
    tile_size: Optional[int] = None,
    filter_query: str = "",
    requested_magnification: Optional[int] = None,
    torch_model_repo_or_dir: str = "???",
    model_name: str = "???",
    num_cores: int = 4,
    batch_size: int = 8,
    output_urlpath: str = ".",
    force: bool = False,
    kwargs: dict = {},
    use_gpu: bool = False,
    dask_options: dict = {},
    insecure: bool = False,
    storage_options: dict = {},
    output_storage_options: dict = {},
):
    """Run inference using a model and transform definition (either local or using torch.hub)

    Decorates existing slide_tiles with additional columns corresponding to class prediction/scores from the model

    Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with TiffSlide, .svs, .tif, .scn, ...)
        tiles_urlpath (str): path to a slide-tile manifest file (.tiles.parquet)
        tile_size (Optional[int]): size of tiles to use (at the requested magnification)
        filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
        requested_magnification (Optional[int]): Magnification scale at which to perform computation
        torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
        model_name (str): torch hub model name (a nn.Module at the repo repo_name)
        num_cores (int): Number of cores to use for CPU parallelization
        batch_size (int): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
        output_urlpath (str): output/working directory
        force (bool): overwrite outputs if they exist
        kwargs (dict): additional keywords to pass to model initialization
        use_gpu (bool): use GPU if available
        dask_options (dict): options to pass to dask client
        insecure (bool): insecure SSL
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        dict: metadata
    """
    config = get_config(vars())
    configure_dask_client(**config["dask_options"])

    if not config["slide_urlpath"] and not config["tiles_urlpath"]:
        raise fire.core.FireError("Specify either tiles_urlpath or slide_urlpath")

    if not config["tile_size"] and not config["tiles_urlpath"]:
        raise fire.core.FireError("Specify either tiles_urlpath or tile_size")

    if config["slide_urlpath"]:
        slide_id = Path(config["slide_urlpath"]).stem
    else:
        slide_id = Path(config["tiles_urlpath"]).stem.removesuffix(".tiles")

    tiles_urlpath = config["tiles_urlpath"]
    with make_temp_directory() as temp_dir:
        if not tiles_urlpath:
            tiles_result = __generate_tiles(
                config["slide_urlpath"],
                config["tile_size"],
                (Path(temp_dir) / "generate_tiles").as_uri(),
                config["force"],
                config["tile_magnification"],
                config["storage_options"],
            )
            detect_tissue_result = __detect_tissue(
                config["slide_urlpath"],
                tiles_result["tiles_url"],
                slide_id,
                config["thumbnail_magnification"],
                config["filter_query"],
                config["batch_size"],
                (Path(temp_dir) / "detect_tissue").as_uri(),
                config["force"],
                config["storage_options"],
            )
            save_tiles_result = _save_tiles(
                detect_tissue_result["tiles_urlpath"],
                config["slide_urlpath"],
                (Path(temp_dir) / "save_tiles").as_uri(),
                config["force"],
                config["batch_size"],
                config["storage_options"],
            )
            tiles_urlpath = save_tiles_result["tiles_url"]

        return __infer_tile_labels(
            tiles_urlpath,
            slide_id,
            config["output_urlpath"],
            config["force"],
            config["torch_model_repo_or_dir"],
            config["model_name"],
            config["num_cores"],
            config["batch_size"],
            config["kwargs"],
            config["use_gpu"],
            config["insecure"],
            config["storage_options"],
            config["output_storage_options"],
        )
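
A hedged sketch of calling this CLI entrypoint from Python; the torch.hub repository and model name below are illustrative placeholders, and the tile manifest is assumed to have been written by save_tiles:

from luna.pathology.cli.infer_tile_labels import cli

props = cli(
    tiles_urlpath="./tiles/10001.tiles.parquet",  # hypothetical saved tiles
    torch_model_repo_or_dir="msk-mind/luna-ml",   # or a local model directory
    model_name="some_tissue_classifier",          # hypothetical hub model name
    batch_size=64,
    output_urlpath="./scores/10001/",
)
print(props["available_labels"])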

infer_tile_labels(slide_manifest, tile_size=None, filter_query='', thumbnail_magnification=None, tile_magnification=None, torch_model_repo_or_dir='', model_name='', num_cores=1, batch_size=2000, output_urlpath='.', force=True, kwargs={}, use_gpu=False, insecure=False, storage_options={}, output_storage_options={})

Run inference using a model and transform definition (either local or using torch.hub)

Decorates existing tiles manifests with additional columns corresponding to class prediction/scores from the model

Parameters:

    slide_manifest (DataFrame, required): slide manifest from slide_etl
    tile_size (Optional[int], default None): size of tiles to use (at the requested magnification)
    filter_query (str, default ''): pandas query by which to filter tiles based on their various tissue detection scores
    thumbnail_magnification (Optional[int], default None): Magnification scale at which to detect tissue
    tile_magnification (Optional[int], default None): Magnification scale at which to generate tiles
    torch_model_repo_or_dir (str, default ''): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
    model_name (str, default ''): torch hub model name (a nn.Module at the repo repo_name)
    num_cores (int, default 1): Number of cores to use for CPU parallelization
    batch_size (int, default 2000): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
    output_urlpath (str, default '.'): output/working directory
    force (bool, default True): overwrite outputs if they exist
    kwargs (dict, default {}): additional keywords to pass to model initialization
    use_gpu (bool, default False): use GPU if available
    insecure (bool, default False): insecure SSL
    storage_options (dict, default {}): storage options to pass to reading functions
    output_storage_options (dict, default {}): storage options to pass to writing functions

Returns:

    DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/infer_tile_labels.py
def infer_tile_labels(
    slide_manifest: DataFrame[SlideSchema],
    tile_size: Optional[int] = None,
    filter_query: str = "",
    thumbnail_magnification: Optional[int] = None,
    tile_magnification: Optional[int] = None,
    torch_model_repo_or_dir: str = "",
    model_name: str = "",
    num_cores: int = 1,
    batch_size: int = 2000,
    output_urlpath: str = ".",
    force: bool = True,
    kwargs: dict = {},
    use_gpu: bool = False,
    insecure: bool = False,
    storage_options: dict = {},
    output_storage_options: dict = {},
) -> DataFrame[SlideSchema]:
    """Run inference using a model and transform definition (either local or using torch.hub)

    Decorates existing tiles manifests with additional columns corresponding to class prediction/scores from the model

    Args:
        slide_manifest (DataFrame): slide manifest from slide_etl
        tile_size (Optional[int]): size of tiles to use (at the requested magnification)
        filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
        thumbnail_magnification (Optional[int]): Magnification scale at which to detect tissue
        tile_magnification (Optional[int]): Magnification scale at which to generate tiles
        torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
        model_name (str): torch hub model name (a nn.Module at the repo repo_name)
        num_cores (int): Number of cores to use for CPU parallelization
        batch_size (int): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
        output_urlpath (str): output/working directory
        force (bool): overwrite outputs if they exist
        kwargs (dict): additional keywords to pass to model initialization
        use_gpu (bool): use GPU if available
        insecure (bool): insecure SSL
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        pd.DataFrame: slide manifest
    """
    client = get_or_create_dask_client()

    if "tiles_url" not in slide_manifest.columns:
        if tile_size is None:
            raise RuntimeError("Need to have generated tiles or specify tile_size")
        # generate tiles
        slide_manifest = detect_tissue(
            slide_manifest,
            None,
            tile_size=tile_size,
            thumbnail_magnification=thumbnail_magnification,
            tile_magnification=tile_magnification,
            filter_query=filter_query,
            batch_size=batch_size,
            storage_options=storage_options,
            output_urlpath=output_urlpath,
            force=force,
            output_storage_options=output_storage_options,
        )

        slide_manifest = save_tiles(
            slide_manifest,
            output_urlpath,
            force,
            batch_size,
            storage_options,
            output_storage_options,
        )

    futures = []
    for row in slide_manifest.itertuples(name="Slide"):
        future = client.submit(
            __infer_tile_labels,
            row.tiles_url,
            row.id,
            output_urlpath,
            force,
            torch_model_repo_or_dir,
            model_name,
            num_cores,
            batch_size,
            kwargs,
            use_gpu,
            insecure,
            storage_options,
            output_storage_options,
        )
        futures.append(future)

    progress(futures)
    results = client.gather(futures)
    return slide_manifest.assign(tiles_url=[x["tiles_url"] for x in results])
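A minimal end-to-end sketch (the slide path, model repository, and model name below are hypothetical, and the filter column assumes tissue-detection scores have been computed):

from luna.pathology.cli.infer_tile_labels import infer_tile_labels
from luna.pathology.cli.slide_etl import slide_etl

# build a slide manifest, then decorate its tiles with model predictions
slide_manifest = slide_etl(["/data/slides/123.svs"], project_name="PRO-12-123", no_copy=True)
slide_manifest = infer_tile_labels(
    slide_manifest,
    tile_size=256,
    filter_query="purple_score > 0.1",           # hypothetical score column
    torch_model_repo_or_dir="msk-mind/luna-ml",  # repo example from the docstring
    model_name="TissueTileNet",                  # hypothetical nn.Module name
    batch_size=64,
    output_urlpath="/results/tiles",
)
print(slide_manifest["tiles_url"])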

merge_shape_features

cli(shape_features_urlpaths='???', output_urlpath='.', flatten_index=True, fraction_not_null=0.5, storage_options={}, output_storage_options={}, local_config='')

Merges shape features dataframes

Parameters:

Name Type Description Default
shape_features_urlpaths List[str]

URL/paths to shape features parquet files

'???'
output_urlpath str

URL/path to output parquet file

'.'
flatten_index bool

flatten the multi-index columns of the wide-format dataframe into underscore-delimited names

True
fraction_not_null float

minimum fraction of non-null values a column must have to be kept in the wide format

0.5
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}
local_config str

local config yaml file

''

Returns:

Name Type Description
dict

output paths and the number of features generated

Source code in src/luna/pathology/cli/merge_shape_features.py
@timed
@save_metadata
def cli(
    shape_features_urlpaths: Union[str, List[str]] = "???",
    output_urlpath: str = ".",
    flatten_index: bool = True,
    fraction_not_null: float = 0.5,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Merges shape features dataframes

    Args:
        shape_features_urlpaths (List[str]): URL/paths to shape features parquet files
        output_urlpath (str): URL/path to output parquet file
        flatten_index (bool): flatten the multi-index columns of the wide-format dataframe into underscore-delimited names
        fraction_not_null (float): minimum fraction of non-null values a column must have to be kept in the wide format
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        local_config (str): local config yaml file

    Returns:
        dict: output paths and the number of features generated
    """
    config = get_config(vars())

    dfs = []  # type: list[pd.DataFrame]
    if isinstance(config["shape_features_urlpaths"], list):
        for urlpath in config["shape_features_urlpaths"]:
            fs, path = fsspec.core.url_to_fs(urlpath, **config["storage_options"])
            with fs.open(path, "rb") as of:
                df = pd.read_parquet(of)
            dfs.append(df)
    else:
        fs, path_prefix = fsspec.core.url_to_fs(
            config["shape_features_urlpaths"], **config["storage_options"]
        )
        for path in fs.glob(f"{path_prefix}/**/shape_features.parquet"):
            with fs.open(path, "rb") as of:
                df = pd.read_parquet(of)
            dfs.append(df)

    df = pd.concat(dfs)
    fs, path_prefix = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )
    path = Path(path_prefix) / "long_shape_features.parquet"

    with fs.open(path, "wb", **config["output_storage_options"]) as of:
        df.to_parquet(of)

    df.variable = (
        df.variable.str.replace("µ", "u")
        .replace(r"(: |:)", " ", regex=True)
        .replace("[^a-zA-Z0-9 \n]", "", regex=True)
    )
    wide_path = Path(path_prefix) / "wide_shape_features.parquet"
    wide_df = df.pivot(
        index="slide_id", columns=["Parent", "Class", "variable"], values="value"
    )
    wide_df = wide_df.loc[
        :, wide_df.isna().sum() < len(wide_df) * config["fraction_not_null"]
    ]
    if config["flatten_index"]:
        wide_df.columns = ["_".join(col).strip() for col in wide_df.columns.values]
        wide_df.columns = wide_df.columns.str.replace(" ", "_")

    with fs.open(wide_path, "wb", **config["output_storage_options"]) as of:
        wide_df.to_parquet(of)

    return {
        "long_shape_features": fs.unstrip_protocol(str(path)),
        "wide_shape_features": fs.unstrip_protocol(str(wide_path)),
        "num_features": len(wide_df.columns),
    }
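A hedged sketch of invoking the merge from Python (the parquet paths are hypothetical):

from luna.pathology.cli.merge_shape_features import cli as merge_shape_features

result = merge_shape_features(
    shape_features_urlpaths=[
        "/results/slide_1/shape_features.parquet",  # hypothetical inputs
        "/results/slide_2/shape_features.parquet",
    ],
    output_urlpath="/results/merged",
    fraction_not_null=0.5,
)
print(result["num_features"], result["wide_shape_features"])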

run_stardist_cell_detection

__stardist_cell_lymphocyte(slide_urlpath, output_urlpath, slide_id, num_cores, use_gpu=False, image='mskmind/qupath-stardist:0.4.3', use_singularity=False, max_heap_size='64G', storage_options={}, output_storage_options={})

Run stardist using qupath CLI

Parameters:

Name Type Description Default
slide_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

required
output_urlpath str

output url/path

required
num_cores int

Number of cores to use for CPU parallelization

required
use_gpu bool

use GPU

False
image str

docker/singularity image

'mskmind/qupath-stardist:0.4.3'
use_singularity bool

use singularity instead of docker

False
max_heap_size str

maximum heap size to pass to java options

'64G'
storage_options dict

storage options to pass to reading functions

{}

Returns:

Name Type Description
dict dict

run metadata

Source code in src/luna/pathology/cli/run_stardist_cell_detection.py
@local_cache_urlpath(
    file_key_write_mode={"slide_urlpath": "r"},
    dir_key_write_mode={"output_urlpath": "w"},
)
def __stardist_cell_lymphocyte(
    slide_urlpath: str,
    output_urlpath: str,
    slide_id: str,
    num_cores: int,
    use_gpu: bool = False,
    image: str = "mskmind/qupath-stardist:0.4.3",
    use_singularity: bool = False,
    max_heap_size: str = "64G",
    storage_options: dict = {},
    output_storage_options: dict = {},
) -> dict:
    """Run stardist using qupath CLI

    Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        output_urlpath (str): output url/path
        num_cores (int): Number of cores to use for CPU parallelization
        use_gpu (bool): use GPU
        image (str): docker/singularity image
        use_singularity (bool): use singularity instead of docker
        max_heap_size (str): maximum heap size to pass to java options
        storage_options (dict): storage options to pass to reading functions

    Returns:
        dict: run metadata
    """
    fs, slide_path = fsspec.core.url_to_fs(slide_urlpath, **storage_options)
    ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)

    output_header_file = Path(output_path) / f"{slide_id}_cell_objects.parquet"
    if ofs.exists(output_header_file):
        logger.info(f"outputs already exist: {output_header_file}")
        return

    if ofs.protocol == "file" and not ofs.exists(output_path):
        ofs.mkdir(output_path)

    qupath_cmd = "QuPath-cpu"
    if use_gpu:
        qupath_cmd = "QuPath-gpu"

    runner_type = "DOCKER"
    if use_singularity:
        runner_type = "SINGULARITY"

    slide_filename = Path(slide_path).name
    command = f"{qupath_cmd} script --image /inputs/{slide_filename} /scripts/stardist_nuclei_and_lymphocytes.groovy"
    logger.info(f"Launching {runner_type} container:")
    logger.info(
        f"\tvolumes={slide_path}:'/inputs/{slide_filename}', {output_path}:'/output_dir'"
    )
    logger.info(f"\tnano_cpus={int(num_cores * 1e9)}")
    logger.info(f"\timage='{image}'")
    logger.info(f"\tcommand={command}")

    volumes_map = {
        slide_path: f"/inputs/{slide_filename}",
        output_path: "/output_dir",
    }

    runner_config = {
        "image": image,
        "command": command,
        "num_cores": num_cores,
        "max_heap_size": max_heap_size,
        "volumes_map": volumes_map,
        "use_gpu": use_gpu,
    }
    runner = runner_provider.get(runner_type, **runner_config)
    executor = runner.run()
    try:
        for line in executor:
            logger.info(line)
    except TypeError:
        logger.warning(f"{executor} is not iterable")

    stardist_output = Path(output_path) / "cell_detections.tsv"

    df = pd.read_csv(stardist_output, sep="\t")
    df.index = "cell-" + df.index.astype(int).astype(str)
    df.index.rename("cell_id", inplace=True)

    df = df.rename(
        columns={"Centroid X µm": "x_coord", "Centroid Y µm": "y_coord"}
    )  # x,ys follow this convention

    with fs.open(output_header_file, "wb") as of:
        df.to_parquet(of)

    logger.info("generated cell data:")
    logger.info(df)

    output_geojson_file = Path(output_path) / "cell_detections.geojson"

    properties = {
        "geojson_url": ofs.unstrip_protocol(str(output_geojson_file)),
        "tsv_url": ofs.unstrip_protocol(str(stardist_output)),
        "parquet_url": ofs.unstrip_protocol(str(output_header_file)),
        "spatial": True,
        "total_cells": len(df),
    }

    return properties

__stardist_simple(slide_urlpath, cell_expansion_size, image_type, output_urlpath, debug_opts, num_cores, image, use_singularity, max_heap_size, storage_options, output_storage_options)

Run stardist using qupath CLI on a single slide.

Parameters:

Name Type Description Default
slide_urlpath str

path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

required
cell_expansion_size float

size in pixels to expand cell cytoplasm

required
num_cores int

Number of cores to use for CPU parallelization

required
image_type str

qupath image type (BRIGHTFIELD_H_DAB)

required
output_urlpath str

output url/path

required
debug_opts str

debug options passed as arguments to groovy script

required
image str

docker/singularity image

required
use_singularity bool

use singularity instead of docker

required
max_heap_size str

maximum heap size to pass to java options

required
storage_options dict

storage options to pass to reading functions

required
output_storage_options dict

storage options to pass to writing functions

required

Returns:

Name Type Description
dict dict

run metadata

Source code in src/luna/pathology/cli/run_stardist_cell_detection.py
@local_cache_urlpath(
    file_key_write_mode={"slide_urlpath": "r"},
    dir_key_write_mode={"output_urlpath": "w"},
)
def __stardist_simple(
    slide_urlpath: str,
    cell_expansion_size: float,
    image_type: str,
    output_urlpath: str,
    debug_opts: str,
    num_cores: int,
    image: str,
    use_singularity: bool,
    max_heap_size: str,
    storage_options: dict,
    output_storage_options: dict,
) -> dict:
    """Run stardist using qupath CLI on slides in a slide manifest from
    slide_etl. URIs to resulting GeoJSON will be stored in a specified column
    of the returned slide manifest.

    Args:
        slide_urlpath (str): path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        cell_expansion_size (float): size in pixels to expand cell cytoplasm
        num_cores (int): Number of cores to use for CPU parallelization
        image_type (str): qupath image type (BRIGHTFIELD_H_DAB)
        output_urlpath (str): output url/path
        debug_opts (str): debug options passed as arguments to groovy script
        image (str): docker/singularity image
        use_singularity (bool): use singularity instead of docker
        max_heap_size (str): maximum heap size to pass to java options
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        dict: run metadata
    """
    fs, slide_path = fsspec.core.url_to_fs(slide_urlpath, **storage_options)
    ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)

    slide_id = Path(slide_urlpath).stem
    output_header_file = Path(output_path) / f"{slide_id}_cell_objects.parquet"
    if ofs.exists(output_header_file):
        logger.info(f"outputs already exist: {output_header_file}")
        return

    if ofs.protocol == "file" and not ofs.exists(output_path):
        ofs.mkdir(output_path)

    runner_type = "DOCKER"
    if use_singularity:
        runner_type = "SINGULARITY"

    slide_filename = Path(slide_path).name
    command = f"echo QuPath script --image /inputs/{slide_filename} --args [cellSize={cell_expansion_size},imageType={image_type},{debug_opts}] /scripts/stardist_simple.groovy"
    logger.info(f"Launching QuPath via {runner_type}:{image} ...")
    logger.info(
        f"\tvolumes={slide_urlpath}:'/inputs/{slide_filename}', {slide_path}:'/output_dir'"
    )
    logger.info(f"\tnano_cpus={int(num_cores * 1e9)}")
    logger.info(f"\timage='{image}'")
    logger.info(f"\tcommand={command}")

    volumes_map = {
        slide_path: f"/inputs/{slide_filename}",
        output_path: "/output_dir",
    }

    runner_config = {
        "image": image,
        "command": command,
        "num_cores": num_cores,
        "max_heap_size": max_heap_size,
        "volumes_map": volumes_map,
    }
    runner = runner_provider.get(runner_type, **runner_config)
    executor = runner.run()
    try:
        for line in executor:
            logger.info(line)
    except TypeError:
        logger.warning(f"{executor} is not iterable")

    stardist_output = Path(output_path) / "cell_detections.tsv"

    df = pd.read_csv(stardist_output, sep="\t")
    df.index = "cell-" + df.index.astype(int).astype(str)
    df.index.rename("cell_id", inplace=True)

    df = df.rename(
        columns={"Centroid X µm": "x_coord", "Centroid Y µm": "y_coord"}
    )  # x,ys follow this convention

    with ofs.open(output_header_file, "wb") as of:
        df.to_parquet(of)

    logger.info("generated cell data:")
    logger.info(df)

    output_geojson_file = Path(output_path) / "cell_detections.geojson"

    properties = {
        "geojson_url": ofs.unstrip_protocol(str(output_geojson_file)),
        "tsv_url": ofs.unstrip_protocol(str(stardist_output)),
        "parquet_url": ofs.unstrip_protocol(str(output_header_file)),
        "spatial": True,
        "total_cells": len(df),
    }

    return properties

stardist_cell_lymphocyte(slide_manifest, output_urlpath, num_cores, use_gpu=False, image='mskmind/qupath-stardist:0.4.3', use_singularity=False, max_heap_size='64G', storage_options={}, output_storage_options={}, annotation_column='lymphocyte_geojson_url')

Run stardist using qupath CLI

Parameters:

Name Type Description Default
slide_manifest DataFrame[SlideSchema]

slide manifest from slide_etl

required
output_urlpath str

output url/path

required
num_cores int

Number of cores to use for CPU parallelization

required
use_gpu bool

use GPU

False
image str

docker/singularity image

'mskmind/qupath-stardist:0.4.3'
use_singularity bool

use singularity instead of docker

False
max_heap_size str

maximum heap size to pass to java options

'64G'
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}
annotation_column str

name of column in resulting slide manifest to store GeoJSON URIs

'lymphocyte_geojson_url'

Returns:

Type Description
DataFrame[SlideSchema]

DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/run_stardist_cell_detection.py
def stardist_cell_lymphocyte(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    num_cores: int,
    use_gpu: bool = False,
    image: str = "mskmind/qupath-stardist:0.4.3",
    use_singularity: bool = False,
    max_heap_size: str = "64G",
    storage_options: dict = {},
    output_storage_options: dict = {},
    annotation_column: str = "lymphocyte_geojson_url",
) -> DataFrame[SlideSchema]:
    """Run stardist using qupath CLI

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        output_urlpath (str): output url/path
        num_cores (int): Number of cores to use for CPU parallelization
        use_gpu (bool): use GPU
        image (str): docker/singularity image
        use_singularity (bool): use singularity instead of docker
        max_heap_size (str): maximum heap size to pass to java options
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        annotation_column (str): name of column in resulting slide manifest to store GeoJSON URIs

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """
    client = get_or_create_dask_client()

    futures = []
    for row in slide_manifest.itertuples(name="Slide"):
        fs, output_path = fsspec.core.url_to_fs(
            output_urlpath, **output_storage_options
        )
        future = client.submit(
            __stardist_cell_lymphocyte,
            row.url,
            fs.unstrip_protocol(str(Path(output_path) / row.id)),
            row.id,
            num_cores,
            use_gpu,
            image,
            use_singularity,
            max_heap_size,
            storage_options,
            output_storage_options,
        )
        futures.append(future)
    results = client.gather(futures)
    return slide_manifest.assign(
        **{annotation_column: [x["geojson_url"] for x in results]}
    )
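A minimal sketch, assuming a manifest from slide_etl and hypothetical paths; the returned manifest gains the lymphocyte_geojson_url column:

from luna.pathology.cli.run_stardist_cell_detection import stardist_cell_lymphocyte
from luna.pathology.cli.slide_etl import slide_etl

slide_manifest = slide_etl(["/data/slides/123.svs"], project_name="PRO-12-123", no_copy=True)
slide_manifest = stardist_cell_lymphocyte(
    slide_manifest,
    output_urlpath="/results/stardist",
    num_cores=8,
    use_gpu=True,   # selects the GPU build of the QuPath container
)
print(slide_manifest["lymphocyte_geojson_url"])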

stardist_cell_lymphocyte_cli(slide_urlpath='???', output_urlpath='.', num_cores=1, use_gpu=False, image='mskmind/qupath-stardist:0.4.3', use_singularity=False, max_heap_size='64G', storage_options={}, output_storage_options={})

Run stardist using qupath CLI

Parameters:

Name Type Description Default
slide_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

'???'
output_urlpath str

output url/path

'.'
num_cores int

Number of cores to use for CPU parallelization

1
use_gpu bool

use GPU

False
image str

docker/singularity image

'mskmind/qupath-stardist:0.4.3'
use_singularity bool

use singularity instead of docker

False
max_heap_size str

maximum heap size to pass to java options

'64G'
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}

Returns:

Name Type Description
dict dict

run metadata

Source code in src/luna/pathology/cli/run_stardist_cell_detection.py
@timed
@save_metadata
def stardist_cell_lymphocyte_cli(
    slide_urlpath: str = "???",
    output_urlpath: str = ".",
    num_cores: int = 1,
    use_gpu: bool = False,
    image: str = "mskmind/qupath-stardist:0.4.3",
    use_singularity: bool = False,
    max_heap_size: str = "64G",
    storage_options: dict = {},
    output_storage_options: dict = {},
) -> dict:
    """Run stardist using qupath CLI

    Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        output_urlpath (str): output url/path
        num_cores (int): Number of cores to use for CPU parallelization
        use_gpu (bool): use GPU
        image (str): docker/singularity image
        use_singularity (bool): use singularity instead of docker
        max_heap_size (str): maximum heap size to pass to java options
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        dict: run metadata
    """
    config = get_config(vars())
    slide_id = Path(config["slide_urlpath"]).stem
    properties = __stardist_cell_lymphocyte(
        config["slide_urlpath"],
        config["output_urlpath"],
        slide_id,
        config["num_cores"],
        config["use_gpu"],
        config["image"],
        config["use_singularity"],
        config["max_heap_size"],
        config["storage_options"],
        config["output_storage_options"],
    )
    return properties

stardist_simple(slide_manifest, cell_expansion_size, image_type, output_urlpath, debug_opts, num_cores, image, use_singularity, max_heap_size, storage_options, output_storage_options, annotation_column='stardist_geojson_url')

Run stardist using qupath CLI on slides in a slide manifest from slide_etl. URIs to resulting GeoJSON will be stored in a specified column of the returned slide manifest.

Parameters:

Name Type Description Default
slide_manifest DataFrame[SlideSchema]

slide manifest from slide_etl

required
cell_expansion_size float

size in pixels to expand cell cytoplasm

required
image_type str

qupath image type (BRIGHTFIELD_H_DAB)

required
output_urlpath str

output url/path

required
debug_opts str

debug options passed as arguments to groovy script

required
num_cores int

Number of cores to use for CPU parallelization

required
image str

docker/singularity image

required
use_singularity bool

use singularity instead of docker

required
max_heap_size str

maximum heap size to pass to java options

required
storage_options dict

storage options to pass to reading functions

required
output_storage_options dict

storage options to pass to writing functions

required
annotation_column str

name of column in resulting slide manifest to store GeoJSON URIs

'stardist_geojson_url'

Returns:

Type Description
DataFrame[SlideSchema]

DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/run_stardist_cell_detection.py
def stardist_simple(
    slide_manifest: DataFrame[SlideSchema],
    cell_expansion_size: float,
    image_type: str,
    output_urlpath: str,
    debug_opts: str,
    num_cores: int,
    image: str,
    use_singularity: bool,
    max_heap_size: str,
    storage_options: dict,
    output_storage_options: dict,
    annotation_column: str = "stardist_geojson_url",
) -> DataFrame[SlideSchema]:
    """Run stardist using qupath CLI on slides in a slide manifest from
    slide_etl. URIs to resulting GeoJSON will be stored in a specified column
    of the returned slide manifest.

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        cell_expansion_size (float): size in pixels to expand cell cytoplasm
        image_type (str): qupath image type (BRIGHTFIELD_H_DAB)
        output_urlpath (str): output url/path
        debug_opts (str): debug options passed as arguments to groovy script
        num_cores (int): Number of cores to use for CPU parallelization
        image (str): docker/singularity image
        use_singularity (bool): use singularity instead of docker
        max_heap_size (str): maximum heap size to pass to java options
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        annotation_column (str): name of column in resulting slide manifest to store GeoJSON URIs

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """

    client = get_or_create_dask_client()

    futures = []
    for row in slide_manifest.itertuples(name="Slide"):
        future = client.submit(
            __stardist_simple,
            row.url,
            cell_expansion_size,
            image_type,
            output_urlpath,
            debug_opts,
            num_cores,
            image,
            use_singularity,
            max_heap_size,
            storage_options,
            output_storage_options,
        )
        futures.append(future)
    results = client.gather(futures)
    return slide_manifest.assign(
        **{annotation_column: [x["geojson_url"] for x in results]}
    )
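All arguments except annotation_column are required; a hedged call might look like this (paths and the expansion size are hypothetical):

from luna.pathology.cli.run_stardist_cell_detection import stardist_simple
from luna.pathology.cli.slide_etl import slide_etl

slide_manifest = slide_etl(["/data/slides/123.svs"], project_name="PRO-12-123", no_copy=True)
slide_manifest = stardist_simple(
    slide_manifest,
    cell_expansion_size=8.0,              # hypothetical cytoplasm expansion in pixels
    image_type="BRIGHTFIELD_H_DAB",
    output_urlpath="/results/stardist",
    debug_opts="",
    num_cores=8,
    image="mskmind/qupath-stardist:0.4.3",
    use_singularity=False,
    max_heap_size="64G",
    storage_options={},
    output_storage_options={},
)
print(slide_manifest["stardist_geojson_url"])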

stardist_simple_cli(slide_urlpath='???', cell_expansion_size='???', image_type='???', output_urlpath='.', debug_opts='', num_cores=1, image='mskmind/qupath-stardist:0.4.3', use_singularity=False, max_heap_size='64G', storage_options={}, output_storage_options={}, local_config='')

Run stardist using qupath CLI

Parameters:

Name Type Description Default
slide_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

'???'
cell_expansion_size float

size in pixels to expand cell cytoplasm

'???'
num_cores int

Number of cores to use for CPU parallelization

1
image_type str

qupath image type (BRIGHTFIELD_H_DAB)

'???'
output_urlpath str

output url/path

'.'
debug_opts str

debug options passed as arguments to groovy script

''
image str

docker/singularity image

'mskmind/qupath-stardist:0.4.3'
use_singularity bool

use singularity instead of docker

False
max_heap_size str

maximum heap size to pass to java options

'64G'
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}
local_config str

local config yaml file

''

Returns:

Name Type Description
dict

metadata about function call

Source code in src/luna/pathology/cli/run_stardist_cell_detection.py
@timed
@save_metadata
def stardist_simple_cli(
    slide_urlpath: str = "???",
    cell_expansion_size: float = "???",  # type: ignore
    image_type: str = "???",
    output_urlpath: str = ".",
    debug_opts: str = "",
    num_cores: int = 1,
    image: str = "mskmind/qupath-stardist:0.4.3",
    use_singularity: bool = False,
    max_heap_size: str = "64G",
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Run stardist using qupath CLI

    Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        cell_expansion_size (float): size in pixels to expand cell cytoplasm
        num_cores (int): Number of cores to use for CPU parallelization
        image_type (str): qupath image type (BRIGHTFIELD_H_DAB)
        output_urlpath (str): output url/path
        debug_opts (str): debug options passed as arguments to groovy script
        image (str): docker/singularity image
        use_singularity (bool): use singularity instead of docker
        max_heap_size (str): maximum heap size to pass to java options
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        local_config (str): local config yaml file

    Returns:
        dict: metadata about function call
    """

    config = get_config(vars())

    return __stardist_simple(
        config["slide_urlpath"],
        config["cell_expansion_size"],
        config["image_type"],
        config["output_urlpath"],
        config["debug_opts"],
        config["num_cores"],
        config["image"],
        config["use_singularity"],
        config["max_heap_size"],
        config["storage_options"],
        config["output_storage_options"],
    )

run_tissue_detection

cli(slide_urlpath='???', tiles_urlpath='', filter_query='???', tile_size=None, thumbnail_magnification=None, tile_magnification=None, batch_size=2000, output_urlpath='.', force=False, dask_options={}, storage_options={}, output_storage_options={}, local_config='')

Run simple/deterministic tissue detection algorithms based on a filter query, to reduce tiles to those (likely) to contain actual tissue

Parameters:

Name Type Description Default
slide_urlpath str

url/path to slide image (virtual slide formats compatible with pyvips, .svs, .tif, .scn, ...)

'???'
tiles_urlpath str

url/path to tiles manifest (parquet)

''
filter_query str

pandas query by which to filter tiles based on their various tissue detection scores

'???'
tile_size Optional[int]

size of tiles to use (at the requested magnification)

None
thumbnail_magnification Optional[int]

Magnification scale at which to create thumbnail for tissue detection

None
tile_magnification Optional[int]

Magnification scale at which to generate tiles

None
batch_size int

batch size for processing

2000
output_urlpath str

Output url/path

'.'
force bool

overwrite outputs if they exist

False
dask_options dict

dask options

{}
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}
local_config str

local config file

''

Returns:

Name Type Description
dict dict

metadata about cli function call

Source code in src/luna/pathology/cli/run_tissue_detection.py
@timed
@save_metadata
def cli(
    slide_urlpath: str = "???",
    tiles_urlpath: str = "",
    filter_query: str = "???",
    tile_size: Optional[int] = None,
    thumbnail_magnification: Optional[int] = None,
    tile_magnification: Optional[int] = None,
    batch_size: int = 2000,
    output_urlpath: str = ".",
    force: bool = False,
    dask_options: dict = {},
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
) -> dict:
    """Run simple/deterministic tissue detection algorithms based on a filter query, to reduce tiles to those (likely) to contain actual tissue
    Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with pyvips, .svs, .tif, .scn, ...)
        tiles_urlpath (str): url/path to tiles manifest (parquet)
        filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
        tile_size (int): size of tiles to use (at the requested magnification)
        thumbnail_magnification (Optional[int]): Magnification scale at which to create thumbnail for tissue detection
        tile_magnification (Optional[int]): Magnification scale at which to generate tiles
        batch_size (int): batch size for processing
        output_urlpath (str): Output url/path
        force (bool): overwrite outputs if they exist
        dask_options (dict): dask options
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        local_config (str): local config file
    Returns:
        dict: metadata about cli function call

    """
    config = get_config(vars())

    configure_dask_client(**config["dask_options"])

    if not config["tile_size"] and not config["tiles_urlpath"]:
        raise fire.core.FireError("Specify either tiles_urlpath or tile_size")

    slide_id = Path(config["slide_urlpath"]).stem

    tiles_urlpath = config["tiles_urlpath"]

    with make_temp_directory() as temp_dir:
        if not tiles_urlpath:
            result = __generate_tiles(
                config["slide_urlpath"],
                config["tile_size"],
                temp_dir,
                config["force"],
                config["tile_magnification"],
                config["storage_options"],
            )
            tiles_urlpath = result["tiles_url"]

        properties = __detect_tissue(
            config["slide_urlpath"],
            tiles_urlpath,
            slide_id,
            config["thumbnail_magnification"],
            config["filter_query"],
            config["batch_size"],
            config["output_urlpath"],
            config["force"],
            config["storage_options"],
            config["output_storage_options"],
        )

    return properties
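A hedged sketch of calling this entrypoint from Python (the slide path and filter column are hypothetical):

from luna.pathology.cli.run_tissue_detection import cli as detect_tissue_cli

metadata = detect_tissue_cli(
    slide_urlpath="/data/slides/123.svs",
    tile_size=256,
    filter_query="otsu_score > 0.5",   # hypothetical score column
    output_urlpath="/results/123",
)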

compute_otsu_score(tile, slide_path, otsu_threshold)

Return otsu score for the tile.

Parameters:

Name Type Description Default
tile Tile

tile with metadata

required
slide_path str

path to slide

required
otsu_threshold float

otsu threshold value

required

Source code in src/luna/pathology/cli/run_tissue_detection.py
def compute_otsu_score(tile: Tile, slide_path: str, otsu_threshold: float) -> float:
    """
    Return otsu score for the tile.
    Args:
        tile (Tile): tile with metadata
        slide_path (str): path to slide
        otsu_threshold (float): otsu threshold value
    """
    with TiffSlide(slide_path) as slide:
        tile_arr = get_array_from_tile(tile, slide, 10)
    score = np.mean((rgb2gray(tile_arr) < otsu_threshold).astype(int))
    return score

compute_purple_score(tile, slide_path)

Return purple score for the tile.

Parameters:

Name Type Description Default
tile Tile

tile with metadata

required
slide_path str

path to slide

required

Source code in src/luna/pathology/cli/run_tissue_detection.py
def compute_purple_score(
    tile: Tile,
    slide_path: str,
) -> float:
    """
    Return purple score for the tile.
    Args:
        tile (Tile): tile with metadata
        slide_path (str): path to slide
    """
    with TiffSlide(slide_path) as slide:
        tile_arr = get_array_from_tile(tile, slide, 10)
    return get_purple_score(tile_arr)

compute_stain_score(tile, slide_path, vectors, channel, stain_threshold)

Returns stain score for the tile

Parameters:

Name Type Description Default
tile Tile

tile with metadata

required
slide_path str

path to slide

required
vectors np.ndarray

stain vectors

required
channel int

stain channel

required
stain_threshold float

stain threshold value

required

Source code in src/luna/pathology/cli/run_tissue_detection.py
def compute_stain_score(
    tile: Tile,
    slide_path: str,
    vectors,
    channel,
    stain_threshold: float,
) -> np.floating:
    """
    Returns stain score for the tile
    Args:
        tile (Tile): tile with metadata
        slide_path (str): path to slide
        vectors (np.ndarray): stain vectors
        channel (int): stain channel
        stain_threshold (float): stain threshold value
    """
    with TiffSlide(slide_path) as slide:
        tile_arr = get_array_from_tile(tile, slide, 10)
    stain = pull_stain_channel(tile_arr, vectors=vectors, channel=channel)
    score = np.mean(stain > stain_threshold)
    return score
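The three scoring helpers above share one pattern: read the tile region, reduce it to a boolean mask, and take the mean. A self-contained sketch of the otsu variant, where the threshold would normally come from Otsu's method on a slide thumbnail (the arrays below are stand-ins):

import numpy as np
from skimage.color import rgb2gray
from skimage.filters import threshold_otsu

def otsu_score(tile_arr: np.ndarray, otsu_threshold: float) -> float:
    # fraction of pixels darker than the threshold, as in compute_otsu_score
    return float(np.mean(rgb2gray(tile_arr) < otsu_threshold))

thumbnail = (np.random.rand(64, 64, 3) * 255).astype(np.uint8)  # stand-in thumbnail
threshold = threshold_otsu(rgb2gray(thumbnail))
dark_tile = np.full((256, 256, 3), 40, dtype=np.uint8)          # mostly-tissue dummy tile
print(otsu_score(dark_tile, threshold))                         # -> 1.0, every pixel is dark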

detect_tissue(slide_manifest, tile_size=None, thumbnail_magnification=None, tile_magnification=None, filter_query='', batch_size=2000, force=True, storage_options={}, output_urlpath='.', output_storage_options={})

Run simple/deterministic tissue detection algorithms based on a filter query, to reduce tiles to those (likely) to contain actual tissue

Parameters:

Name Type Description Default
slide_manifest DataFrame[SlideSchema]

slide manifest from slide_etl

required
tile_size Optional[int]

size of tiles to use (at the requested magnification)

None
thumbnail_magnification Optional[int]

Magnification scale at which to create thumbnail for tissue detection

None
tile_magnification Optional[int]

Magnification scale at which to generate tiles

None
filter_query str

pandas query by which to filter tiles based on their various tissue detection scores

''
batch_size int

batch size for processing

2000
force bool

overwrite outputs if they exist

True
storage_options dict

storage options to pass to reading functions

{}
output_urlpath str

Output url/path

'.'
output_storage_options dict

storage options to pass to writing functions

{}

Returns:

Type Description
DataFrame[SlideSchema]

slide manifest

Source code in src/luna/pathology/cli/run_tissue_detection.py
def detect_tissue(
    slide_manifest: DataFrame[SlideSchema],
    tile_size: Optional[int] = None,
    thumbnail_magnification: Optional[int] = None,
    tile_magnification: Optional[int] = None,
    filter_query: str = "",
    batch_size: int = 2000,
    force: bool = True,
    storage_options: dict = {},
    output_urlpath: str = ".",
    output_storage_options: dict = {},
) -> DataFrame[SlideSchema]:
    """Run simple/deterministic tissue detection algorithms based on a filter query, to reduce tiles to those (likely) to contain actual tissue
    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        tile_size (int): size of tiles to use (at the requested magnification)
        thumbnail_magnification (Optional[int]): Magnification scale at which to create thumbnail for tissue detection
        tile_magnification (Optional[int]): Magnification scale at which to generate tiles
        filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
        batch_size (int): batch size for processing
        force (bool): overwrite outputs if they exist
        storage_options (dict): storage options to pass to reading functions
        output_urlpath (str): Output url/path
        output_storage_options (dict): storage options to pass to writing functions
    Returns:
        DataFrame[SlideSchema]: slide manifest

    """
    client = get_or_create_dask_client()

    with make_temp_directory() as temp_dir:
        if "tiles_url" not in slide_manifest.columns:
            slide_manifest = generate_tiles(
                slide_manifest,
                tile_size,
                temp_dir,
                tile_magnification,
                storage_options,
            )

        futures = []
        for slide in slide_manifest.itertuples(name="Slide"):
            future = client.submit(
                __detect_tissue,
                slide.url,
                slide.tiles_url,
                slide.id,
                thumbnail_magnification,
                filter_query,
                batch_size,
                output_urlpath,
                force,
                storage_options,
                output_storage_options,
            )
            futures.append(future)
        progress(futures)

        results = client.gather(futures)

        slide_manifest = slide_manifest.assign(
            tiles_url=[x["tiles_url"] for x in results]
        )

    return slide_manifest
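A minimal sketch chaining manifest construction, tissue detection, and tile saving (paths and the filter column are hypothetical); this is the same sequence infer_tile_labels runs internally when no tiles_url column exists yet:

from luna.pathology.cli.run_tissue_detection import detect_tissue
from luna.pathology.cli.save_tiles import save_tiles
from luna.pathology.cli.slide_etl import slide_etl

slide_manifest = slide_etl(["/data/slides/123.svs"], project_name="PRO-12-123", no_copy=True)
slide_manifest = detect_tissue(
    slide_manifest,
    tile_size=256,
    filter_query="purple_score > 0.1",   # hypothetical score column
    output_urlpath="/results/tiles",
)
slide_manifest = save_tiles(slide_manifest, "/results/tiles")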

save_tiles

__save_tiles(tiles_urlpath, slide_urlpath, output_h5_path, batch_size=2000, storage_options={}, output_storage_options={})

Saves tiles to disk

Tile addresses and arrays are saved as key-value pairs in (tiles.h5), and the corresponding manifest/header file (tiles.parquet) is also generated

Parameters:

Name Type Description Default
tiles_urlpath str

url/path to tile manifest

required
slide_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

required
output_h5_path str

output h5 url/path

required
batch_size int

size in batch dimension to chunk jobs

2000
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}

Returns:

Type Description
DataFrame

pd.DataFrame: tile manifest

Source code in src/luna/pathology/cli/save_tiles.py
@local_cache_urlpath(
    file_key_write_mode={
        "slide_urlpath": "r",
        "output_h5_path": "w",
    },
)
def __save_tiles(
    tiles_urlpath: str,
    slide_urlpath: str,
    output_h5_path: str,
    batch_size: int = 2000,
    storage_options: dict = {},
    output_storage_options: dict = {},
):
    """Saves tiles to disk

    Tile addresses and arrays are saved as key-value pairs in (tiles.h5),
    and the corresponding manifest/header file (tiles.parquet) is also generated

    Args:
        tiles_urlpath (str): url/path to tile manifest
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        output_h5_path (str): output h5 url/path
        batch_size (int): size in batch dimension to chunk jobs
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        pd.DataFrame: tile manifest
    """

    tiles_df = pd.read_parquet(tiles_urlpath, storage_options=storage_options)

    get_or_create_dask_client()

    def f_many(iterator):
        with TiffSlide(slide_urlpath) as slide:
            return [(x.address, get_array_from_tile(x, slide=slide)) for x in iterator]

    chunks = db.from_sequence(
        tiles_df.itertuples(name="Tile"), partition_size=batch_size
    )

    ProgressBar().register()
    results = chunks.map_partitions(f_many)
    with h5py.File(output_h5_path, "w") as hfile:
        for result in results.compute():
            address, tile_arr = result
            hfile.create_dataset(address, data=tile_arr)

    return tiles_df

cli(slide_urlpath='???', tiles_urlpath='???', batch_size=2000, output_urlpath='.', force=False, storage_options={}, output_storage_options={}, dask_options={}, local_config='')

Saves tiles to disk

Tile addresses and arrays are saved as key-value pairs in (tiles.h5), and the corresponding manifest/header file (tiles.parquet) is also generated

Parameters:

Name Type Description Default
slide_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

'???'
tiles_urlpath str

url/path to tile manifest (.parquet)

'???'
batch_size int

size in batch dimension to chunk jobs

2000
output_urlpath str

output url/path prefix

'.'
force bool

overwrite outputs if they exist

False
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}
dask_options dict

dask options

{}
local_config str

url/path to local config yaml file

''

Returns:

Name Type Description
dict

metadata about function call

Source code in src/luna/pathology/cli/save_tiles.py
@timed
@save_metadata
def cli(
    slide_urlpath: str = "???",
    tiles_urlpath: str = "???",
    batch_size: int = 2000,
    output_urlpath: str = ".",
    force: bool = False,
    storage_options: dict = {},
    output_storage_options: dict = {},
    dask_options: dict = {},
    local_config: str = "",
):
    """Saves tiles to disk

    Tile addresses and arrays are saved as key-value pairs in (tiles.h5),
    and the corresponding manifest/header file (tiles.parquet) is also generated

    Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        tiles_urlpath (str): url/path to tile manifest (.parquet)
        batch_size (int): size in batch dimension to chunk jobs
        output_urlpath (str): output url/path prefix
        force (bool): overwrite outputs if they exist
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        dask_options (dict): dask options
        local_config (str): url/path to local config yaml file

    Returns:
        dict: metadata about function call
    """
    config = get_config(vars())

    configure_dask_client(**config["dask_options"])

    properties = _save_tiles(
        config["tiles_urlpath"],
        config["slide_urlpath"],
        config["output_urlpath"],
        config["force"],
        config["batch_size"],
        config["storage_options"],
        config["output_storage_options"],
    )

    return properties

save_tiles(slide_manifest, output_urlpath, force=True, batch_size=2000, storage_options={}, output_storage_options={})

Saves tiles to disk

Tile addresses and arrays are saved as key-value pairs in (tiles.h5), and the corresponding manifest/header file (tiles.parquet) is also generated

Parameters:

Name Type Description Default
slide_manifest DataFrame[SlideSchema]

slide manifest from slide_etl

required
output_urlpath str

output url/path prefix

required
force bool

overwrite outputs if they exist

True
batch_size int

size in batch dimension to chunk jobs

2000
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}

Returns:

Type Description
DataFrame[SlideSchema]

DataFrame[SlideSchema]: slide manifest

Source code in src/luna/pathology/cli/save_tiles.py
def save_tiles(
    slide_manifest: DataFrame[SlideSchema],
    output_urlpath: str,
    force: bool = True,
    batch_size: int = 2000,
    storage_options: dict = {},
    output_storage_options: dict = {},
) -> DataFrame[SlideSchema]:
    """Saves tiles to disk

    Tile addresses and arrays are saved as key-value pairs in (tiles.h5),
    and the corresponding manifest/header file (tiles.parquet) is also generated

    Args:
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        output_urlpath (str): output url/path prefix
        force (bool): overwrite outputs if they exist
        batch_size (int): size in batch dimension to chunk jobs
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions

    Returns:
        DataFrame[SlideSchema]: slide manifest
    """
    client = get_or_create_dask_client()

    if "tiles_url" not in slide_manifest.columns:
        raise ValueError("Generate tiles first")

    output_filesystem, output_path_prefix = fsspec.core.url_to_fs(
        output_urlpath, **output_storage_options
    )

    if not output_filesystem.exists(output_urlpath):
        output_filesystem.mkdir(output_urlpath)

    futures = []
    for slide in slide_manifest.itertuples(name="Slide"):
        future = client.submit(
            _save_tiles,
            slide.tiles_url,
            slide.url,
            output_urlpath,
            force,
            batch_size,
            storage_options,
            output_storage_options,
        )
        futures.append(future)

    results = client.gather(futures)
    return slide_manifest.assign(tiles_url=[x["tiles_url"] for x in results])
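After save_tiles runs, each slide's pixel data sits in an HDF5 file keyed by tile address, next to its parquet manifest. A hedged sketch of reading a tile back (paths are hypothetical, and the manifest is assumed to be indexed by tile address):

import h5py
import pandas as pd

tiles_df = pd.read_parquet("/results/tiles/123.tiles.parquet")
with h5py.File("/results/tiles/123.tiles.h5", "r") as hfile:
    address = tiles_df.index[0]
    tile_arr = hfile[address][()]   # numpy array of tile pixels
print(address, tile_arr.shape)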

slide_etl

SlideBuilder

Source code in src/luna/pathology/cli/slide_etl.py
class SlideBuilder:
    def __init__(self, storage_options: dict = {}, output_storage_options: dict = {}):
        self.storage_options = storage_options
        self.output_storage_options = output_storage_options

    def __generate_properties(self, slide, url):
        with open(url, **self.storage_options) as f:
            s = TiffSlide(f)
            slide.properties = s.properties
            try:
                to_mag_scale_factor = get_scale_factor_at_magnification(
                    s, requested_magnification=1
                )
                sample_arr = get_downscaled_thumbnail(s, to_mag_scale_factor)
                stain_vectors = get_stain_vectors_macenko(sample_arr)
                slide.channel0_R = stain_vectors[0, 0]
                slide.channel0_G = stain_vectors[0, 1]
                slide.channel0_B = stain_vectors[0, 2]
                slide.channel1_R = stain_vectors[1, 0]
                slide.channel1_G = stain_vectors[1, 1]
                slide.channel1_B = stain_vectors[1, 2]
            except Exception as err:
                logger.warning(f"Couldn't get stain vectors: {url} - {err}")

    def copy_slide(self, slide, output_urlpath, chunksize=50000000):
        new_slide = slide.copy()
        name = Path(slide.url).name
        fs, output_path = fsspec.core.url_to_fs(
            output_urlpath, **self.output_storage_options
        )
        p = Path(output_path) / name
        with open(slide.url, "rb", **self.storage_options) as f1:
            with fs.open(p, "wb") as f2:
                while True:
                    data = f1.read(chunksize)
                    if not data:
                        break
                    f2.write(data)
        new_slide.url = fs.unstrip_protocol(str(p))
        return new_slide

    def get_slide(self, url, project_name="", comment="") -> Slide:
        """Extract openslide properties and write slide to storage location

        Args:
            path (string): path to slide image

        Returns:
            slide (Slide): slide object
        """

        fs, path = fsspec.core.url_to_fs(url, **self.storage_options)

        id = Path(path).stem
        size = fs.du(path)
        slide = Slide(
            id=id,
            project_name=project_name,
            comment=comment,
            slide_size=size,
            url=url,
            uuid=str(uuid.uuid3(uuid.NAMESPACE_URL, url)),
        )

        self.__generate_properties(slide, url)

        return slide
get_slide(url, project_name='', comment='')

Extract openslide properties and write slide to storage location

Parameters:

Name Type Description Default
path string

path to slide image

required

Returns:

Name Type Description
slide Slide

slide object

Source code in src/luna/pathology/cli/slide_etl.py
def get_slide(self, url, project_name="", comment="") -> Slide:
    """Extract openslide properties and write slide to storage location

    Args:
        path (string): path to slide image

    Returns:
        slide (Slide): slide object
    """

    fs, path = fsspec.core.url_to_fs(url, **self.storage_options)

    id = Path(path).stem
    size = fs.du(path)
    slide = Slide(
        id=id,
        project_name=project_name,
        comment=comment,
        slide_size=size,
        url=url,
        uuid=str(uuid.uuid3(uuid.NAMESPACE_URL, url)),
    )

    self.__generate_properties(slide, url)

    return slide
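A minimal sketch of using the builder directly (the slide path is hypothetical; stain-vector extraction may log a warning if Macenko estimation fails):

from luna.pathology.cli.slide_etl import SlideBuilder

sb = SlideBuilder()
slide = sb.get_slide("/data/slides/123.svs", project_name="PRO-12-123")
print(slide.id, slide.uuid, slide.slide_size)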

cli(slide_urlpath='???', project_name='', comment='', subset_csv_urlpath='', debug_limit=0, output_urlpath='', storage_options={}, output_storage_options={}, local_config='', no_copy=False, metadata_extension='parquet')

Ingest slides by adding them to a file or s3 based storage location and generating metadata about them

Parameters:

Name Type Description Default
slide_urlpath str

path to slide image

'???'
project_name str

project name under which the slides should reside

''
comment str

comment and description of dataset

''
subset_csv_urlpath str

url/path to subset csv

''
debug_limit int

limit number of slides

0
output_urlpath str

url/path to output table

''
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}
local_config str

url/path to YAML config file

''
no_copy bool

determines whether we copy slides to output_urlpath

False
metadata_extension str

file extension of generated metadata file (either 'csv' or 'parquet')

'parquet'
Source code in src/luna/pathology/cli/slide_etl.py
@timed
def cli(
    slide_urlpath: str = "???",
    project_name: str = "",
    comment: str = "",
    subset_csv_urlpath: str = "",
    debug_limit: int = 0,
    output_urlpath: str = "",
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
    no_copy: bool = False,
    metadata_extension: str = "parquet",
):
    """Ingest slide by adding them to a file or s3 based storage location and generating metadata about them


    Args:
        slide_urlpath (str): path to slide image
        project_name (str): project name under which the slides should reside
        comment (str): comment and description of dataset
        subset_csv_urlpath (str): url/path to subset csv
        debug_limit (int): limit number of slides
        output_urlpath (str): url/path to output table
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        local_config (str): url/path to YAML config file
        no_copy (bool): determines whether we copy slides to output_urlpath
        metadata_extension (str): file extension of generated metadata file (either 'csv' or 'parquet')
    """

    config = get_config(vars())
    filesystem, slide_path = fsspec.core.url_to_fs(
        config["slide_urlpath"], **config["storage_options"]
    )
    slide_paths = []  # type: list[str]
    if any(slide_path.endswith(ext) for ext in VALID_SLIDE_EXTENSIONS):
        slide_paths.append(slide_path)
    else:
        for ext in VALID_SLIDE_EXTENSIONS:
            slide_paths += filesystem.glob(f"{slide_path}/*{ext}")

    if config["metadata_extension"]:
        extension = config["metadata_extension"].lower().replace(".", "")

    if config["subset_csv_urlpath"]:
        slide_paths = apply_csv_filter(
            slide_paths, config["subset_csv_urlpath"], config["storage_options"]
        )
    if config["debug_limit"] > 0:
        slide_paths = slide_paths[: config["debug_limit"]]

    configure_dask_client()

    if len(slide_paths) == 0:
        return None

    slide_urls = [filesystem.unstrip_protocol(slide_path) for slide_path in slide_paths]

    df = slide_etl(
        slide_urls,
        config["project_name"],
        config["comment"],
        config["storage_options"],
        config["output_urlpath"],
        config["output_storage_options"],
        config["no_copy"],
    )

    logger.info(df)
    if config["output_urlpath"]:
        output_filesystem, output_path = fsspec.core.url_to_fs(
            config["output_urlpath"], **config["output_storage_options"]
        )

        f = Path(output_path) / f"slide_ingest_{config['project_name']}.{extension}"
        with output_filesystem.open(f, "wb") as of:
            if extension == "csv":
                logger.info("Writing to csv file")
                df.to_csv(of)
            elif extension == "parquet":
                logger.info("Writing to parquet file")
                df.to_parquet(of)

slide_etl(slide_urls, project_name, comment='', storage_options={}, output_urlpath='', output_storage_options={}, no_copy=False)

Ingest slides by adding them to a file or s3 based storage location and generating metadata about them

Parameters:

Name Type Description Default
slide_urls Union[str, List[str]

path to slide image(s)

required
project_name str

project name under which the slides should reside

required
comment str

comment and description of dataset

''
storage_options dict

storage options to pass to reading functions

{}
output_urlpath str

url/path to output table

''
output_storage_options dict

storage options to pass to writing functions

{}
no_copy bool

do not copy slides to output path

False

Returns:

Type Description
DataFrame

DataFrame[SlideSchema]: dataframe containing the metadata of all the slides

Source code in src/luna/pathology/cli/slide_etl.py
def slide_etl(
    slide_urls: Union[str, List[str]],
    project_name: str,
    comment: str = "",
    storage_options: dict = {},
    output_urlpath: str = "",
    output_storage_options: dict = {},
    no_copy: bool = False,
) -> DataFrame:
    """Ingest slides by adding them to a file or s3 based storage location and generating metadata about them

    Args:
        slide_urls (Union[str, List[str]]): path to slide image(s)
        project_name (str): project name under which the slides should reside
        comment (str): comment and description of dataset
        storage_options (dict): storage options to pass to reading functions
        output_urlpath (str): url/path to output table
        output_storage_options (dict): storage options to pass to writing functions
        no_copy (bool): do not copy slides to output path


    Returns:
        DataFrame[SlideSchema]: dataframe containing the metadata of all the slides
    """
    sb = SlideBuilder(storage_options, output_storage_options=output_storage_options)
    if isinstance(slide_urls, str):
        return __slide_etl(
            sb, slide_urls, project_name, comment, output_urlpath, no_copy
        )

    client = get_or_create_dask_client()

    futures = [
        client.submit(
            __slide_etl,
            sb,
            slide_url,
            project_name,
            comment,
            output_urlpath,
            no_copy,
        )
        for slide_url in slide_urls
    ]
    progress(futures)
    dfs = client.gather(futures)
    return pd.concat(dfs)
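A hedged ingestion sketch (slide paths and output location are hypothetical); with no_copy left False, each slide is copied under output_urlpath and the returned dataframe records the new URLs:

from luna.pathology.cli.slide_etl import slide_etl

df = slide_etl(
    ["/data/slides/123.svs", "/data/slides/124.svs"],
    project_name="PRO-12-123",
    comment="ingest test batch",
    output_urlpath="/data/ingested",
)
print(df[["id", "url", "uuid"]])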

visualize_tile_labels_png

cli(slide_urlpath='???', tiles_urlpath='', mpp_units=False, plot_labels='???', output_urlpath='.', requested_magnification=None, tile_size=None, storage_options={}, output_storage_options={}, local_config='')

Generate nice tile markup images with continuous or discrete tile scores

Parameters:

Name Type Description Default
slide_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

'???'
tiles_urlpath str

url/path to a slide-tile manifest file (.tiles.csv)

''
mpp_units bool

if true, additional rescaling is applied to match micro-meter and pixel coordinate systems

False
plot_labels List[str]

labels to plot

'???'
output_urlpath str

output url/path prefix

'.'
requested_magnification int

Magnification scale at which to perform computation

None
tile_size int

tile size

None
storage_options dict

storage options to pass to reading functions

{}
output_storage_options dict

storage options to pass to writing functions

{}
local_config str

url/path to local config YAML file

''

Returns:

Name Type Description
dict

metadata about function call

Source code in src/luna/pathology/cli/visualize_tile_labels_png.py
@timed
@save_metadata
def cli(
    slide_urlpath: str = "???",
    tiles_urlpath: str = "",
    mpp_units: bool = False,
    plot_labels: List[str] = "???",  # type: ignore
    output_urlpath: str = ".",
    requested_magnification: Optional[int] = None,
    tile_size: Optional[int] = None,
    storage_options: dict = {},
    output_storage_options: dict = {},
    local_config: str = "",
):
    """Generate nice tile markup images with continuous or discrete tile scores

    Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        tiles_urlpath (str): url/path to a slide-tile manifest file (.tiles.csv)
        mpp_units (bool): if true, additional rescaling is applied to match micro-meter and pixel coordinate systems
        plot_labels (List[str]): labels to plot
        output_urlpath (str): output url/path prefix
        requested_magnification (int): Magnification scale at which to perform computation
        tile_size (int): tile size
        storage_options (dict): storage options to pass to reading functions
        output_storage_options (dict): storage options to pass to writing functions
        local_config (str): url/path to local config YAML file

    Returns:
        dict: metadata about function call
    """
    config = get_config(vars())

    if not config["tile_size"] and not config["tiles_urlpath"]:
        raise fire.core.FireError("Specify either tiles_urlpath or tile_size")

    thumbnails_overlayed = visualize_tiles(
        config["slide_urlpath"],
        config["tiles_urlpath"],
        config["mpp_units"],
        config["plot_labels"],
        config["requested_magnification"],
        config["tile_size"],
        config["storage_options"],
    )

    fs, output_path_prefix = fsspec.core.url_to_fs(
        config["output_urlpath"], **config["output_storage_options"]
    )

    images = {}
    for score_type, thumbnail_overlayed in thumbnails_overlayed.items():
        output_file = (
            Path(output_path_prefix)
            / f"tile_scores_and_labels_visualization_{score_type}.png"
        )
        thumbnail_overlayed = Image.fromarray(thumbnail_overlayed)
        with fs.open(output_file, "wb") as of:
            thumbnail_overlayed.save(of, format="PNG")
        images[score_type] = str(output_file)
        logger.info(f"Saved {score_type} visualization at {output_file}")

    properties = {
        "data": fs.unstrip_protocol(output_path_prefix),
        "images": images,
    }

    return properties
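
A minimal sketch of calling the entrypoint directly from Python (paths are hypothetical, and plot_labels is assumed to match score columns present in the tiles table):

from luna.pathology.cli.visualize_tile_labels_png import cli

metadata = cli(
    slide_urlpath="/data/slides/123.svs",
    tiles_urlpath="/data/tiles/123.tiles.parquet",
    plot_labels=["tumor_score"],
    output_urlpath="/data/visualizations",
)
print(metadata["images"])  # {score_type: path to saved PNG}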

visualize_tiles(slide_urlpath, tiles_urlpath, mpp_units, plot_labels, requested_magnification=None, tile_size=None, storage_options={})

Generate nice tile markup images with continuous or discrete tile scores

Parameters:

Name Type Description Default
slide_urlpath str

url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)

required
tiles_urlpath str

url/path to a slide-tile manifest file (.tiles.csv)

required
mpp_units bool

if true, additional rescaling is applied to match micro-meter and pixel coordinate systems

required
plot_labels List[str]

labels to plot

required
requested_magnification int

Magnification scale at which to perform computation

None
tile_size int

tile size

None
storage_options dict

storage options to pass to reading functions

{}

Returns:

Type Description

dict[str,np.ndarray]: score type to numpy array representation of overlayed thumbnail

Source code in src/luna/pathology/cli/visualize_tile_labels_png.py
def visualize_tiles(
    slide_urlpath: str,
    tiles_urlpath: str,
    mpp_units: bool,
    plot_labels: List[str],
    requested_magnification: Optional[int] = None,
    tile_size: Optional[int] = None,
    storage_options: dict = {},
):
    """Generate nice tile markup images with continuous or discrete tile scores

    Args:
        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
        tiles_urlpath (str): url/path to a slide-tile manifest file (.tiles.csv)
        mpp_units (bool): if true, additional rescaling is applied to match micro-meter and pixel coordinate systems
        plot_labels (List[str]): labels to plot
        requested_magnification (int): Magnification scale at which to perform computation
        tile_size (int): tile size
        storage_options (dict): storage options to pass to reading functions

    Returns:
        dict[str,np.ndarray]: score type to numpy array representation of overlayed thumbnail
    """
    if type(plot_labels) == str:
        plot_labels = [plot_labels]

    # Get tiles
    if tiles_urlpath:
        with open(tiles_urlpath, **storage_options) as of:
            df = pd.read_parquet(of).reset_index().set_index("address")
    elif type(tile_size) == int:
        df = generate_tiles(
            slide_urlpath, tile_size, storage_options, requested_magnification
        )
    else:
        raise RuntimeError("Specify tile size or url/path to tiling data")

    with open(slide_urlpath, **storage_options) as of:
        slide = tiffslide.TiffSlide(of)

        to_mag_scale_factor = get_scale_factor_at_magnification(
            slide, requested_magnification=requested_magnification
        )

        # Create thumbnail image for scoring
        sample_arr = get_downscaled_thumbnail(slide, to_mag_scale_factor)

        # See if we need to adjust scale_factor to account for different units
        if mpp_units:
            unit_sf = 0.0
            for mpp_key in ("aperio.MPP", "openslide.mpp-x"):
                if mpp_key in slide.properties:
                    unit_sf = float(slide.properties[mpp_key])
            if unit_sf:
                to_mag_scale_factor *= unit_sf
            else:
                logger.warning(
                    "No MPP scale factor was recognized in slide properties."
                )

    # only visualize tile scores that were able to be computed
    all_score_types = set(plot_labels)
    score_types_to_visualize = set(list(df.columns)).intersection(all_score_types)

    thumbnails_overlayed = {}  # type: Dict[str,np.ndarray]
    for score_type in score_types_to_visualize:
        thumbnails_overlayed[score_type] = visualize_tiling_scores(
            df, sample_arr, to_mag_scale_factor, score_type
        )

    return thumbnails_overlayed
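
A minimal sketch (hypothetical paths; note that the tiles manifest is read as parquet, and only labels that exist as columns in it are rendered):

from PIL import Image
from luna.pathology.cli.visualize_tile_labels_png import visualize_tiles

overlays = visualize_tiles(
    slide_urlpath="/data/slides/123.svs",
    tiles_urlpath="/data/tiles/123.tiles.parquet",
    mpp_units=False,
    plot_labels=["tumor_score"],
)
for score_type, arr in overlays.items():
    Image.fromarray(arr).save(f"overlay_{score_type}.png")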

common

annotation_utils

check_slideviewer_and_download_bmp(sv_project_id, slideviewer_path, slide_id, users, SLIDE_BMP_DIR, SLIDEVIEWER_API_URL, TMP_ZIP_DIR)

download bitmap annotation from slideviewer

Parameters:

Name Type Description Default
sv_project_id str

slideviewer project id

required
slideviewer_path str

filepath to the input slide

required
slide_id str

slide id

required
users List[str]

list of users who provided annotations

required
SLIDE_BMP_DIR str

output folder to save bitmap to

required
SLIDEVIEWER_API_URL str

API url for slide viewer

required
TMP_ZIP_DIR str

temporary directory to save zipped bitmap files to

required

Returns:

Type Description
Union[None, List]

Union[None, List]: returns None if there are no annotations to process, or returns a list containing output parameters

Source code in src/luna/pathology/common/annotation_utils.py
def check_slideviewer_and_download_bmp(
    sv_project_id: str,
    slideviewer_path: str,
    slide_id: str,
    users: List,
    SLIDE_BMP_DIR: str,
    SLIDEVIEWER_API_URL: str,
    TMP_ZIP_DIR: str,
) -> Union[None, List]:
    """download bitmap annotation from slideviwer

    Args:
        sv_project_id (str): slideviewer project id
        slideviewer_path (str): filepath to the input slide
        slide_id (str): slide id
        users (List[str]): list of users who provided annotations
        SLIDE_BMP_DIR (str): output folder to save bitmap to
        SLIDEVIEWER_API_URL (str): API url for slide viewer
        TMP_ZIP_DIR (str): temporary directory to save zipped bitmap files to

    Returns:
        Union[None, List]: returns None if there are no annotations to process, or
            returns a list containing output parameters
    """
    slide_id = str(slide_id)

    outputs = []
    output_dict_base = {
        "sv_project_id": sv_project_id,
        "slideviewer_path": slideviewer_path,
        "slide_id": slide_id,
        "user": "n/a",
        "bmp_filepath": "n/a",
        "npy_filepath": "n/a",
        "geojson": "n/a",
        "geojson_path": "n/a",
        "date": datetime.now(),
    }
    outputs.append(output_dict_base)

    for user in users:
        # download bitmap
        bmp_record_uuid, bmp_filepath = get_slide_bitmap(
            slideviewer_path,
            user,
            slide_id,
            SLIDE_BMP_DIR,
            SLIDEVIEWER_API_URL,
            TMP_ZIP_DIR,
            sv_project_id,
        )
        # convert to npy
        if bmp_record_uuid != "n/a" or bmp_filepath != "n/a":

            output_dict = copy.deepcopy(output_dict_base)
            output_dict["user"] = user
            output_dict["bmp_filepath"] = bmp_filepath
            outputs.append(output_dict)
    # at this point if outputs is empty, return early
    if len(outputs) <= 1:
        return None
    else:
        return outputs
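
A minimal sketch (the project id, paths, users, and API URL are placeholders; a live SlideViewer instance is required):

from luna.pathology.common.annotation_utils import check_slideviewer_and_download_bmp

outputs = check_slideviewer_and_download_bmp(
    sv_project_id="155",
    slideviewer_path="2013;HobS13-283072057510;1435197.svs",
    slide_id="1435197",
    users=["someuser"],
    SLIDE_BMP_DIR="/data/bmps",
    SLIDEVIEWER_API_URL="https://slideviewer.example.org/",
    TMP_ZIP_DIR="/tmp/zips",
)
if outputs is None:
    print("no annotations to process")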

convert_bmp_to_npy(bmp_file, output_folder)

convert bitmap to numpy

Reads a bmp file and creates a friendly numpy ndarray file in uint8 format in the specified output directory, with extension .annot.npy

Troubleshooting

Make sure Pillow is upgraded to version 8.0.0 if getting an Unsupported BMP Size OS Error

Parameters:

Name Type Description Default
bmp_file str

path to .bmp image

required
output_folder str

path to output folder

required

Returns:

Type Description
str

filepath to file containing numpy array

Source code in src/luna/pathology/common/annotation_utils.py
def convert_bmp_to_npy(bmp_file: str, output_folder: str) -> str:
    """convert bitmap to numpy

    Reads a bmp file and creates a friendly numpy ndarray file in uint8 format in the
    specified output directory, with extension .annot.npy

    Troubleshooting:
        Make sure Pillow is upgraded to version 8.0.0 if getting an Unsupported BMP Size OS Error

    Args:
        bmp_file (str): path to .bmp image
        output_folder (str): path to output folder

    Returns:
        str: filepath to file containing numpy array
    """
    Image.MAX_IMAGE_PIXELS = None

    if ".bmp" not in bmp_file:
        return ""

    new_image_name = os.path.basename(bmp_file).replace(".bmp", ".npy")
    bmp_caseid_folder = os.path.basename(os.path.dirname(bmp_file))
    output_caseid_folder = os.path.join(output_folder, bmp_caseid_folder)

    if not os.path.exists(output_caseid_folder):
        os.makedirs(output_caseid_folder)

    output_filepath = os.path.join(output_caseid_folder, new_image_name)

    np.save(output_filepath, np.array(Image.open(bmp_file)))
    return output_filepath
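
A minimal sketch (hypothetical paths; the output lands under a per-case subfolder of output_folder):

import numpy as np
from luna.pathology.common.annotation_utils import convert_bmp_to_npy

npy_path = convert_bmp_to_npy("/data/bmps/case_1/123_user_annot.bmp", "/data/npys")
annotation = np.load(npy_path)  # uint8 label bitmap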

convert_slide_bitmap_to_geojson(outputs, all_labelsets, contour_level, SLIDE_NPY_DIR, slide_store_dir)

convert slide bitmap to geoJSON

Parameters:

Name Type Description Default
outputs List[dict]

list of output parameter dict

required
all_labelsets List[dict]

a list of dictionaries containing label sets

required
contour_level float

value along which to find contours

required
SLIDE_NPY_DIR str

directory containing the slide saved as a .npy

required
slide_store_dir str

directory of the datastore

required

Returns:

Type Description
Tuple[str, List]

Tuple[str, List]: a pair of slide id and output geojson tables

Source code in src/luna/pathology/common/annotation_utils.py
def convert_slide_bitmap_to_geojson(
    outputs,
    all_labelsets: List[dict],
    contour_level: float,
    SLIDE_NPY_DIR: str,
    slide_store_dir: str,
) -> Tuple[str, List]:
    """convert slide bitmap to geoJSON

    Args:
        outputs (List[dict]): list of output parameter dict
        all_labelsets (List[dict]): a list of dictionaries containing label sets
        contour_level (float): value along which to find contours
        SLIDE_NPY_DIR (str): directory containing the slide saved as a .npy
        slide_store_dir (str): directory of the datastore

    Returns:
        Tuple[str, List]: a pair of slide id and output geojson tables
    """

    import warnings

    warnings.warn("convert_slide_bitmap_to_geojson() is currently deprecated!")
    return None

get_slide_bitmap(full_filename, user, slide_id, SLIDE_BMP_DIR, SLIDEVIEWER_API_URL, TMP_ZIP_DIR, sv_project_id)

get slide bitmap

Parameters:

Name Type Description Default
full_filename str

filename of input slide

required
user str

name of pathologist/annotator who labeled the input slide

required
slide_id str

slide id

required
SLIDE_BMP_DIR str

output folder to save bitmap to

required
SLIDEVIEWER_API_URL str

API url for slide viewer

required
TMP_ZIP_DIR str

temporary directory to save zipped bitmap files to

required
sv_project_id str

slide viewer project id

required

Returns:

Type Description
Tuple[str, str]

Tuple[str, str]: a tuple of the bitmap record uuid and filepath to saved bitmap

Source code in src/luna/pathology/common/annotation_utils.py
def get_slide_bitmap(
    full_filename: str,
    user: str,
    slide_id: str,
    SLIDE_BMP_DIR: str,
    SLIDEVIEWER_API_URL: str,
    TMP_ZIP_DIR: str,
    sv_project_id: str,
) -> Tuple[str, str]:
    """get slide bitmap

    Args:
        full_filename (str): filename of input slide
        user (str): name of pathologist/annotator who labeled the input slide
        slide_id (str): slide id
        SLIDE_BMP_DIR (str): output folder to save bitmap to
        SLIDEVIEWER_API_URL (str): API url for slide viewer
        TMP_ZIP_DIR (str): temporary directory to save zipped bitmap files to
        sv_project_id (str): slide viewer project id

    Returns:
        Tuple[str, str]: a tuple of the bitmap record uuid and filepath to saved bitmap
    """

    full_filename_without_ext = full_filename.replace(".svs", "")

    bmp_dirname = os.path.join(
        SLIDE_BMP_DIR, full_filename_without_ext.replace(";", "_")
    )
    bmp_dest_path = os.path.join(bmp_dirname, str(slide_id) + "_" + user + "_annot.bmp")

    if os.path.exists(bmp_dest_path):
        logger.debug("Removing temporary file " + bmp_dest_path)
        os.remove(bmp_dest_path)

    # download bitmap file using api (from brush and fill tool), download zips into TMP_ZIP_DIR
    os.makedirs(TMP_ZIP_DIR, exist_ok=True)
    zipfile_path = os.path.join(
        TMP_ZIP_DIR, full_filename_without_ext + "_" + user + ".zip"
    )

    url = (
        SLIDEVIEWER_API_URL
        + "slides/"
        + str(user)
        + "@mskcc.org/projects;"
        + str(sv_project_id)
        + ";"
        + full_filename
        + "/getLabelFileBMP"
    )

    logger.debug(f"Pulling from Slideviewer URL={url}")

    success = download_zip(url, zipfile_path)

    bmp_record_uuid = "n/a"
    bmp_filepath = "n/a"

    if not success:
        os.remove(zipfile_path)
        return (bmp_record_uuid, bmp_filepath)

    unzipped_file_descriptor = unzip(zipfile_path)

    if unzipped_file_descriptor is None:
        return (bmp_record_uuid, bmp_filepath)

    # create bmp file from unzipped file
    os.makedirs(os.path.dirname(bmp_dest_path), exist_ok=True)
    with open(bmp_dest_path, "wb") as ff:
        ff.write(
            unzipped_file_descriptor.read("labels.bmp")
        )  # all bmps from slideviewer are called labels.bmp

    logger.info(
        "Added slide " + str(slide_id) + " to " + str(bmp_dest_path) + "  * * * * "
    )

    bmp_hash = FileHash("sha256").hash_file(bmp_dest_path)
    bmp_record_uuid = f"SVBMP-{bmp_hash}"
    bmp_filepath = (
        bmp_dirname + "/" + slide_id + "_" + user + "_" + bmp_record_uuid + "_annot.bmp"
    )
    os.rename(bmp_dest_path, bmp_filepath)

    # cleanup
    if os.path.exists(zipfile_path):
        os.remove(zipfile_path)

    return (bmp_record_uuid, bmp_filepath)

build_geojson

add_contours_for_label(annotation_geojson, annotation, label_num, mappings, contour_level)

creates geoJSON feature dictionary for labels

Finds the contours for a label mask, builds a polygon and then converts the polygon to a geoJSON feature dictionary

Parameters:

Name Type Description Default
annotation_geojson dict[str, any]

geoJSON result to populate

required
annotation ndarray

npy array of bitmap

required
label_num int

the integer corresponding to the annotated label

required
mappings dict

label map for specified label set

required
contour_level float

value along which to find contours in the array

required

Returns:

Type Description
Dict[str, any]

dict[str, any]: geoJSON with label contours

Source code in src/luna/pathology/common/build_geojson.py
def add_contours_for_label(
    annotation_geojson: Dict[str, any],
    annotation: np.ndarray,
    label_num: int,
    mappings: dict,
    contour_level: float,
) -> Dict[str, any]:
    """creates geoJSON feature dictionary for labels

    Finds the contours for a label mask, builds a polygon and then converts the polygon
    to a geoJSON feature dictionary

    Args:
        annotation_geojson (dict[str, any]): geoJSON result to populate
        annotation (np.ndarray): npy array of bitmap
        label_num (int): the integer corresponding to the annotated label
        mappings (dict): label map for specified label set
        contour_level (float): value along which to find contours in the array

    Returns:
         dict[str, any]: geoJSON with label contours
    """

    if label_num in annotation:
        print("Building contours for label " + str(label_num))

        num_pixels = np.count_nonzero(annotation == label_num)
        print("num_pixels with label", num_pixels)

        mask = np.where(annotation == label_num, 1, 0).astype(np.int8)
        contours = measure.find_contours(mask, level=contour_level)
        print("num_contours", len(contours))

        polygons = [Polygon(np.squeeze(c)) for c in contours]
        parent_nums = find_parents(polygons)

        polygon_by_index_number = {}

        for index, parent in enumerate(parent_nums):
            contour = contours[index]
            contour_list = contour.tolist()

            # switch coordinates, otherwise gets flipped
            for coord in contour_list:
                x = int(coord[0])
                y = int(coord[1])
                coord[0] = y
                coord[1] = x

            # this polygon does not have parent, so this is a parent object (top level)
            if parent == -1:
                polygon = {
                    "type": "Feature",
                    "properties": {},
                    "geometry": {"type": "Polygon", "coordinates": []},
                }
                polygon["properties"]["label_num"] = int(label_num)
                polygon["properties"]["label_name"] = mappings[label_num]
                polygon["geometry"]["coordinates"].append(contour_list)
                polygon_by_index_number[index] = polygon
            else:
                # this is a child object, add coordinates as a hole to the parent polygon

                # fetch parent's polygon
                parent_polygon = polygon_by_index_number[parent]

                # append as hole to parent
                parent_polygon["geometry"]["coordinates"].append(contour_list)

        # add parent polygon feature dicts to running annotation geojson object
        for index, polygon in polygon_by_index_number.items():
            annotation_geojson["features"].append(polygon)

    else:
        print("No label " + str(label_num) + " found")

    return annotation_geojson
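
A minimal sketch with a toy single-label bitmap (the empty FeatureCollection mirrors the module's geojson_base template):

import numpy as np
from luna.pathology.common.build_geojson import add_contours_for_label

annotation = np.zeros((100, 100), dtype=np.uint8)
annotation[20:40, 20:40] = 1  # one square region with label 1

geojson = {"type": "FeatureCollection", "features": []}
geojson = add_contours_for_label(
    geojson, annotation, label_num=1, mappings={1: "tumor"}, contour_level=0.5
)
print(len(geojson["features"]))  # 1: a single top-level polygon feature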

build_all_geojsons_from_default(default_annotation_geojson, all_labelsets, contour_level)

builds geoJSON objects from a set of labels

wraps build_labelset_specific_geojson with logic to generate annotations from multiple labelsets

Parameters:

Name Type Description Default
default_annotation_geojson dict[str, any]

input geoJSON

required
all_labelsets list[dict]

a list of dictionaries containing label sets

required
contour_level float

value along which to find contours

required

Returns:

Name Type Description
dict dict

a dictionary with labelset name and corresponding geoJSON as key, value pairs

Source code in src/luna/pathology/common/build_geojson.py
def build_all_geojsons_from_default(
    default_annotation_geojson: Dict[str, any],
    all_labelsets: List[dict],
    contour_level: float,
) -> dict:
    """builds geoJSON objects from a set of labels

    wraps build_labelset_specific_geojson with logic to generate annotations
    from multiple labelsets

    Args:
        default_annotation_geojson (dict[str, any]): input geoJSON
        all_labelsets (list[dict]): a list of dictionaries containing label sets
        contour_level (float):  value along which to find contours

    Returns:
        dict: a dictionary with labelset name and corresponding geoJSON as key, value
        pairs

    """

    labelset_name_to_labelset_specific_geojson = {}

    for labelset_name, labelset in all_labelsets.items():
        if labelset_name != DEFAULT_LABELSET_NAME:
            # use default labelset geojson to build labelset specific geojson
            annotation_geojson = build_labelset_specific_geojson(
                default_annotation_geojson, labelset
            )
        else:
            annotation_geojson = default_annotation_geojson

        # only add if geojson not none (built correctly and contains >= 1 polygon)
        if annotation_geojson:
            labelset_name_to_labelset_specific_geojson[labelset_name] = json.dumps(
                annotation_geojson
            )

    return labelset_name_to_labelset_specific_geojson

build_default_geojson_from_annotation(annotation_npy_filepath, all_labelsets, contour_level)

builds geoJSONs from numpy annotation with default label set

Parameters:

Name Type Description Default
annotation_npy_filepath str

file path to numpy annotation

required
all_labelsets dict

a dictionary of label sets

required
contour_level float

value along which to find contours

required

Returns:

Type Description

dict[str, any]: the default geoJSON annotation

Source code in src/luna/pathology/common/build_geojson.py
def build_default_geojson_from_annotation(
    annotation_npy_filepath: str, all_labelsets: dict, contour_level: float
):
    """builds geoJSONS from numpy annotation with default label set

    Args:
        annotation_npy_filepath (str): file path to numpy annotation
        all_labelsets (dict): a dictionary of label sets
        contour_level (float):  value along which to find contours

    Returns:
        dict[str, any]: the default geoJSON annotation
    """

    annotation = np.load(annotation_npy_filepath)
    default_annotation_geojson = copy.deepcopy(geojson_base)

    # signal logic doesn't work in dask distributed setup

    default_labelset = all_labelsets[DEFAULT_LABELSET_NAME]

    if not (annotation > 0).any():
        print(
            f"No annotated pixels detected in bitmap loaded from {annotation_npy_filepath}"
        )
        return None

    # vectorize all
    for label_num in default_labelset:
        default_annotation_geojson = add_contours_for_label(
            default_annotation_geojson,
            annotation,
            label_num,
            default_labelset,
            float(contour_level),
        )

    # empty geojson created, return nan and delete from geojson table
    if len(default_annotation_geojson["features"]) == 0:
        print(
            f"Something went wrong with building default geojson from {annotation_npy_filepath}, quitting"
        )
        return None

    return default_annotation_geojson

build_geojson_from_annotation(df)

Builds geoJSON for all annotation labels in the specified labelset.

Parameters:

Name Type Description Default
df DataFrame

input regional annotation table

required

Returns:

Name Type Description
pandas.DataFrame DataFrame

dataframe with geoJSON field populated

Source code in src/luna/pathology/common/build_geojson.py
def build_geojson_from_annotation(df: pd.DataFrame) -> pd.DataFrame:
    """Builds geoJSON for all annotation labels in the specified labelset.

    Args:
        df (pandas.DataFrame): input regional annotation table

    Returns:
        pandas.DataFrame: dataframe with geoJSON field populated
    """

    labelsets = df.label_config.values[0]
    annotation_npy_filepath = df.npy_filepath.values[0]
    labelset = df.labelset.values[0]
    contour_level = df.contour_level.values[0]

    labelsets = ast.literal_eval(labelsets)
    mappings = labelsets[labelset]

    print("\nBuilding GeoJSON annotation from npy file:", annotation_npy_filepath)

    annotation = np.load(annotation_npy_filepath)
    annotation_geojson = copy.deepcopy(geojson_base)

    signal.signal(signal.SIGALRM, handler)
    signal.alarm(TIMEOUT_SECONDS)

    try:
        for label_num in mappings:
            annotation_geojson = add_contours_for_label(
                annotation_geojson,
                annotation,
                label_num,
                mappings,
                float(contour_level),
            )
    except TimeoutError as err:
        print(
            "Timeout Error occured while building geojson from slide",
            annotation_npy_filepath,
        )
        raise err

    # disables alarm
    signal.alarm(0)

    # empty geojson created, return nan and delete from geojson table
    if len(annotation_geojson["features"]) == 0:
        return df

    df["geojson"] = json.dumps(annotation_geojson)
    return df

build_geojson_from_pointclick_json(labelsets, labelset, sv_json)

Build geoJSON from slideviewer JSON

This method extracts point annotations from a slideviewer json object and converts them to a standardized geoJSON format

Parameters:

Name Type Description Default
labelsets dict

dictionary of label set as string (e.g. {labelset: {label_number: label_name}})

required
labelset str

the name of the labelset e.g. default_labels

required
sv_json list[dict]

annotations from slideviewer in the form of a list of dictionaries

required

Returns:

Name Type Description
list list

a list of geoJSON annotation objects

Source code in src/luna/pathology/common/build_geojson.py
def build_geojson_from_pointclick_json(
    labelsets: dict, labelset: str, sv_json: List[dict]
) -> list:
    """Build geoJSON m slideviewer JSON

    This method extracts point annotations from a slideviewer json object and
    converts them to a standardized geoJSON format

    Args:
        labelsets (dict): dictionary of label set as string (e.g. {labelset:
            {label_number: label_name}})
        labelset (str): the name of the labelset e.g. default_labels
        sv_json (list[dict]): annotations from slideviewer in the form of a list of dictionaries

    Returns:
        list: a list of geoJSON annotation objects
    """

    labelsets = ast.literal_eval(labelsets)
    mappings = labelsets[labelset]

    output_geojson = []
    for entry in sv_json:
        point = {}
        x = int(entry["x"])
        y = int(entry["y"])
        class_num = int(entry["class"])
        if class_num not in mappings:
            continue
        class_name = mappings[class_num]
        coordinates = [x, y]

        point["type"] = "Feature"
        point["id"] = "PathAnnotationObject"
        point["geometry"] = {"type": "Point", "coordinates": coordinates}
        point["properties"] = {"classification": {"name": class_name}}
        output_geojson.append(point)

    return output_geojson
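
A minimal sketch; note that labelsets is passed as the string representation of the label set dictionary, since the function parses it with ast.literal_eval:

from luna.pathology.common.build_geojson import build_geojson_from_pointclick_json

labelsets = "{'default_labels': {1: 'tumor', 2: 'stroma'}}"
sv_json = [{"x": "100", "y": "200", "class": "1"}]

points = build_geojson_from_pointclick_json(labelsets, "default_labels", sv_json)
# one Point feature at (100, 200) classified as 'tumor'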

build_labelset_specific_geojson(default_annotation_geojson, labelset)

builds geoJSON for labelset

Instead of working with a large geoJSON object, you can extract polygons that correspond to specific labels into a smaller object.

Parameters:

Name Type Description Default
default_annotation_geojson dict[str, any]

geoJSON annotation file

required
labelset dict

label set dictionary

required

Returns:

Type Description
Dict[str, any]

dict[str, any]: geoJSON with only polygons from provided labelset

Source code in src/luna/pathology/common/build_geojson.py
def build_labelset_specific_geojson(
    default_annotation_geojson: Dict[str, any], labelset: dict
) -> Dict[str, any]:
    """builds geoJSON for labelset

    Instead of working with a large geoJSON object, you can extract polygons
    that correspond to specific labels into a smaller object.

    Args:
        default_annotation_geojson (dict[str, any]):  geoJSON annotation file
        labelset (dict): label set dictionary

    Returns:
        dict[str, any]: geoJSON with only polygons from provided labelset
    """

    annotation_geojson = copy.deepcopy(geojson_base)

    for feature in default_annotation_geojson["features"]:

        # number is fixed
        label_num = feature["properties"]["label_num"]
        # add polygon to json, change name potentially needed
        if label_num in labelset:
            new_feature_polygon = copy.deepcopy(feature)

            # get new name and change
            new_label_name = labelset[label_num]
            new_feature_polygon["properties"]["label_name"] = new_label_name

            # add to annotation_geojson being built
            annotation_geojson["features"].append(new_feature_polygon)

    # no polygons containing labels in labelset
    if len(annotation_geojson["features"]) == 0:
        return None

    return annotation_geojson

concatenate_regional_geojsons(geojson_list)

concatenate regional annotations

Concatenates geojsons if there is more than one annotation for the labelset.

Parameters:

Name Type Description Default
geojson_list list[dict[str, any]]

list of geoJSON strings

required

Returns:

Type Description
Dict[str, any]

dict[str, any]: a single concatenated geoJSON

Source code in src/luna/pathology/common/build_geojson.py
def concatenate_regional_geojsons(geojson_list: List[Dict[str, any]]) -> Dict[str, any]:
    """concatenate regional annotations

    Concatenates geojsons if there is more than one annotation for the labelset.

    Args:
        geojson_list (list[dict[str, any]]): list of geoJSON strings

    Returns:
        dict[str, any]: a single concatenated geoJSON
    """
    # create json from str representations
    geojson_list = [json.loads(geojson) for geojson in geojson_list]

    concat_geojson = geojson_list[0]
    if len(geojson_list) == 1:
        return concat_geojson

    # create concatenated geojson
    for json_dict in geojson_list[1:]:
        print(f"Concatenating {len(geojson_list)} geojsons")
        concat_geojson["features"].extend(json_dict["features"])

    return concat_geojson
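
A minimal sketch with two single-feature geoJSON strings:

import json
from luna.pathology.common.build_geojson import concatenate_regional_geojsons

a = json.dumps({"type": "FeatureCollection", "features": [{"id": 1}]})
b = json.dumps({"type": "FeatureCollection", "features": [{"id": 2}]})

merged = concatenate_regional_geojsons([a, b])
print(len(merged["features"]))  # 2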

find_parents(polygons)

determines parent-child relationships of polygons

Returns a list of size n (where n is the number of polygons in the input list) where the value at index i corresponds to the i-th polygon's parent. In the case of no parent, -1 is used. For example, parent_nums[0] = 2 means that polygon 0's parent is polygon 2.

Parameters:

Name Type Description Default
polygons list

a list of shapely polygon objects

required

Returns:

Name Type Description
list list

a list of parent-child relationships for the polygon objects

Source code in src/luna/pathology/common/build_geojson.py
def find_parents(polygons: list) -> list:
    """determines of parent child relationships of polygons

    Returns a list of size n (where n is the number of input polygons in the input list
    polygons) where the value at index n cooresponds to the nth polygon's parent. In
    the case of no parent, -1 is used. for example, parent_nums[0] = 2 means that
    polygon 0's parent is polygon 2

    Args:
        polygons (list): a list of shapely polygon objects

    Returns:
        list: a list of parent-child relationships for the polygon objects

    """
    parent_nums = []
    for child in polygons:
        found_parent = False
        for parent_idx, parent in enumerate(polygons):
            if child == parent:
                continue
            # found parent for child
            if parent.contains(child):
                parent_nums.append(parent_idx)
                found_parent = True
                break
        # finished looping through all potential parents, so child is a parent
        if not found_parent:
            parent_nums.append(-1)

    print(parent_nums)

    return parent_nums
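
A minimal sketch with one polygon nested inside another:

from shapely.geometry import Polygon
from luna.pathology.common.build_geojson import find_parents

outer = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
inner = Polygon([(2, 2), (4, 2), (4, 4), (2, 4)])
print(find_parents([inner, outer]))  # [1, -1]: polygon 0 is contained by polygon 1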

handler(signum, frame)

signal handler for geojson

Parameters:

Name Type Description Default
signum str

signal number

required
frame str

current stack frame

required

Returns:

Type Description
None

None

Source code in src/luna/pathology/common/build_geojson.py
def handler(signum: str, frame: str) -> None:
    """signal handler for geojson

    Args:
        signum (str): signal number
        frame (str): current stack frame

    Returns:
        None
    """

    raise TimeoutError("Geojson generation timed out.")

deepzoom

DeepZoomGenerator

Source code in src/luna/pathology/common/deepzoom.py
class DeepZoomGenerator:
    BOUNDS_OFFSET_PROPS = (
        tiffslide.PROPERTY_NAME_BOUNDS_X,
        tiffslide.PROPERTY_NAME_BOUNDS_Y,
    )
    BOUNDS_SIZE_PROPS = (
        tiffslide.PROPERTY_NAME_BOUNDS_WIDTH,
        tiffslide.PROPERTY_NAME_BOUNDS_HEIGHT,
    )

    def __init__(
        self,
        urlpath: Union[str, fsspec.core.OpenFile],
        tile_size: int = 254,
        overlap: int = 1,
        limit_bounds: bool = False,
        storage_options: dict = {},
    ) -> None:
        self._z_t_downsample = tile_size
        self._z_overlap = overlap
        self._limit_bounds = limit_bounds

        self._storage_options = storage_options
        if isinstance(urlpath, str):
            self._openfile = fsspec.open(urlpath, **storage_options)
        else:
            self._openfile = urlpath

        with self._openfile as f, tiffslide.TiffSlide(f) as tiff:
            if limit_bounds:
                # Level 0 coordinate offset
                self._l0_offset = tuple(
                    int(tiff.properties.get(prop, 0))
                    for prop in self.BOUNDS_OFFSET_PROPS
                )
                # Slide level dimensions scale factor in each axis
                size_scale = tuple(
                    int(tiff.properties.get(prop, l0_lim)) / l0_lim
                    for prop, l0_lim in zip(self.BOUNDS_SIZE_PROPS, tiff.dimensions)
                )
                # Dimensions of active area
                self._l_dimensions = tuple(
                    tuple(
                        int(math.ceil(l_lim * scale))
                        for l_lim, scale in zip(l_size, size_scale)
                    )
                    for l_size in tiff.level_dimensions
                )
            else:
                self._l_dimensions = tiff.level_dimensions
                self._l0_offset = (0, 0)
            self._l0_dimensions = self._l_dimensions[0]
            # Deep Zoom level
            z_size = self._l0_dimensions
            z_dimensions = [z_size]
            while z_size[0] > 1 or z_size[1] > 1:
                z_size = tuple(max(1, int(math.ceil(z / 2))) for z in z_size)
                z_dimensions.append(z_size)
            self._z_dimensions = tuple(reversed(z_dimensions))

            # Tile
            def tiles(z_lim):
                return int(math.ceil(z_lim / self._z_t_downsample))

            self._t_dimensions = tuple(
                (tiles(z_w), tiles(z_h)) for z_w, z_h in self._z_dimensions
            )

            # Deep Zoom level count
            self._dz_levels = len(self._z_dimensions)

            # Total downsamples for each Deep Zoom level
            l0_z_downsamples = tuple(
                2 ** (self._dz_levels - dz_level - 1)
                for dz_level in range(self._dz_levels)
            )

            # Preferred slide levels for each Deep Zoom level
            self._slide_from_dz_level = tuple(
                tiff.get_best_level_for_downsample(d) for d in l0_z_downsamples
            )

            # Piecewise downsamples
            self._l0_l_downsamples = tiff.level_downsamples
            self._l_z_downsamples = tuple(
                l0_z_downsamples[dz_level]
                / self._l0_l_downsamples[self._slide_from_dz_level[dz_level]]
                for dz_level in range(self._dz_levels)
            )

            # Slide background color
            bg_color = tiff.properties.get(tiffslide.PROPERTY_NAME_BACKGROUND_COLOR)
            if bg_color:
                self._bg_color = "#" + bg_color
            else:
                self._bg_color = "#ffffff"

    @property
    def level_count(self):
        """The number of Deep Zoom levels in the image."""
        return self._dz_levels

    @property
    def level_tiles(self):
        """A list of (tiles_x, tiles_y) tuples for each Deep Zoom level."""
        return self._t_dimensions

    @property
    def level_dimensions(self):
        """A list of (pixels_x, pixels_y) tuples for each Deep Zoom level."""
        return self._z_dimensions

    @property
    def tile_count(self):
        """The total number of Deep Zoom tiles in the image."""
        return sum(t_cols * t_rows for t_cols, t_rows in self._t_dimensions)

    def get_tile(self, level, address):
        """Return an RGB PIL.Image for a tile.

        level:     the Deep Zoom level.
        address:   the address of the tile within the level as a (col, row)
                   tuple."""

        # Read tile
        args, z_size = self._get_tile_info(level, address)
        with self._openfile as f, tiffslide.TiffSlide(f) as tiff:
            tile = tiff.read_region(*args)

            # Apply on solid background
            # bg = Image.new('RGB', tile.size, self._bg_color)
            # tile = Image.composite(tile, bg, tile)

            # Scale to the correct size
            if tile.size != z_size:
                # Image.Resampling added in Pillow 9.1.0
                # Image.LANCZOS removed in Pillow 10
                tile.thumbnail(z_size, getattr(Image, "Resampling", Image).LANCZOS)

            return tile

    def _get_tile_info(self, dz_level, t_location):
        # Check parameters
        if dz_level < 0 or dz_level >= self._dz_levels:
            raise ValueError("Invalid level")
        for t, t_lim in zip(t_location, self._t_dimensions[dz_level]):
            if t < 0 or t >= t_lim:
                raise ValueError(f"Invalid address: {dz_level}:{t_location}")

        # Get preferred slide level
        slide_level = self._slide_from_dz_level[dz_level]

        # Calculate top/left and bottom/right overlap
        z_overlap_tl = tuple(self._z_overlap * int(t != 0) for t in t_location)
        z_overlap_br = tuple(
            self._z_overlap * int(t != t_lim - 1)
            for t, t_lim in zip(t_location, self.level_tiles[dz_level])
        )

        # Get final size of the tile
        z_size = tuple(
            min(self._z_t_downsample, z_lim - self._z_t_downsample * t) + z_tl + z_br
            for t, z_lim, z_tl, z_br in zip(
                t_location, self._z_dimensions[dz_level], z_overlap_tl, z_overlap_br
            )
        )

        # Obtain the region coordinates
        z_location = [self._z_from_t(t) for t in t_location]
        l_location = [
            self._l_from_z(dz_level, z - z_tl)
            for z, z_tl in zip(z_location, z_overlap_tl)
        ]
        # Round location down and size up, and add offset of active area
        l0_location = tuple(
            int(self._l0_from_l(slide_level, loc) + l0_off)
            for loc, l0_off in zip(l_location, self._l0_offset)
        )
        l_size = tuple(
            int(min(math.ceil(self._l_from_z(dz_level, dz)), l_lim - math.ceil(loc)))
            for loc, dz, l_lim in zip(
                l_location, z_size, self._l_dimensions[slide_level]
            )
        )

        # Return read_region() parameters plus tile size for final scaling
        return ((l0_location, slide_level, l_size), z_size)

    def _l0_from_l(self, slide_level, loc):
        return self._l0_l_downsamples[slide_level] * loc

    def _l_from_z(self, dz_level, z):
        return self._l_z_downsamples[dz_level] * z

    def _z_from_t(self, t):
        return self._z_t_downsample * t
level_count property

The number of Deep Zoom levels in the image.

level_dimensions property

A list of (pixels_x, pixels_y) tuples for each Deep Zoom level.

level_tiles property

A list of (tiles_x, tiles_y) tuples for each Deep Zoom level.

tile_count property

The total number of Deep Zoom tiles in the image.

get_tile(level, address)

Return an RGB PIL.Image for a tile.

level: the Deep Zoom level. address: the address of the tile within the level as a (col, row) tuple.

Source code in src/luna/pathology/common/deepzoom.py
def get_tile(self, level, address):
    """Return an RGB PIL.Image for a tile.

    level:     the Deep Zoom level.
    address:   the address of the tile within the level as a (col, row)
               tuple."""

    # Read tile
    args, z_size = self._get_tile_info(level, address)
    with self._openfile as f, tiffslide.TiffSlide(f) as tiff:
        tile = tiff.read_region(*args)

        # Apply on solid background
        # bg = Image.new('RGB', tile.size, self._bg_color)
        # tile = Image.composite(tile, bg, tile)

        # Scale to the correct size
        if tile.size != z_size:
            # Image.Resampling added in Pillow 9.1.0
            # Image.LANCZOS removed in Pillow 10
            tile.thumbnail(z_size, getattr(Image, "Resampling", Image).LANCZOS)

        return tile
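
A minimal usage sketch (hypothetical slide path):

from luna.pathology.common.deepzoom import DeepZoomGenerator

dz = DeepZoomGenerator("/data/slides/123.svs", tile_size=254, overlap=1)
print(dz.level_count, dz.level_tiles[-1])       # levels and tile grid at the deepest level
tile = dz.get_tile(dz.level_count - 1, (0, 0))  # top-left tile at the deepest level
tile.save("tile_0_0.png")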

schemas

SlideTiles

Source code in src/luna/pathology/common/schemas.py
class SlideTiles:
    REQ_COLUMNS = set(
        ["address", "x_coord", "y_coord", "xy_extent", "tile_size", "tile_units"]
    )

    @classmethod
    def check(self, slide_tiles):
        """Returns True if the given path is readable as "SlideTiles <slide_tiles>", else, reaises SchemaMismatchError"""
        df = pd.read_parquet(slide_tiles).reset_index()

        if not set(df.columns).intersection(self.REQ_COLUMNS) == self.REQ_COLUMNS:
            raise SchemaMismatchError(
                "SlideTile failed schema check: missing columns: ",
                (set(df.columns).intersection(self.REQ_COLUMNS)).symmetric_difference(
                    self.REQ_COLUMNS
                ),
            )

        return True
check(slide_tiles) classmethod

Returns True if the given path is readable as "SlideTiles <slide_tiles>", else raises SchemaMismatchError

Source code in src/luna/pathology/common/schemas.py
@classmethod
def check(self, slide_tiles):
    """Returns True if the given path is readable as "SlideTiles <slide_tiles>", else, reaises SchemaMismatchError"""
    df = pd.read_parquet(slide_tiles).reset_index()

    if not set(df.columns).intersection(self.REQ_COLUMNS) == self.REQ_COLUMNS:
        raise SchemaMismatchError(
            "SlideTile failed schema check: missing columns: ",
            (set(df.columns).intersection(self.REQ_COLUMNS)).symmetric_difference(
                self.REQ_COLUMNS
            ),
        )

    return True
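
A minimal usage sketch (hypothetical path):

from luna.pathology.common.schemas import SlideTiles

# returns True, or raises SchemaMismatchError if required columns are absent
SlideTiles.check("/data/tiles/123.tiles.parquet")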

slideviewer_client

Created on January 31, 2021

@author: pashaa@mskcc.org

Functions for downloading annotations from SlideViewer

download_sv_point_annotation(url)

download slideviewer point annotation

Calls slideviewer API with the given url

Parameters:

Name Type Description Default
url str

slide viewer api to call

required

Returns:

Type Description
Dict[str, any]

dict[str, any]: json response

Source code in src/luna/pathology/common/slideviewer_client.py
def download_sv_point_annotation(url: str) -> Dict[str, any]:
    """download slideviwer point annotation

    Calls slideviewer API with the given url

    Args:
        url (str): slide viewer api to call

    Returns:
        dict[str, any]: json response
    """
    try:
        response = requests.get(url)
        data = response.json()
    except Exception:
        logger.exception("General exception raised while trying " + url)
        return None

    logger.info("Found data = " + str(data))
    if str(data) != "[]":
        return data
    else:
        logger.warning("Label annotation file does not exist for slide and user.")
        return None

download_zip(url, dest_path, chunk_size=128)

Download zip file

Downloads zip from the specified URL and saves it to the specified file path. see https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url

Parameters:

Name Type Description Default
url str

slideviewer url to download zip from

required
dest_path str

file path where zipfile should be saved

required
chunk_size int

size in bytes of chunks to batch out during download

128

Returns:

Name Type Description
bool bool

True if zipfile downloaded and saved successfully, else False

Source code in src/luna/pathology/common/slideviewer_client.py
def download_zip(url: str, dest_path: str, chunk_size: int = 128) -> bool:
    """Download zip file

    Downloads zip from the specified URL and saves it to the specified file path.
    see https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url

    Args:
        url (str): slideviewer url to download zip from
        dest_path (str): file path where zipfile should be saved
        chunk_size (int): size in bytes of chunks to batch out during download

    Returns:
        bool: True if zipfile downloaded and saved successfully, else False
    """

    response = requests.get(url, stream=True)
    with open(dest_path, "wb") as fd:
        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk == b"Label image not found.":  # message from slideviewer
                return False
            else:
                fd.write(chunk)
        return True
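
A minimal sketch pairing download_zip with the unzip helper documented below (hypothetical URL and paths):

from luna.pathology.common.slideviewer_client import download_zip, unzip

if download_zip("https://slideviewer.example.org/getLabelFileBMP", "/tmp/annot.zip"):
    zf = unzip("/tmp/annot.zip")  # zipfile.ZipFile handle, or None if invalid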

fetch_slide_ids(url, project_id, dest_dir, csv_file=None)

get slide ids

Fetch the list of slide ids from the slideviewer server for the project with the specified project id. Alternatively, a slideviewer csv file may be provided to override download from the server.

Parameters:

Name Type Description Default
url str or None

slideviewer url. url may be None if csv_file is specified.

required
project_id int

slideviewer project id from which to fetch slide ids

required
dest_dir str

directory where csv file should be downloaded

required
csv_file str

slideviewer csv file may be provided to override the need to download the file

None

Returns:

Name Type Description
list list

list of (slideviewer_path, slide_id, sv_project_id)

Source code in src/luna/pathology/common/slideviewer_client.py
def fetch_slide_ids(
    url: str, project_id: int, dest_dir: str, csv_file: str = None
) -> list:
    """get slide ids

    Fetch the list of slide ids from the slideviewer server for the project with the
    specified project id. Alternatively, a slideviewer csv file may be provided to
    override download from server.

    Args:
        url (str or None): slideviewer url. url may be None if csv_file is specified.
        project_id (int): slideviewer project id from which to fetch slide ids
        dest_dir (str): directory where csv file should be downloaded
        csv_file (str): slideviewer csv file may be provided to override the need
        to download the file

    Returns:
        list:  list of (slideviewer_path, slide_id, sv_project_id)
    """

    # run on all slides from specified SLIDEVIEWER_CSV file.
    # if file is not specified, then download file using slideviewer API
    # download entire slide set using project id
    # the file is then written to the dest directory
    new_csv_file = os.path.join(dest_dir, "project_" + str(project_id) + ".csv")

    if csv_file is None or csv_file == "" or not os.path.exists(csv_file):

        url = url + "exportProjectCSV?pid={pid}".format(pid=str(project_id))
        res = requests.get(url)

        with open(new_csv_file, "wb") as slideoutfile:
            slideoutfile.write(res.content)

    else:
        # copy given csv_file to dest directory
        shutil.copy(csv_file, new_csv_file)

    # read slide ids
    slides = []
    with open(new_csv_file) as slideoutfile:
        # skip first 4 lines
        count = 0
        for line in slideoutfile:
            count += 1
            if count == 4:
                break

        # read whole slide image file names contained in the project in slide viewer
        for line in slideoutfile:
            full_filename = line.strip()
            slidename = get_slide_id(full_filename)
            slides.append([full_filename, slidename, project_id])

    return slides

get_slide_id(full_filename)

get slide id

Get slide id from the slideviewer full file name. The full_filename in the slideviewer csv is of the format: year;HOBS_ID;slide_id.svs for example: 2013;HobS13-283072057510;1435197.svs

Parameters:

Name Type Description Default
full_filename str

full filename of slide

required

Returns:

Name Type Description
str str

numeric slide id

Source code in src/luna/pathology/common/slideviewer_client.py
def get_slide_id(full_filename: str) -> str:
    """get slide id

    Get slide id from the slideviewer full file name. The full_filename in
    the slideviewer csv is of the format: year;HOBS_ID;slide_id.svs
    for example: 2013;HobS13-283072057510;1435197.svs

    Args:
        full_filename (str): full filename of slide

    Returns:
        str: numeric slide id
    """
    return full_filename.split(";")[-1].replace(".svs", "")
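
For example, using the filename format described above:

from luna.pathology.common.slideviewer_client import get_slide_id

get_slide_id("2013;HobS13-283072057510;1435197.svs")  # '1435197'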

unzip(zipfile_path)

unzip zip file

Parameters:

Name Type Description Default
zipfile_path str

path of zipfile to unzip

required

Returns:

Type Description
any

read file pointer to unzipped file if successfully unzipped, else None

Source code in src/luna/pathology/common/slideviewer_client.py
def unzip(zipfile_path: str) -> any:
    """unzip zip file

    Args:
        zipfile_path (str): path of zipfile to unzip

    Returns:
        read file pointer to unzipped file if successfully unzipped, else None
    """
    logger.info("Unzipping " + zipfile_path)
    try:
        return zipfile.ZipFile(zipfile_path)  # returns read file pointer
    except zipfile.BadZipFile:
        logger.exception("Dumping invalid Zipfile " + zipfile_path + ":")
        return None

utils

address_to_coord(s)

converts address into coordinates

Parameters:

Name Type Description Default
s str

a string consisting of an x_y_z address

required

Returns:

Type Description
Optional[Tuple[int, int]]

Tuple[int, int]: a tuple consisting of an x, y pair

Source code in src/luna/pathology/common/utils.py
def address_to_coord(s: str) -> Optional[Tuple[int, int]]:
    """converts address into coordinates

    Args:
        s (str): a string consisting of an x_y_z address

    Returns:
        Tuple[int, int]: a tuple consisting of an x, y pair
    """
    s = str(s)
    p = re.compile(r"x(\d+)_y(\d+)", re.IGNORECASE)
    m = p.match(s)
    if m:
        x = int(m.group(1))
        y = int(m.group(2))
        return (x, y)
    return None

convert_halo_xml_to_roi(xml_fn)

get roi from halo XML file

Read the rectangle ROI of a halo XML annotation file

Parameters:

Name Type Description Default
xml_fn str

file path to input halo XML file

required

Returns:

Type Description
Optional[Tuple[List, List]]

Tuple[list, list]: returns a tuple of x, y coordinates of the rectangular roi

Source code in src/luna/pathology/common/utils.py
def convert_halo_xml_to_roi(xml_fn: str) -> Optional[Tuple[List, List]]:
    """get roi from halo XML file

    Read the rectangle ROI of a halo XML annotation file

    Args:
        xml_fn: file path to input halo XML file

    Returns:
        Tuple[list, list]: returns a tuple of x, y coordinates of the rectangular roi

    """

    ylist = list()
    xlist = list()

    print("Converting to ROI:", xml_fn)
    e = et.parse(xml_fn).getroot()
    for ann in e.findall("Annotation"):
        regions = ann.findall("Regions")[0]
        if len(regions) == 0:
            continue

        if not regions[0].get("Type") == "Rectangle":
            continue

        for i, r in enumerate(regions):
            vs = r.findall("Vertices")[0]
            vs = vs.findall("V")
            for v in vs:
                y, x = int(v.get("Y").split(".")[0]), int(v.get("X").split(".")[0])
                ylist.append(y)
                xlist.append(x)

    if xlist == [] or ylist == []:
        logger.warning("No Rectangle found, returning None!")
        return None

    if min(xlist) < 0:
        logger.warning("Somehow a negative x rectangle coordinate!")
        xlist = [0, max(xlist)]
    if min(ylist) < 0:
        logger.warning("Somehow a negative y rectangle coordinate!")
        ylist = [0, max(ylist)]

    return xlist, ylist
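
A minimal usage sketch (hypothetical annotation path):

from luna.pathology.common.utils import convert_halo_xml_to_roi

roi = convert_halo_xml_to_roi("/data/annotations/123.annotation.xml")
if roi is not None:
    xlist, ylist = roi  # x and y coordinates of the rectangular ROI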

convert_xml_to_mask(xml_urlpath, shape, annotation_name, storage_options={})

convert xml to bitmask

Converts a sparse halo XML annotation file (polygons) to a dense bitmask

Parameters:

Name Type Description Default
xml_urlpath str

file path to input halo XML file

required
shape list

desired polygon shape

required
annotation_name str

name of annotation

required

Returns:

Type Description
Optional[Tuple[ndarray, Dict[str, Any]]]

Optional[Tuple[np.ndarray, Dict[str, Any]]]: annotation bitmask of specified shape

Source code in src/luna/pathology/common/utils.py
def convert_xml_to_mask(
    xml_urlpath: str,
    shape: list,
    annotation_name: str,
    storage_options: dict = {},
) -> Optional[Tuple[np.ndarray, Dict[str, Any]]]:
    """convert xml to bitmask

    Converts a sparse halo XML annotation file (polygons) to a dense bitmask

    Args:
        xml_urlpath (str): file path to input halo XML file
        shape (list): desired polygon shape
        annotation_name (str): name of annotation

    Returns:
        Optional[Tuple[np.ndarray, Dict[str, Any]]]: annotation bitmask of specified shape
    """

    ret = None
    # Annotations >>
    with open(xml_urlpath, **storage_options) as of:
        e = et.parse(of).getroot()
    e = e.findall("Annotation")
    n_regions = 0
    for ann in e:
        if ann.get("Name") != annotation_name:
            continue

        logger.debug(f"Found region {ann.get('Name')}")

        board_pos = np.zeros(shape, dtype=np.uint8)
        board_neg = np.zeros(shape, dtype=np.uint8)

        regions = ann.findall("Regions")
        assert len(regions) == 1

        rs = regions[0].findall("Region")

        for i, r in enumerate(rs):
            negative_flag = int(r.get("NegativeROA"))
            assert negative_flag == 0 or negative_flag == 1
            negative_flag = bool(negative_flag)

            vs = r.findall("Vertices")[0]
            vs = vs.findall("V")
            vs.append(vs[0])  # last dot should be linked to the first dot

            plist = list()
            for v in vs:
                x, y = int(v.get("X").split(".")[0]), int(v.get("Y").split(".")[0])
                plist.append((x, y))

            if negative_flag:
                board_neg = cv2.drawContours(
                    board_neg, [np.array(plist, dtype=np.int32)], -1, [0, 0, 0], -1
                )
            else:
                board_pos = cv2.drawContours(
                    board_pos,
                    [np.array(plist, dtype=np.int32)],
                    contourIdx=-1,
                    color=[255, 0, 0],
                    thickness=-1,
                )
            n_regions += 1

        ret = (board_pos > 0) * (board_neg == 0)

    if ret.any():
        mask = ret.astype(np.uint8)

        properties = {
            "n_regions": n_regions,
            "n_positive_pixels": np.where(mask > 0, 1, 0).sum(),
        }
        return mask, properties
    return None
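
A minimal usage sketch (hypothetical file and annotation name; shape is assumed to be the desired (rows, cols) of the output mask):

from luna.pathology.common.utils import convert_xml_to_mask

result = convert_xml_to_mask(
    "/data/annotations/123.annotation.xml",
    shape=[40000, 60000],
    annotation_name="Tumor",
)
if result is not None:
    mask, properties = result
    print(properties["n_regions"], properties["n_positive_pixels"])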

coord_to_address(s, magnification)

converts coordinate to address

Parameters:

Name Type Description Default
s tuple[int, int]

coordinate consisting of an (x, y) tuple

required
magnification int

magnification factor

required

Returns:

Name Type Description
str str

a string consisting of an x_y_z address

Source code in src/luna/pathology/common/utils.py
def coord_to_address(s: Tuple[int, int], magnification: Optional[int]) -> str:
    """converts coordinate to address

    Args:
        s (tuple[int, int]): coordinate consisting of an (x, y) tuple
        magnification (int): magnification factor

    Returns:
        str: a string consisting of an x_y_z address
    """

    x = s[0]
    y = s[1]
    address = f"x{x}_y{y}"
    if magnification:
        address += f"_z{magnification}"
    return address
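
A quick round trip between the two address helpers:

from luna.pathology.common.utils import address_to_coord, coord_to_address

coord_to_address((1, 2), 20)   # 'x1_y2_z20'
address_to_coord("x1_y2_z20")  # (1, 2)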

extract_patch_texture_features(image_patch, mask_patch, stain_vectors, stain_channel, plot=False)

extract patch texture features

Runs patch-wise extraction from an image_patch, mask_patch pair given a stain vector and stain channel.

Parameters:

    image_patch (np.ndarray): input image patch. Required.
    mask_patch (np.ndarray): input image mask. Required.
    stain_vectors (np.ndarray): stain vectors extracted from the image patch. Required.
    stain_channel (int): stain channel. Required.
    plot (bool, optional): currently unused. Defaults to False.

Returns:

    Optional[Dict[str, np.ndarray]]: texture features from image patch

Source code in src/luna/pathology/common/utils.py
def extract_patch_texture_features(
    image_patch, mask_patch, stain_vectors, stain_channel, plot=False
) -> Optional[Dict[str, np.ndarray]]:
    """extact patch texture features

    Runs patch-wise extraction from an image_patch, mask_patch pair given a stain
    vector and stain channel.

    Args:
        image_patch (np.ndarray): input image patch
        mask_patch (np.ndarray): input image mask
        stain_vectors (np.ndarray): stain vectors extacted from the image patch
        stain_channel (int): stain channel
        plot (Optional, bool): unused?

    Returns:
        Optional[Dict[str, np.ndarray]]: texture features from image patch

    """

    # logging.getLogger("radiomics.featureextractor").setLevel(logging.WARNING)
    if not (len(np.unique(mask_patch)) > 1 and np.count_nonzero(mask_patch) > 1):
        return None

    output_dict = {}  # type: Dict[str, Any]

    stain_patch = pull_stain_channel(image_patch, stain_vectors, channel=stain_channel)

    original_pixels = stain_patch.astype(np.uint8)[
        np.where(mask_patch.astype(np.bool_))
    ].flatten()
    original_pixels_valid = original_pixels[original_pixels > 0]
    output_dict["original_pixels"] = original_pixels_valid

    extractor = radiomics.featureextractor.RadiomicsFeatureExtractor(binWidth=16)
    extractor.disableAllFeatures()
    extractor.enableImageTypeByName("Original")
    extractor.enableFeatureClassByName("glcm")
    # extractor.enableFeatureByName('original_glcm_MCC', enable=False)

    sitk_image = sitk.GetImageFromArray(stain_patch.astype(np.uint8))
    sitk_mask = sitk.GetImageFromArray(mask_patch.astype(np.uint8))

    try:
        bbox, _ = radiomics.imageoperations.checkMask(sitk_image, sitk_mask)
    except Exception as exc:
        logger.warning(f"Skipping this patch, mask pair due to '{exc}'")
        return None
    else:
        # cimg, cmas = radiomics.imageoperations.cropToTumorMask(sitk_image, sitk_mask, bbox)

        fts = extractor.execute(sitk_image, sitk_mask, voxelBased=True)

        for key in fts.keys():
            if "original_glcm" not in key:
                continue

            stainomics_patch = sitk.GetArrayFromImage(fts[key]).astype(np.float32)
            stainomics_nonzero = stainomics_patch[stainomics_patch != 0].flatten()
            stainomics_valid = stainomics_nonzero[~np.isnan(stainomics_nonzero)]

            output_dict[key] = stainomics_valid

        return output_dict
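
A minimal sketch of a call, assuming an RGB patch and a binary mask are already in hand; the arrays below are synthetic placeholders, so the extracted features are not meaningful:

import numpy as np

# Synthetic placeholder inputs: a 256x256 RGB patch and a matching binary mask
image_patch = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
mask_patch = np.zeros((256, 256), dtype=np.uint8)
mask_patch[64:192, 64:192] = 1

stain_vectors = get_stain_vectors_macenko(image_patch)
features = extract_patch_texture_features(
    image_patch, mask_patch, stain_vectors, stain_channel=0
)
if features is not None:  # None when the mask is empty or the radiomics mask check fails
    print(sorted(features.keys()))  # 'original_pixels' plus original_glcm_* feature arrays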

get_downscaled_thumbnail(slide, scale_factor)

get downscaled thumbnail

yields a thumbnail image of a whole slide rescaled by a specified scale factor

Parameters:

    slide (TiffSlide): slide object. Required.
    scale_factor (int): integer scaling factor to resize the whole slide by. Required.

Returns:

    np.ndarray: downsized whole slide thumbnail

Source code in src/luna/pathology/common/utils.py
@timed
def get_downscaled_thumbnail(
    slide: TiffSlide, scale_factor: Union[int, float]
) -> np.ndarray:
    """get downscaled thumbnail

    yields a thumbnail image of a whole slide rescaled by a specified scale factor

    Args:
        slide (TiffSlide): slide object
        scale_factor (int): integer scaling factor to resize the whole slide by

    Returns:
        np.ndarray: downsized whole slide thumbnail
    """
    new_width = slide.dimensions[0] // scale_factor
    new_height = slide.dimensions[1] // scale_factor
    img = slide.get_thumbnail((int(new_width), int(new_height)))
    return np.array(img)
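
For instance, with a placeholder slide path:

from tiffslide import TiffSlide

slide = TiffSlide("slide_123.svs")  # placeholder WSI path
thumbnail = get_downscaled_thumbnail(slide, scale_factor=32)
print(thumbnail.shape)  # approximately (height // 32, width // 32, 3)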

get_full_resolution_generator(slide_urlpath, tile_size, storage_options={})

Return a DeepZoomGenerator and the full-resolution generator level

Parameters:

    slide_urlpath (str): slide urlpath. Required.
    tile_size (int): tile size. Required.
    storage_options (dict): storage options to pass to the filesystem. Defaults to {}.

Returns:

    Tuple[DeepZoomGenerator, int]: the generator and its highest (full-resolution) level

Source code in src/luna/pathology/common/utils.py
def get_full_resolution_generator(
    slide_urlpath: str,
    tile_size: int,
    storage_options: dict = {},
) -> Tuple[DeepZoomGenerator, int]:
    """Return MinimalComputeAperioDZGenerator and generator level

    Args:
        slide_urlpath (str): slide urlpath

    Returns:
        Tuple[MinimalComputeAperioDZGenerator, int]
    """
    generator = DeepZoomGenerator(
        slide_urlpath,
        overlap=0,
        tile_size=tile_size,
        limit_bounds=False,
        storage_options=storage_options,
    )

    generator_level = generator.level_count - 1
    # assert generator.level_dimensions[generator_level] == slide.dimensions
    return generator, generator_level
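
A short usage sketch, with a placeholder slide path:

generator, level = get_full_resolution_generator("slide_123.svs", tile_size=256)
tile = generator.get_tile(level, (0, 0))  # top-left tile at the full-resolution level
print(generator.level_count, level)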

get_layer_names(xml_urlpath, storage_options={})

get available layer names

Finds all possible annotation layer names from a Halo generated xml ROI file

Parameters:

    xml_urlpath (str): absolute or relative file path to the input Halo XML file. Prefix
        with a scheme to use alternative filesystems. Required.
    storage_options (dict): storage options to pass to the filesystem. Defaults to {}.

Returns:

    set: Available region names

Source code in src/luna/pathology/common/utils.py
def get_layer_names(xml_urlpath, storage_options={}):
    """get available layer names

    Finds all possible annotation layer names from a Halo generated xml ROI file

    Args:
        xml_urlpath (str): absolute or relative file path to the input Halo XML file.
            Prefix with a scheme to use alternative filesystems.

    Returns:
        set: Available region names
    """  # Annotations >>
    with open(xml_urlpath, "r", **storage_options) as of:
        e = et.parse(of).getroot()
    e = e.findall("Annotation")
    names = {ann.get("Name") for ann in e}

    return names
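
For example, with a placeholder path:

names = get_layer_names("slide_123.annotations.xml")  # placeholder Halo XML path
print(names)  # e.g. {'Tumor', 'Stroma'}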

get_scale_factor_at_magnification(slide, requested_magnification)

get scale factor at magnification

Return a scale factor if slide scanned magnification and requested magnification are different.

Parameters:

    slide (TiffSlide): slide object. Required.
    requested_magnification (Optional[int]): requested magnification. Required.

Returns:

    float: scale factor required to achieve requested magnification

Source code in src/luna/pathology/common/utils.py
def get_scale_factor_at_magnification(
    slide: TiffSlide, requested_magnification: Optional[int]
) -> float:
    """get scale factor at magnification

    Return a scale factor if slide scanned magnification and
    requested magnification are different.

    Args:
        slide (TiffSlide): slide object
        requested_magnification (Optional[int]): requested magnification

    Returns:
        float: scale factor required to achieve requested magnification
    """
    # First convert to float to handle true integers encoded as string floats (e.g. '20.000')
    mag_value = float(slide.properties["aperio.AppMag"])

    # Then convert to integer
    scanned_magnification = int(mag_value)

    # Make sure we don't have non-integer magnifications
    if not int(mag_value) == mag_value:
        raise RuntimeError(
            "Can't handle slides scanned at non-integer magnifications! (yet)"
        )

    # Verify magnification valid
    scale_factor = 1.0
    if requested_magnification and scanned_magnification != requested_magnification:
        if scanned_magnification < requested_magnification:
            raise ValueError(
                f"Expected magnification <={scanned_magnification} but got {requested_magnification}"
            )
        elif (scanned_magnification % requested_magnification) == 0:
            scale_factor = scanned_magnification // requested_magnification
        else:
            logger.warning("Scale factor is not an integer, be careful!")
            scale_factor = scanned_magnification / requested_magnification

    return scale_factor
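
A worked example of the branching above, for an open TiffSlide whose aperio.AppMag property is '40.000' (values illustrative):

scale = get_scale_factor_at_magnification(slide, requested_magnification=20)
# scale == 2 (40 % 20 == 0, so integer division applies)
# requested_magnification=30 -> warning logged, scale == 40 / 30 (non-integer)
# requested_magnification=80 -> ValueError (requested exceeds scanned magnification)
# requested_magnification=None -> scale == 1.0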

get_stain_vectors_macenko(sample)

get_stain_vectors

Uses the staintools MacenkoStainExtractor to extract stain vectors

Parameters:

    sample (np.ndarray): input patch. Required.

Returns:

    np.ndarray: the stain matrix

Source code in src/luna/pathology/common/utils.py
def get_stain_vectors_macenko(sample: np.ndarray) -> np.ndarray:
    """get_stain_vectors

    Uses the staintools MacenkoStainExtractor to extract stain vectors

    Args:
        sample (np.ndarray): input patch
    Returns:
        np.ndarray: the stain matrix

    """
    from staintools.stain_extraction.macenko_stain_extractor import (
        MacenkoStainExtractor,  # type: ignore
    )

    extractor = MacenkoStainExtractor()
    vectors = extractor.get_stain_matrix(sample)
    return vectors

get_tile_array(row, storage_options={})

Returns a tile image as a numpy array.

Parameters:

    row (pd.DataFrame): row with address (as its name) and tile_store columns. Required.
    storage_options (dict): storage options to pass to the filesystem. Defaults to {}.
Source code in src/luna/pathology/common/utils.py
def get_tile_array(row: pd.DataFrame, storage_options: dict = {}) -> np.ndarray:
    """
    Returns a tile image as a numpy array.

    Args:
        row (pd.DataFrame): row with address (as its name) and tile_store columns
    """
    fs, path = fsspec.core.url_to_fs(row.tile_store, **storage_options)
    cache_fs = fsspec.filesystem("filecache", fs=fs)
    with cache_fs.open(path, "rb", **storage_options) as of:
        with h5py.File(of, "r") as hf:
            tile = np.array(hf[row.name])
            return tile

get_tile_arrays(indices, input_slide_urlpath, tile_size, storage_options={})

Get tile arrays for the tile indices

Parameters:

    indices (List[int]): list of integers to return as tiles. Required.
    input_slide_urlpath (str): path to WSI. Required.
    tile_size (int): width, height of generated tile. Required.
    storage_options (dict): storage options to pass to the filesystem. Defaults to {}.

Returns:

    List[Tuple[int, np.ndarray]]: a list of tuples (index, tile array) for given indices

Source code in src/luna/pathology/common/utils.py
def get_tile_arrays(
    indices: List[int],
    input_slide_urlpath: str,
    tile_size: int,
    storage_options: dict = {},
) -> List[Tuple[int, np.ndarray]]:
    """
    Get tile arrays for the tile indices

    Args:
        indices (List[int]): list of integers to return as tiles
        input_slide_urlpath (str): path to WSI
        tile_size (int): width, height of generated tile

    Returns:
        a list of tuples (index, tile array) for given indices
    """
    full_generator, full_level = get_full_resolution_generator(
        input_slide_urlpath, tile_size=tile_size, storage_options=storage_options
    )
    return [
        (
            index,
            np.array(
                full_generator.get_tile(
                    full_level, address_to_coord(str(index))
                ).resize((tile_size, tile_size))
            ),
        )
        for index in indices
    ]

get_tile_color(score)

get tile color

uses the default color palette to return the color of a tile based on its score

Parameters:

    score (Union[str, float]): a value between [0,1] such as the Otsu threshold, purple
        score, a model output, etc. Required.

Returns:

    Union[float, None]: the color if the input is of a valid type, else None

Source code in src/luna/pathology/common/utils.py
def get_tile_color(score: Union[str, float]) -> Optional[npt.ArrayLike]:
    """get tile color

    uses the default color palette to return the color of a tile based on its score

    Args:
        score (Union[str, float]): a value between [0,1] such as the
            Otsu threshold, purple score, a model output, etc.
    Returns:
        Union[float, None]: returns the color if the input is of a valid type,
            else None

    """
    # categorical
    if isinstance(score, str):
        if score in categorical_colors:
            return categorical_colors[score]
        else:
            tile_color = 255 * np.array(categorial[len(categorical_colors.keys())])
            categorical_colors[score] = tile_color
            return tile_color

    # float, expected to be value from [0,1]
    elif isinstance(score, float) and score <= 1.0 and score >= 0.0:
        tile_color = np.array([int(255 * i) for i in palette(score)[:3]])
        return tile_color

    else:
        print("Invalid Score Type")
        return None
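
A behavior sketch following the branches above:

get_tile_color(0.25)     # np.array of three RGB values from the continuous palette
get_tile_color("Tumor")  # cached (or newly assigned) categorical color
get_tile_color(1.5)      # out of range: prints "Invalid Score Type", returns None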

pull_stain_channel(patch, vectors, channel=None)

pull stain channel

deconvolves the image patch into stain channels, optionally returning a single channel

Parameters:

    patch (np.ndarray): input image patch. Required.
    vectors (np.ndarray): stain vectors. Required.
    channel (Optional[int]): stain channel to select; if None, all channels are returned.
        Defaults to None.

Returns:

    np.ndarray: the deconvolved stain image, or a single stain channel if requested

Source code in src/luna/pathology/common/utils.py
def pull_stain_channel(
    patch: np.ndarray, vectors: np.ndarray, channel: Optional[int] = None
) -> np.ndarray:
    """pull stain channel

    deconvolves the image patch into stain channels using the given stain vectors,
    optionally returning a single channel

    Args:
        patch (np.ndarray): input image patch
        vectors (np.ndarray): stain vectors
        channel (Optional[int]): stain channel to select; if None, all channels are returned

    Returns:
        np.ndarray: the deconvolved stain image, or a single stain channel if requested
    """

    from staintools.miscellaneous.get_concentrations import (
        get_concentrations,  # type: ignore
    )

    tile_concentrations = get_concentrations(patch, vectors)
    identity = np.array([[1, 0, 0], [0, 1, 0]])
    tmp = 255 * (1 - np.exp(-1 * np.dot(tile_concentrations, identity)))
    tmp = tmp.reshape(patch.shape).astype(np.uint8)
    if channel is not None:
        return tmp[:, :, channel]
    else:
        return tmp
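
A short sketch combining this with get_stain_vectors_macenko; image_patch is assumed to be an RGB H&E tile:

vectors = get_stain_vectors_macenko(image_patch)             # stain matrix from the patch
stain = pull_stain_channel(image_patch, vectors, channel=0)  # single-channel uint8 image
deconv = pull_stain_channel(image_patch, vectors)            # all deconvolved channels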

visualize_tiling_scores(df, thumbnail_img, scale_factor, score_type_to_visualize, normalize=True)

visualize tile scores

draws colored boxes around tiles to indicate the value of the score

Parameters:

    df (pd.DataFrame): input dataframe. Required.
    thumbnail_img (np.ndarray): input thumbnail image, modified in place. Required.
    scale_factor (float): scale factor between slide and thumbnail coordinates. Required.
    score_type_to_visualize (str): column name from data frame. Required.
    normalize (bool): whether to min-max normalize numeric scores. Defaults to True.

Returns:

    np.ndarray: new thumbnail image with boxes around tiles indicating the value of the score

Source code in src/luna/pathology/common/utils.py
def visualize_tiling_scores(
    df: pd.DataFrame,
    thumbnail_img: np.ndarray,
    scale_factor: float,
    score_type_to_visualize: str,
    normalize=True,
) -> np.ndarray:
    """visualize tile scores

    draws colored boxes around tiles to indicate the value of the score

    Args:
        df (pd.DataFrame): input dataframe
        thumbnail_img (np.ndarray): input thumbnail image, modified in place
        scale_factor (float): scale factor between slide and thumbnail coordinates
        score_type_to_visualize (str): column name from data frame
        normalize (bool): whether to min-max normalize numeric scores

    Returns:
        np.ndarray: new thumbnail image with boxes around tiles indicating the
        value of the score
    """

    assert isinstance(thumbnail_img, np.ndarray)

    if normalize and df[score_type_to_visualize].dtype.kind in "biuf":
        df[score_type_to_visualize] = (
            df[score_type_to_visualize] - np.min(df[score_type_to_visualize])
        ) / np.ptp(df[score_type_to_visualize])

    for _, row in tqdm(df.iterrows(), total=len(df)):
        if "regional_label" in row and pd.isna(row.regional_label):
            continue

        start = (
            row.y_coord / scale_factor,
            row.x_coord / scale_factor,
        )  # flip because OpenSlide uses (column, row), but skimage uses (row, column)

        rr, cc = rectangle_perimeter(
            start=start,
            extent=(row.xy_extent / scale_factor, row.xy_extent / scale_factor),
            shape=thumbnail_img.shape,
        )

        # set color based on intensity of value instead of black border (1)
        score = row[score_type_to_visualize]

        thumbnail_img[rr, cc] = get_tile_color(score)

    return thumbnail_img
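
Putting the pieces together; the dataframe and score column are hypothetical, and the thumbnail is copied because the function draws on it in place:

annotated = visualize_tiling_scores(
    df_tiles,          # hypothetical dataframe with x_coord, y_coord, xy_extent columns
    thumbnail.copy(),  # e.g. from get_downscaled_thumbnail
    scale_factor=32,
    score_type_to_visualize="otsu_score",  # hypothetical score column
)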

dsa

dsa_api_handler

copy_item(gc, item_id, destination_id)

Copies the item to the destination.

Parameters:

    gc: girder_client. Required.
    item_id (string): uuid of the item to be copied. Required.
    destination_id (string): uuid of the destination folder. Required.
Source code in src/luna/pathology/dsa/dsa_api_handler.py
def copy_item(gc, item_id: str, destination_id: str):
    """
    Copies the item to the destination.

    Args:
        gc: girder_client
        item_id (string): uuid of the item to be copied
        destination_id (string): uuid of the destination folder
    """
    request_url = f"item/{item_id}/copy?folderId={destination_id}"
    try:
        gc.post(request_url)
    except Exception as err:
        logger.error(f"Error copying item: {err}")
        raise RuntimeError("Can not copy item")

create_collection(gc, collection_name)

Creates a dsa collection and returns a collection uuid from the created collection on successful creation.

Parameters:

    gc: girder client. Required.
    collection_name (string): name of the collection. Required.

Returns:

    Optional[str]: DSA collection uuid, or None if the post request fails.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def create_collection(gc, collection_name: str) -> Optional[str]:
    """
    Creates a dsa collection and returns a collection uuid from the created
    collection on successful creation.

    Args:
        gc: girder client
        collection_name (string): name of the collection

    Returns:
        string: DSA collection uuid, or None if the post request fails.
    """
    try:
        gc.createCollection(collection_name)
        logger.debug(f"Created collection {collection_name}")
        new_collection_id = get_collection_uuid(gc, collection_name)
        logger.debug(f"Collection {collection_name} has id {new_collection_id}")
    except Exception as err:
        logger.error(f"Couldn't create collection {collection_name} : {err}")
        return None

    return new_collection_id

create_folder(gc, folder_name, parent_type, parent_id)

Creates a dsa folder and returns a folder uuid from the created folder on successful creation.

Parameters:

    gc: girder client. Required.
    folder_name (string): name of the folder in DSA. Required.
    parent_type (string): type of the parent container (i.e. folder or collection). Required.
    parent_id (string): uuid of the parent container. Required.

Returns:

    Optional[str]: DSA folder uuid, or None if the post request fails.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def create_folder(
    gc, folder_name: str, parent_type: str, parent_id: str
) -> Optional[str]:
    """
    Creates a dsa folder and returns a folder uuid from the created
    folder on successful creation.

    Args:
        gc: girder client
        folder_name (string): name of the folder in DSA
        parent_type (string): type of the parent container (ie. folder, collection)
        parent_id (string): uuid of the parent container

    Returns:
        string: DSA folder uuid, or None if the post request fails.
    """
    try:
        gc.createFolder(parent_id, folder_name, parentType=parent_type)
        logger.debug(f"Created folder {folder_name}")
        new_folder_uuid = get_folder_uuid(gc, folder_name, parent_type, parent_id)
        logger.debug(f"Folder {folder_name} has id {new_folder_uuid}")
    except Exception as err:
        logger.error(f"Couldn't create folder {folder_name} : {err}")
        return None

    return new_folder_uuid

create_s3_assetstore(gc, name, bucket, access, secret, service)

Creates a s3 assetstore.

Parameters:

    gc: girder client. Required.
    name (string): name of the assetstore. Required.
    bucket (string): name of the s3 bucket. Required.
    access (string): s3 access key ID. Required.
    secret (string): s3 secret access key. Required.
    service (string): url of the s3 host. Required.

Returns:

    Optional[str]: DSA assetstore uuid. Raises RuntimeError if the post request fails.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def create_s3_assetstore(
    gc, name: str, bucket: str, access: str, secret: str, service: str
) -> Optional[str]:
    """
    Creates a s3 assetstore.

    Args:
        gc: girder client
        name (string): name of the assetstore
        bucket (string): name of the s3 bucket
        access (string): s3 access key ID
        secret (string): s3 secret access key
        service (string): url of the s3 host

    Returns:
        string: DSA assetstore uuid. Raises RuntimeError if the post request fails.
    """
    request_url = (
        f"assetstore?name={name}&type=2&bucket={bucket}&accessKeyId={access}"
        + f"&secret={secret}&service={service}"
    )
    try:
        gc.post(request_url)
        logger.debug(f"Created assetstore {name}")
        new_assetstore_uuid = get_assetstore_uuid(gc, name)
        logger.debug(f"Assetstore {name} has id {new_assetstore_uuid}")
    except Exception as err:
        logger.error(f"Couldn't create assetstore {name}: {err}")
        raise RuntimeError("Unable to create s3 assetstore")

    return new_assetstore_uuid

dsa_authenticate(gc, username, password)

Authenticate girder client

Parameters:

    gc: girder client. Required.
    username (str): DSA username. Required.
    password (str): DSA password. Required.
Source code in src/luna/pathology/dsa/dsa_api_handler.py
def dsa_authenticate(gc, username, password):
    """Authenticate girder client

    Args:
        gc: girder client
        username (str): DSA username
        password (str): DSA password
    """
    # Initial connection
    try:
        gc.authenticate(username, password)
        logger.info(f"Connected to DSA @ {gc.urlBase}")
    except girder_client.AuthenticationError:
        logger.exception("Couldn't authenticate DSA due to AuthenticationError")
        raise RuntimeError("Connection to DSA endpoint failed.")
    except Exception:
        logger.exception("Couldn't authenticate DSA due to some other exception")
        raise RuntimeError("Connection to DSA endpoint failed.")
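
Throughout this module, gc is a girder_client.GirderClient. A minimal connection sketch, with placeholder URL and credentials:

import girder_client

gc = girder_client.GirderClient(apiUrl="http://localhost:8080/api/v1")  # placeholder URL
dsa_authenticate(gc, "username", "password")  # placeholder credentials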

get_annotation_df(gc, annotation_uuid)

Return annotation metadata (regions) for a given annotation as a dataframe

Parameters:

    gc: girder client. Required.
    annotation_uuid (str): DSA annotation uuid. Required.

Returns:

    pd.DataFrame: annotation/region metadata, with slide_item_uuid as an additional index

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_annotation_df(gc, annotation_uuid):
    """Return annotation metadata (regions) for a given annotation as a dataframe

    Args:
        gc: girder client
        annotation_uuid (str): DSA annotation uuid
    Returns:
        pd.DataFrame: annotation/region metadata, with slide_item_uuid as an additional index
    """
    # Here we get all the annotation data as a json document
    annot = gc.get(f"annotation/{annotation_uuid}")
    (
        df_summary,
        df_regions,
    ) = histomicstk.annotations_and_masks.annotation_and_mask_utils.parse_slide_annotations_into_tables(
        [annot]
    )

    # Let's process the coordinates a bit...
    df_regions["x_coords"] = [
        [int(x) for x in coords_x.split(",")] for coords_x in df_regions["coords_x"]
    ]
    df_regions["y_coords"] = [
        [int(x) for x in coords_x.split(",")] for coords_x in df_regions["coords_y"]
    ]
    df_regions = df_regions.drop(columns=["coords_x", "coords_y"])

    # And join the summary data with the regional data
    df_annotations = (
        df_summary.set_index("annotation_girder_id")
        .join(df_regions.set_index("annotation_girder_id"))
        .reset_index()
    )

    df_annotations = df_annotations.rename(columns={"itemId": "slide_item_uuid"})

    return df_annotations

get_assetstore_uuid(gc, assetstore_name)

Returns the DSA assetstore uuid from the provided assetstore_name

Parameters:

    gc: girder client. Required.
    assetstore_name (string): name of the assetstore in DSA. Required.

Returns:

    Optional[str]: DSA assetstore uuid, or None if nothing matches the assetstore name.
        Raises RuntimeError if the get request fails.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_assetstore_uuid(gc, assetstore_name: str) -> Optional[str]:
    """Returns the DSA assetstore uuid from the provided `assetstore_name`

    Args:
        gc: girder client
        assetstore_name (string): name of the assetstore in DSA

    Returns:
        string: DSA assetstore uuid, or None if nothing matches the assetstore name.
                Raises RuntimeError if the get request fails.
    """
    try:
        df_assetstores = pd.DataFrame(gc.get("assetstore?"))
        if len(df_assetstores):
            df_assetstores = df_assetstores.set_index("_id")
            df_assetstores = df_assetstores.query(f"name=='{assetstore_name}'")
        logger.debug(f"Found assetstores {df_assetstores}")
    except Exception as err:
        logger.error(f"Couldn't retrieve data from DSA: {err}")
        raise RuntimeError("Connection to DSA endpoint failed.")

    if len(df_assetstores) == 0:
        logger.debug(f"No matching assetstore '{assetstore_name}'")
        return None

    assetstore_uuid = df_assetstores.index.item()

    logger.info(
        f"Found assetstore id={assetstore_uuid} for assetstore={assetstore_name}"
    )

    return assetstore_uuid

get_collection_metadata(collection_name, gc)

A function used to get the stylesheet associated with a DSA collection. The stylesheet can store the labels used in the annotation process

Parameters:

    collection_name (str): name of DSA collection used to store the slides. Required.
    gc: girder client. Required.

Returns:

    Optional[Tuple[str, Dict[str, any]]]: a tuple consisting of the collection uuid and the
        stylesheet in JSON format (None if no stylesheet is set), or None if the collection
        cannot be found

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_collection_metadata(
    collection_name: str, gc
) -> Optional[Tuple[str, Dict[str, any]]]:
    """A function used to get the stylehseet associated with a DSA collection. The stylesheet
    can store the labels used in the annotation process

    Args:
        collection_name (str): name of DSA collection used to store the slides
        gc: girder client
    Returns:
        Optional[Tuple[str, Dict[str, any]]]: a tuple consisting of the collection uuid
            and thei stylesheet in JSON format or None if no stylesheet is associated
            with the provided collection
    """

    collection_uuid = get_collection_uuid(gc, collection_name)

    if collection_uuid is not None:
        logger.debug("retreived collection uuid")

        # try get request from girder
        try:
            collection_response = gc.get(f"/collection/{collection_uuid}")
        except requests.exceptions.HTTPError as err:
            logger.error(
                f"Error in collection get request: {err.response.status_code}, {err.response.text}"
            )
            return None

        # if response successful, attempt to get stylesheet
        try:
            metadata_stylesheet = collection_response["meta"]["stylesheet"]
        except KeyError:
            logger.error(f"No stylesheet in collection: {collection_uuid}")
            metadata_stylesheet = None
    else:
        logger.warning(f"Invalid collection name: {collection_name}")
        return None

    return (collection_uuid, metadata_stylesheet)

get_collection_uuid(gc, collection_name)

Returns the DSA collection uuid from the provided collection_name

Parameters:

    gc: girder client. Required.
    collection_name (string): name of the collection in DSA. Required.

Returns:

    Optional[str]: DSA collection uuid, or None if nothing matches the collection name.
        Raises RuntimeError if the get request fails.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_collection_uuid(gc, collection_name: str) -> Optional[str]:
    """Returns the DSA collection uuid from the provided `collection_name`

    Args:
        gc: girder client
        collection_name (string): name of the collection in DSA

    Returns:
        string: DSA collection uuid, or None if nothing matches the collection name.
                Raises RuntimeError if the get request fails.
    """
    try:
        df_collections = pd.DataFrame(gc.listCollection())
        if len(df_collections):
            df_collections = df_collections.set_index("_id")
            df_collections = df_collections.query(f"name=='{collection_name}'")
        logger.debug(f"Found collections {df_collections}")
    except Exception as err:
        logger.error(f"Couldn't retrieve data from DSA: {err}")
        raise RuntimeError("Connection to DSA endpoint failed.")

    # Look for a collection called our collection name
    if len(df_collections) == 0:
        logger.debug(f"No matching collection '{collection_name}'")
        return None

    collection_uuid = df_collections.index.item()

    logger.info(
        f"Found collection id={collection_uuid} for collection={collection_name}"
    )

    return collection_uuid

get_folder_uuid(gc, folder_name, parent_type, parent_id)

Returns the DSA folder uuid from the provided folder_name

Parameters:

    gc: girder client. Required.
    folder_name (string): name of the folder in DSA. Required.
    parent_type (string): type of the parent container (i.e. folder or collection). Required.
    parent_id (string): uuid of the parent container. Required.

Returns:

    Optional[str]: DSA folder uuid, or None if nothing matches the folder name.
        Raises RuntimeError if the get request fails.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_folder_uuid(
    gc, folder_name: str, parent_type: str, parent_id: str
) -> Optional[str]:
    """Returns the DSA folder uuid from the provided `folder_name`

    Args:
        gc: girder client
        folder_name (string): name of the folder in DSA
        parent_type (string): type of the parent container (ie. folder, collection)
        parent_id (string): uuid of the parent container

    Returns:
        string: DSA folder uuid, or None if nothing matches the folder name.
                Raises RuntimeError if the get request fails.
    """
    try:
        df_folders = pd.DataFrame(gc.listFolder(parent_id, parent_type))
        if len(df_folders):
            df_folders = df_folders.set_index("_id")
            df_folders = df_folders.query(f"name=='{folder_name}'")
        logger.debug(f"Found folders {df_folders}")
    except Exception as err:
        logger.error(f"Couldn't retrieve data from DSA: {err}")
        raise RuntimeError("Connection to DSA endpoint failed.")

    if len(df_folders) == 0:
        logger.debug(f"No matching folders '{folder_name}'")
        return None

    folder_uuid = df_folders.index.item()

    logger.info(f"Found folder id={folder_uuid} for folder={folder_name}")

    return folder_uuid

get_item_uuid(gc, image_name, collection_name)

Returns the DSA item uuid from the provided image_name

Parameters:

    gc: girder client. Required.
    image_name (string): name of the image in DSA e.g. 123.svs. Required.
    collection_name (str): name of DSA collection. Required.

Returns:

    Optional[str]: DSA item uuid, or None if nothing matches the collection/image name.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_item_uuid(gc, image_name: str, collection_name: str) -> Optional[str]:
    """Returns the DSA item uuid from the provided `image_name`

    Args:
        image_name (string): name of the image in DSA e.g. 123.svs
        collection_name (str): name of DSA collection
        gc: girder client

    Returns:
        string: DSA item uuid. None if nothing matches the collection/image name.
    """

    collection_uuid = get_collection_uuid(gc, collection_name)
    if not collection_uuid:
        return None

    image_id = Path(image_name).stem

    try:
        uuid_response = gc.get(f'/item?text="{image_id}"')

    except requests.exceptions.HTTPError as err:
        logger.error(
            f"Error in item get request: {err.response.status_code}, {err.response.text}"
        )
        return None

    if uuid_response is not None and len(uuid_response) > 0:
        # multiple entries can come up based on substring matches, return the correct item id by checking name field in dictionary.
        for uuid_response_dict in uuid_response:
            if "name" in uuid_response_dict and "_id" in uuid_response_dict:
                if (
                    uuid_response_dict["name"] == image_name
                    and uuid_response_dict["baseParentId"] == collection_uuid
                ):
                    dsa_uuid = uuid_response_dict["_id"]
                    logger.debug(f"Image file {image_name} found with id: {dsa_uuid}")
                    return dsa_uuid
    logger.warning(f"Image file {image_name} not found")
    return None
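
For example, with placeholder names:

item_uuid = get_item_uuid(gc, "123.svs", "my-collection")
if item_uuid is None:
    print("No matching item")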

get_item_uuid_by_folder(gc, image_name, folder_uuid)

Returns the DSA item uuid from the provided folder

Parameters:

    gc: girder client. Required.
    image_name (string): name of the image in DSA e.g. 123.svs. Required.
    folder_uuid (string): uuid of parent DSA folder. Required.

Returns:

    Optional[str]: DSA item uuid, or None if nothing matches the folder uuid / image name.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_item_uuid_by_folder(gc, image_name: str, folder_uuid: str) -> Optional[str]:
    """Returns the DSA item uuid from the provided folder

    Args:
        gc: girder client
        image_name (string): name of the image in DSA e.g. 123.svs
        folder_uuid (string): uuid of parent DSA folder

    Returns:
        string: DSA item uuid. None if nothing matches the folder uuid / image name.
    """
    image_id = Path(image_name).stem
    try:
        uuid_response = gc.get(f'/item?text="{image_id}"')

    except requests.exceptions.HTTPError as err:
        logger.error(
            f"Error in item get request: {err.response.status_code}, {err.response.text}"
        )
        return None

    if uuid_response is not None and len(uuid_response) > 0:
        # multiple entries can come up based on substring matches, return the correct item id by checking name field in dictionary.
        for uuid_response_dict in uuid_response:
            if "name" in uuid_response_dict and "_id" in uuid_response_dict:
                if (
                    uuid_response_dict["name"] == image_name
                    and uuid_response_dict["folderId"] == folder_uuid
                ):
                    dsa_uuid = uuid_response_dict["_id"]
                    logger.debug(f"Image file {image_name} found with id: {dsa_uuid}")
                    return dsa_uuid
    logger.warning(f"Image file {image_name} not found")
    return None

get_slide_annotation(slide_id, annotation_name, collection_name, gc)

A helper function that pulls json annotations along with metadata for a particular slide from DSA. Used for both point and regional annotation types.

Parameters:

    slide_id (str): image name of WSI on DSA. Required.
    annotation_name (str): name of annotation, or label, created on DSA. Required.
    collection_name (str): name of DSA collection the WSI belongs to. Required.
    gc: girder client. Required.

Returns:

    Optional[Tuple[str, Dict[str, any], Dict[str, any]]]: a tuple consisting of the slide id,
        slide metadata, and a JSON-formatted annotation, or None if the annotation can't be
        found (i.e. if slide_id, annotation_name, or collection_name are mis-specified)

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_slide_annotation(
    slide_id: str,
    annotation_name: str,
    collection_name: str,
    gc,
) -> Optional[Tuple[str, Dict[str, any], Dict[str, any]]]:
    """A helper function that pulls json annotations along with
    metadata for a particular slide from DSA. Used for both point and regional
    annotation types.

    Args:
        slide_id (str): image name of WSI on DSA.
        annotation_name (str): name of annotation, or label, created on DSA
        collection_name (str): name of DSA collection the WSI belongs to
        gc: girder client

    Returns:
        Optional[Tuple[str, Dict[str, any], Dict[str, any]]]: a tuple consisting of the slide id,
            slide metadata, and a JSON-formatted annotation, or None if the annotation can't be
            found (i.e. if slide_id, annotation_name or collection_name are mis-specified)
    """

    item_uuid = get_item_uuid(gc, slide_id, collection_name)

    if not item_uuid:
        logger.info(f"Slide {slide_id} not found in {collection_name}")
        return None

    # search for annotation

    logger.debug("Starting request for annotation")
    try:
        annotation_response = gc.get(
            f"/annotation?itemId={item_uuid}&name={annotation_name}"
        )

    except Exception as err:
        logger.error(f"Error in annotation get request: {err}")
        return None

    # get annotation json from response
    if annotation_response:
        annotation_response = annotation_response[0]
        annotation = annotation_response["annotation"]
    else:
        logger.info(f"No annotation found for slide {slide_id}")
        return None

    # get additional slide-level metadata from response
    date_created = annotation_response["created"]
    date_updated = annotation_response["updated"]

    annotation_id = annotation_response["_id"]
    creator_id = annotation_response["creatorId"]
    creator_updated_id = annotation_response["updatedId"]
    annotation_name = annotation["name"]

    try:
        creator_response = gc.get(f"/user/{creator_id}")
        creator_updated_response = gc.get(f"/user/{creator_updated_id}")
    except requests.exceptions.HTTPError as err:
        logger.error(
            f"Error in user get request: {err.response.status_code}, {err.response.text}"
        )
        return None

    creator_login = creator_response["login"]
    creator_login_updated = creator_updated_response["login"]

    slide_metadata = {
        "annotation_id": annotation_id,
        "annotation_name": annotation_name,
        "date": date_created,
        "date_updated": date_updated,
        "user": creator_login,
        "user_updated": creator_login_updated,
    }

    return (slide_id, slide_metadata, json.dumps(annotation))
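
A usage sketch with placeholder names, unpacking the tuple described above:

result = get_slide_annotation("123.svs", "Tumor", "my-collection", gc)
if result is not None:
    slide_id, slide_metadata, annotation_json = result
    print(slide_metadata["user"], slide_metadata["date"])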

get_slide_df(gc, collection_uuid)

Return slide metadata (largeImage items) for a given collection as a dataframe

Parameters:

    gc: girder client. Required.
    collection_uuid (str): DSA collection uuid. Required.

Returns:

    pd.DataFrame: slide metadata, with slide_id and slide_item_uuid as additional indices

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def get_slide_df(gc, collection_uuid: str) -> pd.DataFrame:
    """Return slide metadata (largeImage items) for a given colleciton as a dataframe

    Args:
        gc: girder client
        collection_uuid (str): DSA collection uuid
    Returns:
        pd.DataFrame: slide metadata, with slide_id and slide_item_uuid as additional indicies
    """

    try:
        resource_response = gc.listResource(
            f"resource/{collection_uuid}/items", {"type": "collection"}
        )
    except Exception:
        logger.error(
            f"Couldn't retrieve resource data from DSA for {collection_uuid}, perhaps the collection UUID does not exist?"
        )
        raise RuntimeError("Retriving slide data from DSA failed.")

    df_slide_items = pd.DataFrame(resource_response).dropna(
        subset=["largeImage"]
    )  # Get largeImage types from collection items

    # Fill additional metadata based on convention (slide_id)
    df_slide_items["slide_id"] = df_slide_items["name"].apply(
        lambda x: Path(x).stem
    )  # The stem
    df_slide_items["slide_item_uuid"] = df_slide_items["_id"]

    logger.info(f"Found {len(df_slide_items)} slides!")

    return df_slide_items

import_assetstore_to_folder(gc, assetstore_uuid, destination_uuid)

Imports the assetstore to the specified destination folder.

Parameters:

    gc: girder client. Required.
    assetstore_uuid (string): uuid of the assetstore. Required.
    destination_uuid (string): uuid of the destination folder. Required.

Returns:

    None. Raises RuntimeError if the post request fails.

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def import_assetstore_to_folder(
    gc, assetstore_uuid: str, destination_uuid: str
) -> Optional[str]:
    """
    Imports the assetstore to the specified destination folder.

    Args:
        gc: girder client
        assetstore_uuid (string): uuid of the assetstore
        destination_uuid (string): uuid of the destination folder

    Returns:
        None, raises error if post request fails
    """
    request_url = f"assetstore/{assetstore_uuid}/import"
    params = {
        "destinationId": destination_uuid,
        "destinationType": "folder",
        "importPath": "/",
    }
    try:
        gc.post(request_url, parameters=params)
        logger.debug(
            f"Importing from assetstore id {assetstore_uuid}"
            + f"to destination id {destination_uuid}"
        )
    except Exception as err:
        logger.error(f"Couldn't import assetstore id {assetstore_uuid} : {err}")
        raise RuntimeError("Unable to import assetstore to collection")

push_annotation_to_dsa_image(item_uuid, annotation_file_urlpath, uri, gc, storage_options={})

Pushes annotation to DSA, adding given item_uuid (slide-specific id)

Parameters:

    item_uuid (str): DSA item uuid to be tied to the annotation. Required.
    annotation_file_urlpath (str): URL/path to the annotation JSON file, in DSA-compatible
        format. Required.
    uri (str): DSA scheme://host:port e.g. http://localhost:8080. Required.
    gc (GirderClient): girder client. Required.
    storage_options (dict): storage options to pass to the filesystem. Defaults to {}.

Returns:

    str: annotation uuid once the annotation is processed, or None if it is still processing

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def push_annotation_to_dsa_image(
    item_uuid: str,
    annotation_file_urlpath: str,
    uri: str,
    gc: girder_client.GirderClient,
    storage_options: dict = {},
):
    """Pushes annotation to DSA, adding given item_uuid (slide-specific id)

    Args:
        item_uuid (str): DSA item uuid to be tied to the annotation
        annotation_file_urlpath (str): URL/path to the annotation JSON file, in DSA-compatible format
        uri (str): DSA scheme://host:port e.g. http://localhost:8080
        gc: girder client
        storage_options (dict): storage options to pass to the filesystem

    Returns:
        str: annotation uuid once the annotation is processed, or None if it is still processing
    """

    annotation_name = Path(annotation_file_urlpath).name

    start = time.time()

    # always post a new annotation.
    # updating or deleting an existing annotation for a large annotation
    # document results in timeout.
    try:
        fs, path = fsspec.core.url_to_fs(annotation_file_urlpath, **storage_options)
        size = fs.size(path)
        reference = {
            "identifier": f"{Path(path).stem}-AnnotationFile",
            "itemId": item_uuid,
        }
        with fs.open(path) as of:
            gc.uploadFile(
                item_uuid,
                of,
                annotation_name,
                size,
                reference=orjson.dumps(reference).decode(),
            )

    except requests.exceptions.HTTPError as err:
        raise RuntimeError(
            f"Error in annotation upload: {err.response.status_code}, "
            + err.response.text
        )

    # Wait for annotation to be processed
    annotation_id = check_annotation_exists_with_retry(
        gc, item_uuid, annotation_name, retry_count=100, delay=20
    )
    if annotation_id:
        logger.info(f"Annotation successfully pushed to DSA as {annotation_id}.")
    else:
        logger.info("Annotation pushed to DSA but still processing.")
    logger.info(f"Time to push annotation {time.time() - start}")
    logger.info(f"{uri}/histomics#?image={item_uuid}")
    return annotation_id

system_check(gc)

Check DSA connection with the girder client

Parameters:

    gc: girder client. Required.

Returns:

    int: 0 for successful connection, 1 otherwise

Source code in src/luna/pathology/dsa/dsa_api_handler.py
def system_check(gc):
    """Check DSA connection with the girder client

    Args:
        gc: girder client
    Returns:
        int: 0 for successful connection, 1 otherwise
    """

    try:
        _ = gc.get("/system/check")

    except requests.exceptions.HTTPError as err:
        logger.error("Please check your host or credentials")
        logger.error(err)
        return 1

    logger.info("Successfully connected to DSA")

    return 0

utils

get_color(name, line_colors={}, fill_colors={}, alpha=100)

Get colors for cells/regions based on discrete categories.

Parameters:

    name (string): feature name e.g. Stroma, Tumor. Required.
    line_colors (dict, optional): line color map with {feature name: rgb values}. Defaults to {}.
    fill_colors (dict, optional): fill color map with {feature name: rgba values}. Defaults to {}.
    alpha (int, optional): alpha value for the fill color. Defaults to 100.

Returns:

    Tuple[string, string]: RGB line color and RGBA fill color

Source code in src/luna/pathology/dsa/utils.py
def get_color(name, line_colors={}, fill_colors={}, alpha=100):
    """Get colors for cells/regions based on discrete categories.

    Args:
        name (string): feature name e.g. Stroma, Tumor
        line_colors (dict, optional): line color map with {feature name:rgb values}
        fill_colors (dict, optional): fill color map with {feature name:rgba values}
        alpha (int, optional): alpha value for the fill color. 100 by default

    Returns:
        Tuple[string, string]: RGB line color and RGBA fill color
    """
    if name not in line_colors and name not in fill_colors:
        r = randint(0, 255)
        g = randint(0, 255)
        b = randint(0, 255)
        fill_colors[name] = "rgba({}, {}, {}, {})".format(r, g, b, alpha)
        line_colors[name] = "rgb({}, {}, {})".format(r, g, b)
    return line_colors[name], fill_colors[name]
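
Because the default line_colors/fill_colors dicts are shared across calls, a randomly assigned color is reused on subsequent calls with the same name:

line, fill = get_color("Tumor")    # assigns and caches a random color for 'Tumor'
line2, fill2 = get_color("Tumor")  # returns the same cached color
assert (line, fill) == (line2, fill2)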

get_continuous_color(value, outline_color='same_as_fill', alpha=100)

Get RGBA line and fill colors for value.

Use color palette viridis to set a fill value - the color ranges from purple to yellow, for the values from 0 to 1. This function is used in generating a heatmap.

Parameters:

    value (float): continuous value in [0,1]. Required.
    outline_color (string, optional): manages the color used to outline the border of the
        annotation; by default, uses the same color as fill_color. Defaults to 'same_as_fill'.
    alpha (int, optional): alpha value for the fill color. Defaults to 100.

Returns:

    Tuple[str, str]: RGB line color and RGBA fill color

Source code in src/luna/pathology/dsa/utils.py
def get_continuous_color(
    value, outline_color="same_as_fill", alpha=100
) -> Tuple[str, str]:
    """Get RGBA line and fill colors for value.

    Use color palette `viridis` to set a fill value - the color ranges from purple to yellow,
     for the values from 0 to 1. This function is used in generating a heatmap.

    Args:
        value (float): continuous value in [0,1]
        outline_color (string, optional): manages the color used to outline the border of the annotation.
            by default, uses the same color as fill_color.
        alpha (int, optional): alpha value for the fill color. 100 by default

    Returns:
        Tuple[str, str]: RGB line color and RGBA fill color
    """
    c = sns.color_palette("viridis", as_cmap=True)
    r, g, b, a = c(value, bytes=True)

    fill_color = "rgba({}, {}, {}, {})".format(r, g, b, alpha)
    if outline_color == "same_as_fill":
        line_color = "rgb({}, {}, {})".format(r, g, b)
    elif outline_color == "black":
        line_color = "rgb({}, {}, {})".format(0, 0, 0)
    elif outline_color == "white":
        line_color = "rgb({}, {}, {})".format(255, 255, 255)
    else:
        return None, None
    return line_color, fill_color

vectorize_np_array_bitmask_by_pixel_value(bitmask_np, label_num=255, polygon_tolerance=1, contour_level=0.5, scale_factor=1)

Get simplified contours from the bitmask

Parameters:

    bitmask_np (np.array): a numpy bitmask. Required.
    label_num (int, optional): numeric value to filter the numpy array. Defaults to 255.
    polygon_tolerance (float, optional): maximum distance from original points of the
        polygon to the approximated polygonal chain. If tolerance is 0, the original
        coordinate array is returned. Defaults to 1.
    contour_level (float, optional): value along which to find contours in the array.
        Defaults to 0.5.
    scale_factor (int, optional): scale to match the image. Defaults to 1.

Returns:

    list: simplified approximated contours

Source code in src/luna/pathology/dsa/utils.py
def vectorize_np_array_bitmask_by_pixel_value(
    bitmask_np, label_num=255, polygon_tolerance=1, contour_level=0.5, scale_factor=1
):
    """Get simplified contours from the bitmask

    Args:
        bitmask_np (np.array): a numpy bitmask
        label_num (int, optional): numeric value to filter the numpy array
        polygon_tolerance (float, optional): Maximum distance from original points of polygon
            to approximated polygonal chain. If tolerance is 0, the original coordinate array is returned.
        contour_level (float, optional): Value along which to find contours in the array.
            0.5 by default
        scale_factor (int, optional): scale to match image. default 1

    Returns:
        list: simplified approximated contours
    """
    if not scale_factor:
        scale_factor = 1
    mask = np.where(bitmask_np == label_num, 1, 0).astype(np.int8)
    contours = measure.find_contours(mask, level=contour_level)
    simplified_contours = [
        measure.approximate_polygon(c, tolerance=polygon_tolerance) for c in contours
    ]
    for _, contour in enumerate(simplified_contours):
        for coord in contour:
            x = int(round(coord[0]))
            y = int(round(coord[1]))
            # switch coordinates, otherwise gets flipped
            coord[0] = y * scale_factor
            coord[1] = x * scale_factor

    return simplified_contours
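
A small end-to-end sketch:

import numpy as np

bitmask = np.zeros((100, 100), dtype=np.uint8)
bitmask[20:60, 30:70] = 255  # one filled rectangle labeled 255

contours = vectorize_np_array_bitmask_by_pixel_value(bitmask, scale_factor=4)
# each contour is an array of points with coordinates swapped to (x, y) and
# multiplied by scale_factor to map back to full-resolution space
print(len(contours), contours[0].shape)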

slideviewer

regional_annotation

dask_generate

cli(data_config_file, app_config_file)

This module generates parquets with regional annotation pathology data

INPUT PARAMETERS

app_config_file - path to yaml file containing application runtime parameters. See config.yaml.template

data_config_file - path to yaml file containing data input and output parameters. See dask_data_config.yaml.template

TABLE SCHEMA

  • sv_project_id: project number in slide viewer

  • slideviewer_path: slide path based on slideviewer organization

  • slide_id: slide id. synonymous with image_id

  • user: username of the annotator for a given annotation. Multiple annotations from different users are combined per slide; in that combined case, user is set to 'CONCAT' and bmp_filepath and npy_filepath are null.

  • bmp_filepath: file path to downloaded bmp annotation file

  • npy_filepath: file path to npy annotation file converted from bmp

  • geojson_path: file path to geojson file converted from numpy

  • date: creation date

  • labelset:

Source code in src/luna/pathology/slideviewer/regional_annotation/dask_generate.py
@click.command()
@click.option(
    "-d",
    "--data_config_file",
    default=None,
    type=click.Path(exists=True),
    help="path to yaml file containing data input and output parameters. "
    "See dask_data_config.yaml.template",
)
@click.option(
    "-a",
    "--app_config_file",
    default="config.yaml",
    type=click.Path(exists=True),
    help="path to yaml file containing application runtime parameters. "
    "See config.yaml.template",
)
def cli(data_config_file, app_config_file):
    """This module generates parquets with regional annotation pathology data

    INPUT PARAMETERS

    app_config_file - path to yaml file containing application runtime parameters. See config.yaml.template

    data_config_file - path to yaml file containing data input and output parameters. See dask_data_config.yaml.template

    TABLE SCHEMA

    - sv_project_id: project number in slide viewer

    - slideviewer_path: slide path based on slideviewer organization

    - slide_id: slide id. synonymous with image_id

    - user: username of the annotator for a given annotation. Multiple annotations from
        different users are combined per slide; in that combined case, user is set to 'CONCAT'
        and bmp_filepath and npy_filepath are null.

    - bmp_filepath: file path to downloaded bmp annotation file

    - npy_filepath: file path to npy annotation file converted from bmp

    - geojson_path: file path to geojson file converted from numpy

    - date: creation date

    - labelset:
    """
    logger = init_logger()

    # load configs
    cfg = ConfigSet(name="DATA_CFG", config_file=data_config_file)
    cfg = ConfigSet(name="APP_CFG", config_file=app_config_file)

    with CodeTimer(logger, "generate annotation geojson table"):
        logger.info("data template: " + data_config_file)
        logger.info("config_file: " + app_config_file)

        # copy app and data configuration to destination config dir
        config_location = const.CONFIG_LOCATION(cfg)
        os.makedirs(config_location, exist_ok=True)

        shutil.copy(app_config_file, os.path.join(config_location, "app_config.yaml"))
        shutil.copy(data_config_file, os.path.join(config_location, "data_config.yaml"))
        logger.info("config files copied to %s", config_location)

        failed = create_geojson_table()

        if failed:
            logger.error("GEOJSON table creation had errors. Exiting.")
            logger.error(failed)
            raise RuntimeError("GEOJSON table creation had errors. Exiting.")

        return
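
Assuming the module exposes this click command as its entry point (the invocation path is an assumption), a run might look like:

# Hypothetical invocation; the -d/-a flags are those defined above
python -m luna.pathology.slideviewer.regional_annotation.dask_generate \
    -d dask_data_config.yaml \
    -a config.yaml
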
create_geojson_table()

Vectorizes npy array annotation file into polygons and builds GeoJson with the polygon features. Creates a geojson file per labelset. Combines multiple annotations from different users for a slide.

Returns:

Name Type Description
list

list of slide ids that failed

Source code in src/luna/pathology/slideviewer/regional_annotation/dask_generate.py
def create_geojson_table():
    """Vectorizes npy array annotation file into polygons and builds GeoJson with the polygon features.
    Creates a geojson file per labelset.
    Combines multiple annotations from different users for a slide.

    Returns:
        list: list of slide ids that failed
    """
    logger = logging.getLogger(__name__)

    failed = []
    # get application and data config variables
    cfg = ConfigSet()
    client = Client(n_workers=25, threads_per_worker=1, memory_limit=0.1)
    client.run(init_logger)
    logger.info(client)

    SLIDEVIEWER_API_URL = cfg.get_value("DATA_CFG::SLIDEVIEWER_API_URL")
    SLIDEVIEWER_CSV_FILE = cfg.get_value("DATA_CFG::SLIDEVIEWER_CSV_FILE")
    PROJECT_ID = cfg.get_value("DATA_CFG::PROJECT_ID")
    LANDING_PATH = cfg.get_value("DATA_CFG::LANDING_PATH")
    TMP_ZIP_DIR_NAME = cfg.get_value("DATA_CFG::REQUESTOR_DEPARTMENT") + "_tmp_zips"
    TMP_ZIP_DIR = os.path.join(LANDING_PATH, TMP_ZIP_DIR_NAME)
    SLIDE_BMP_DIR = os.path.join(LANDING_PATH, "regional_bmps")
    SLIDE_NPY_DIR = os.path.join(LANDING_PATH, "regional_npys")
    SLIDE_STORE_DIR = os.path.join(LANDING_PATH, "slides")
    TABLE_OUT_DIR = const.TABLE_LOCATION(cfg)

    os.makedirs(TABLE_OUT_DIR, exist_ok=True)
    logger.info("Table output directory = %s", TABLE_OUT_DIR)

    # setup variables needed for build geojson UDF
    contour_level = cfg.get_value("DATA_CFG::CONTOUR_LEVEL")

    # fetch full set of slideviewer slides for project
    slides = fetch_slide_ids(
        SLIDEVIEWER_API_URL,
        PROJECT_ID,
        const.CONFIG_LOCATION(cfg),
        SLIDEVIEWER_CSV_FILE,
    )
    df = pd.DataFrame(
        data=np.array(slides), columns=["slideviewer_path", "slide_id", "sv_project_id"]
    )

    # get users and labelsets for df explosion
    all_users_list = cfg.get_value("DATA_CFG::USERS")
    all_labelsets = cfg.get_value("DATA_CFG::LABEL_SETS")

    global params
    params = cfg.get_config_set("APP_CFG")

    bmp_jobs = []
    for _, row in df.iterrows():
        bmp_future = client.submit(
            check_slideviewer_and_download_bmp,
            row.sv_project_id,
            row.slideviewer_path,
            row.slide_id,
            all_users_list,
            SLIDE_BMP_DIR,
            SLIDEVIEWER_API_URL,
            TMP_ZIP_DIR,
        )
        bmp_jobs.append(bmp_future)

    json_jobs = []
    for bmp_future in as_completed(bmp_jobs):
        if bmp_future.result() is not None:
            json_future = client.submit(
                convert_slide_bitmap_to_geojson,
                bmp_future,
                all_labelsets,
                contour_level,
                SLIDE_NPY_DIR,
                SLIDE_STORE_DIR,
            )
            json_jobs.append(json_future)

    for json_future in as_completed(json_jobs):
        slide_id = -1
        try:
            if json_future.result() is not None:
                slide_id, data = json_future.result()
                if slide_id and data:
                    result_df = pd.DataFrame(data)
                    logger.info(result_df)
                    result_df.drop(columns="geojson").to_parquet(
                        f"{TABLE_OUT_DIR}/regional_annot_slice_slide={slide_id}.parquet"
                    )
                else:
                    failed.append(slide_id)
                    logger.warning(
                        "Empty geojson returned. This means either the slide was empty or an error occurred during geojson generation."
                    )
        except Exception:
            failed.append(slide_id)
            logger.warning(f"Something was wrong with future {json_future}, skipping.")

    client.shutdown()

    return failed
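A minimal invocation sketch, assuming the application and data configurations have already been registered with ConfigSet (as the CLI wrapper above does before calling this function); the error handling shown is illustrative:

from luna.pathology.slideviewer.regional_annotation.dask_generate import (
    create_geojson_table,
)

# Assumes APP_CFG and DATA_CFG were loaded into ConfigSet beforehand
failed_slides = create_geojson_table()
if failed_slides:
    print(f"{len(failed_slides)} slide(s) failed: {failed_slides}")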

spatial

stats

Kfunction(p1XY, p2XY, radius, ls=False, count=True, intensity=[], distance=False, distance_scale=10.0)

Computes the Counting, Intensity, and experimental Intensity-Distance K functions

Parameters:

Name Type Description Default
p1XY ndarray

An Nx2 array representing the (X,Y) coordinates of cells with phenotype 1

required
p2XY ndarray

Same as p1XY but for phenotype 2 cells

required
radius (float, list[float])

The radius (or list of radii) to consider

required
ls bool

If True, returns a |radius| x |p1XY| 2D array representing the K function for each phenotype 1 cell at each radius. If False, returns the mean for each radius

False
count bool

By default, this function only computes the Counting K function. Can be disabled with count=False.

True
intensity ndarray

An array of length |p2XY| representing the intensity of each phenotype 2 cell. When passed in, this method will also compute the Intensity K function

[]
distance bool

If an intensity array is passed in, then setting distance=True will compute the experimental Intensity-Distance K function

False
distance_scale float

Characteristic distance scale (usually approx. 1 cell length in the given units)

10.0

Returns:

Type Description
dict

a dictionary with keys ["count", "intensity", "distance"] and values corresponding to the result of each K function
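Reading directly from the implementation below, the three variants for each phenotype 1 cell i reduce to the following, where d_{ij} is the distance from cell i to phenotype 2 cell j, I_j is the intensity of cell j, and s is distance_scale:

K_{\mathrm{count}}(i, r) = \sum_j \mathbf{1}[d_{ij} \le r]

K_{\mathrm{intensity}}(i, r) = \sum_j \mathbf{1}[d_{ij} \le r] \, I_j

K_{\mathrm{distance}}(i, r) = \sum_j \mathbf{1}[d_{ij} \le r] \, \frac{I_j}{s + (d_{ij}/s)^3}

With ls=True the per-cell values are returned for each radius; with ls=False each variant is averaged over the phenotype 1 cells per radius.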
Source code in src/luna/pathology/spatial/stats.py
def Kfunction(
    p1XY,
    p2XY,
    radius,
    ls=False,
    count=True,
    intensity=[],
    distance=False,
    distance_scale=10.0,
):
    """Computes the Counting, Intensity, and experimental
                Intensity-Distance K functions

    Args:
            p1XY (np.ndarray): An Nx2 array representing the (X,Y) coordinates of cells with phenotype 1
            p2XY (np.ndarray): Same as p1XY but for phenotype 2 cells
            radius (float, list[float]): The radius (or list of radii) to consider
            ls (bool): If True, returns an |radius|x|p1XY| 2D array representing the K function
                for each phenotype 1 cell for each radius. If False, returns the mean
                for each radius
            count (bool): By default, this function only computes the Counting K function.
                   Can be disabled with count=False.
            intensity (np.ndarray): An array of length |p2XY| representing the intensity of each
                       phenotype 2 cell. When passed in, this method will also compute
                       the Intensity K function
        distance (bool): If an intensity array is passed in, then setting distance=True
                  will compute the experimental Intensity-Distance K function
                distance_scale (float): Characteristic distance scale (usually approx. 1 cell length in the given units)

        Returns:
                dict: a dictionary with keys ["count", "intensity", "distance"] and values corresponding to the result of each K function
    """
    # Compute the distance matrix
    dists = cdist(p1XY, p2XY)

    # Turn radius into an array if it isn't one already
    try:
        iter(radius)
    except TypeError:
        radius = [radius]

    # Define the lambdas for each K function variant
    CKfunc = lambda mask: np.sum(mask, axis=1)
    IKfunc = lambda Imask: np.sum(Imask, axis=1)
    IDKfunc = lambda Imask: np.sum(
        Imask * (1 / (distance_scale + (dists / distance_scale) ** 3)), axis=1
    )

    # Compute the mask for each radius
    masks = [(dists <= r) for r in radius]

    # Calculate each K function
    Kdict = {}
    if count:
        CK = [CKfunc(mask) for mask in masks]
        Kdict["count"] = _ret(CK, ls)
    if len(intensity) > 0:
        assert len(intensity) == len(p2XY)
        Imasks = [mask * intensity for mask in masks]
        IK = [IKfunc(Imask) for Imask in Imasks]
        Kdict["intensity"] = _ret(IK, ls)
        if distance:
            IDK = [IDKfunc(Imask) for Imask in Imasks]
            Kdict["distance"] = _ret(IDK, ls)

    return Kdict
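A minimal usage sketch with synthetic data (the coordinates, intensities, and radii below are illustrative, not from the library):

import numpy as np
from luna.pathology.spatial.stats import Kfunction

rng = np.random.default_rng(42)
p1XY = rng.uniform(0, 100, size=(50, 2))       # phenotype 1 centroids
p2XY = rng.uniform(0, 100, size=(80, 2))       # phenotype 2 centroids
intensity = rng.uniform(0, 1, size=len(p2XY))  # per-cell intensity for phenotype 2

result = Kfunction(
    p1XY,
    p2XY,
    radius=[10.0, 25.0],  # one estimate per radius
    ls=False,             # per-radius means rather than per-cell arrays
    intensity=intensity,  # enables the Intensity K function
    distance=True,        # enables the Intensity-Distance variant
)
print(result["count"], result["intensity"], result["distance"])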

transforms

Higher-level transformation functions

generate_k_function_statistics(cell_paths, method_data, main_index=None)

Compute K-function spatial statistics on given cell-data

Parameters:

Name Type Description Default
cell_paths str or list[str]

paths to a single or multiple FOV regions

required
method_data dict

Configuration: "index": (str, optional) Column containting the patient/desired ID, if available (overrides main_index) "phenotype1" : { "name" : (str) Column name to query 'value' : (str) Phenotype string to match (e.g. CD68) }, "phenotype2" : { "name" : (str) Column name to query 'value' : (str) Phenotype string to match (e.g. panCK) }, "count" : (bool) Flag to compute counting stats. "radius" : (float) Radius cutoff "intensity" : (str, optional) Column containing intensity information "distance" : (bool) Flag to compute intensity-distance stats.

required

Returns:

Type Description

pd.DataFrame: spatial statistics aggregated over FOVs

Source code in src/luna/pathology/spatial/transforms.py
def generate_k_function_statistics(cell_paths, method_data, main_index=None):
    """
    Compute K-function spatial statistics on given cell-data

    Args:
        cell_paths (str or list[str]): paths to a single or multiple FOV regions
        method_data (dict): Configuration:
                "index": (str, optional) Column containting the patient/desired ID, if available (overrides main_index)
                "phenotype1" : {
                        "name" : (str) Column name to query
                        'value' : (str) Phenotype string to match (e.g. CD68)
                },
                "phenotype2" : {
                        "name" : (str) Column name to query
                        'value' : (str) Phenotype string to match (e.g. panCK)
                },
                "count" : (bool) Flag to compute counting stats.
                "radius" : (float) Radius cutoff
                "intensity" : (str, optional) Column containing intensity information
                "distance" : (bool) Flag to compute intensity-distance stats.

    Returns:
        pd.DataFrame: spatial statistics aggregated over FOVs
    """

    if isinstance(cell_paths, str):
        cell_paths = [cell_paths]

    print(cell_paths)

    agg_k_data = {}

    pheno1_col = method_data["phenotype1"]["name"]
    pheno1_val = method_data["phenotype1"]["value"]
    pheno2_col = method_data["phenotype2"]["name"]
    pheno2_val = method_data["phenotype2"]["value"]
    index_col = method_data.get("index", None)
    radius = method_data["radius"]
    count = method_data["count"]
    distance = method_data["distance"]
    intensity_col = method_data.get("intensity", None)

    indices = set()

    for cell_path in cell_paths:

        if Path(cell_path).suffix == ".parquet":
            df = pd.read_parquet(cell_path)
        elif Path(cell_path).suffix == ".csv":
            df = pd.read_csv(cell_path)
        else:
            raise RuntimeError(f"Invalid input data type {cell_path}")

        # Look up the index for this slice; default to None so the empty-phenotype
        # warnings below still format when no index column is configured
        index = None
        if index_col:
            index = df[index_col].iloc[0]
            indices.add(index)

        # Create the data arrays
        pheno1 = df[df[pheno1_col] == pheno1_val]
        pheno2 = df[df[pheno2_col] == pheno2_val]
        p1XY = np.array(pheno1[["Centroid X µm", "Centroid Y µm"]])
        p2XY = np.array(pheno2[["Centroid X µm", "Centroid Y µm"]])

        if intensity_col:
            intensity = np.array(pheno2[intensity_col])
        else:
            intensity = []
            if distance:
                raise RuntimeError(
                    "Can't compute intensity-distance function without intensity information"
                )

        if p1XY.size == 0:
            print(
                f"WARNING: List of phenotype 1 cells ({pheno1_val}) is empty for {index}"
            )
        if p2XY.size == 0:
            print(
                f"WARNING: List of phenotype 2 cells ({pheno2_val}) is empty for {index}"
            )

        # Compute the K function
        print(f"Running... {cell_path}")

        fov_k_data = Kfunction(
            p1XY,
            p2XY,
            radius,
            ls=True,
            count=count,
            intensity=intensity,
            distance=distance,
        )

        for key in fov_k_data:
            if key in agg_k_data:
                # np.append returns a new array rather than modifying in place,
                # so reassign for the per-FOV results to actually accumulate
                agg_k_data[key] = np.append(agg_k_data[key], fov_k_data[key])
            else:
                agg_k_data[key] = fov_k_data[key]

    data_out = {}

    for kfunct in agg_k_data.keys():
        arr = agg_k_data[kfunct]
        if len(arr) == 0:
            arr = [0]
        data_out.update(
            {
                f"For_{pheno1_val}_Find_{pheno2_val}_at{radius}_{kfunct}_{intensity_col}_mean": np.mean(
                    arr
                ),
                f"For_{pheno1_val}_Find_{pheno2_val}_at{radius}_{kfunct}_{intensity_col}_variance": np.var(
                    arr
                ),
                f"For_{pheno1_val}_Find_{pheno2_val}_at{radius}_{kfunct}_{intensity_col}_skew": scipy.stats.skew(
                    arr
                ),
                f"For_{pheno1_val}_Find_{pheno2_val}_at{radius}_{kfunct}_{intensity_col}_kurtosis": scipy.stats.kurtosis(
                    arr
                ),
            }
        )

    df_slice_out = pd.DataFrame(data_out, index=[0]).astype(np.float64)

    if main_index is None:
        if not len(indices) == 1:
            raise RuntimeError(
                f"Multiple cell maps with different indices! Found: {indices}"
            )
        main_index = indices.pop()

    df_slice_out["main_index"] = main_index
    df_slice_out = df_slice_out.set_index("main_index")

    print(df_slice_out)

    return df_slice_out
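A usage sketch; the file path and the "Phenotype" / "panCK_intensity" column names are hypothetical placeholders for this example, while the centroid columns "Centroid X µm" / "Centroid Y µm" are required by the implementation above:

from luna.pathology.spatial.transforms import generate_k_function_statistics

method_data = {
    "phenotype1": {"name": "Phenotype", "value": "CD68"},   # hypothetical column
    "phenotype2": {"name": "Phenotype", "value": "panCK"},
    "radius": 25.0,                  # radius cutoff, in the table's units
    "count": True,                   # compute counting statistics
    "intensity": "panCK_intensity",  # hypothetical column; optional
    "distance": False,               # requires an intensity column if True
}

df_stats = generate_k_function_statistics(
    "fov_cells.csv",           # hypothetical path; .csv or .parquet accepted
    method_data,
    main_index="patient_001",  # or provide an "index" column via method_data
)

The output is a single-row DataFrame whose columns follow the pattern For_{phenotype1}_Find_{phenotype2}_at{radius}_{kfunction}_{intensity}_{statistic}, covering the mean, variance, skew, and kurtosis of each computed K function.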