Replacing outliers

`replace_outliers`

`ReplaceCooksOutliers`

Bases: ComputeTrimmedMean, LocFindCooksOutliers, AggMergeOutlierGenes, LocReplaceCooksOutliers, LocSetRefitAdata, AggNewAllZeros, LocSetNewAllZerosAndGetFeatures

Mixin class to replace Cook's outliers.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/replace_outliers.py

class ReplaceCooksOutliers(
    ComputeTrimmedMean,
    LocFindCooksOutliers,
    AggMergeOutlierGenes,
    LocReplaceCooksOutliers,
    LocSetRefitAdata,
    AggNewAllZeros,
    LocSetNewAllZerosAndGetFeatures,
):
    """Mixin class chaining the federated steps that replace Cook's outliers."""

    trimmed_mean_num_iter: int

    @log_organisation_method
    def replace_outliers(
        self,
        train_data_nodes,
        aggregation_node,
        local_states,
        cooks_shared_state,
        round_idx,
        clean_models,
    ):
        """Replace outlier counts.

        The method alternates local steps and aggregation steps, threading
        ``local_states`` and ``round_idx`` from each step to the next one.

        Parameters
        ----------
        train_data_nodes: list
            List of TrainDataNode.

        aggregation_node: AggregationNode
            The aggregation node.

        local_states: list[dict]
            Local states. Required to propagate intermediate results.

        cooks_shared_state: dict
            Shared state with the dispersion values for Cook's distances, in a
            "cooks_dispersions" key.

        round_idx: int
            Index of the current round.

        clean_models: bool
            Whether to clean the models after the computation.

        Returns
        -------
        local_states: list[dict]
            Local states, as returned by the last local step.

        shared_states: list[dict]
            List of shared states with the features vector to input to
            compute_genewise_dispersion in a "local_features" key.

        round_idx: int
            The updated round index.
        """

        def _local(method, states, shared_state, idx, description):
            # Every local step shares the same plumbing; only the method, its
            # input shared state and its description differ.
            return local_step(
                local_method=method,
                train_data_nodes=train_data_nodes,
                output_local_states=states,
                input_local_states=states,
                input_shared_state=shared_state,
                aggregation_id=aggregation_node.organization_id,
                description=description,
                round_idx=idx,
                clean_models=clean_models,
            )

        def _aggregate(method, shared_states, idx, description):
            # Same idea for the steps executed on the aggregation node.
            return aggregation_step(
                aggregation_method=method,
                train_data_nodes=train_data_nodes,
                aggregation_node=aggregation_node,
                input_shared_states=shared_states,
                description=description,
                round_idx=idx,
                clean_models=clean_models,
            )

        # 1. Each center flags its own Cook's outliers.
        local_states, outlier_states, round_idx = _local(
            self.loc_find_cooks_outliers,
            local_states,
            cooks_shared_state,
            round_idx,
            "Find local Cooks outliers",
        )

        # 2. Merge the local gene lists into the global list of genes to replace.
        genes_to_replace_state, round_idx = _aggregate(
            self.agg_merge_outlier_genes,
            outlier_states,
            round_idx,
            "Merge the lists of local outlier genes",
        )

        # 3. Each center sets up its refit adata from the global gene list.
        #    The shared states produced by this step are not consumed afterwards.
        local_states, _, round_idx = _local(
            self.loc_set_refit_adata,
            local_states,
            genes_to_replace_state,
            round_idx,
            "Set the refit adata with the genes to replace",
        )

        # 4. Compute the imputation values (trimmed means), on genes to refit only.
        local_states, trimmed_means_state, round_idx = self.compute_trim_mean(
            train_data_nodes,
            aggregation_node,
            local_states,
            round_idx,
            clean_models=clean_models,
            layer_used="normed_counts",
            trim_ratio=0.2,
            mode="normal",
            n_iter=self.trimmed_mean_num_iter,
            refit=True,
        )

        # 5. Replace the outliers locally, in replaceable samples.
        local_states, zero_flag_states, round_idx = _local(
            self.loc_replace_cooks_outliers,
            local_states,
            trimmed_means_state,
            round_idx,
            "Replace Cooks outliers locally",
        )

        # 6. Find the genes that only have zero counts because of the imputation.
        new_all_zeros_state, round_idx = _aggregate(
            self.aggregate_new_all_zeros,
            zero_flag_states,
            round_idx,
            "Find new all zero genes",
        )

        # 7. Record the new all-zero genes and get the local features vectors.
        local_states, shared_states, round_idx = _local(
            self.local_set_new_all_zeros_get_features,
            local_states,
            new_all_zeros_state,
            round_idx,
            "Set new all zero genes and get features vector",
        )

        return local_states, shared_states, round_idx

`replace_outliers(train_data_nodes, aggregation_node, local_states, cooks_shared_state, round_idx, clean_models)`

Replace outlier counts.

Parameters:

Name	Description	Default
`train_data_nodes`	List of TrainDataNode.	required
`aggregation_node`	The aggregation node.	required
`local_states`	Local states. Required to propagate intermediate results.	required
`cooks_shared_state`	Shared state with the dispersion values for Cook's distances, in a "cooks_dispersions" key.	required
`round_idx`	Index of the current round.	required
`clean_models`	Whether to clean the models after the computation.	required

Returns:

Name	Type	Description
`local_states`	`dict`	Local states. The new local state contains Cook's distances.
`shared_states`	`list[dict]`	List of shared states with the features vector to input to compute_genewise_dispersion in a "local_features" key.
`round_idx`	`int`	The updated round index.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/replace_outliers.py

@log_organisation_method
def replace_outliers(
    self,
    train_data_nodes,
    aggregation_node,
    local_states,
    cooks_shared_state,
    round_idx,
    clean_models,
):
    """Replace outlier counts.

    The method alternates local steps and aggregation steps, threading
    ``local_states`` and ``round_idx`` from each step to the next one.

    Parameters
    ----------
    train_data_nodes: list
        List of TrainDataNode.

    aggregation_node: AggregationNode
        The aggregation node.

    local_states: list[dict]
        Local states. Required to propagate intermediate results.

    cooks_shared_state: dict
        Shared state with the dispersion values for Cook's distances, in a
        "cooks_dispersions" key.

    round_idx: int
        Index of the current round.

    clean_models: bool
        Whether to clean the models after the computation.

    Returns
    -------
    local_states: list[dict]
        Local states, as returned by the last local step.

    shared_states: list[dict]
        List of shared states with the features vector to input to
        compute_genewise_dispersion in a "local_features" key.

    round_idx: int
        The updated round index.
    """

    def _local(method, states, shared_state, idx, description):
        # Every local step shares the same plumbing; only the method, its
        # input shared state and its description differ.
        return local_step(
            local_method=method,
            train_data_nodes=train_data_nodes,
            output_local_states=states,
            input_local_states=states,
            input_shared_state=shared_state,
            aggregation_id=aggregation_node.organization_id,
            description=description,
            round_idx=idx,
            clean_models=clean_models,
        )

    def _aggregate(method, shared_states, idx, description):
        # Same idea for the steps executed on the aggregation node.
        return aggregation_step(
            aggregation_method=method,
            train_data_nodes=train_data_nodes,
            aggregation_node=aggregation_node,
            input_shared_states=shared_states,
            description=description,
            round_idx=idx,
            clean_models=clean_models,
        )

    # 1. Each center flags its own Cook's outliers.
    local_states, outlier_states, round_idx = _local(
        self.loc_find_cooks_outliers,
        local_states,
        cooks_shared_state,
        round_idx,
        "Find local Cooks outliers",
    )

    # 2. Merge the local gene lists into the global list of genes to replace.
    genes_to_replace_state, round_idx = _aggregate(
        self.agg_merge_outlier_genes,
        outlier_states,
        round_idx,
        "Merge the lists of local outlier genes",
    )

    # 3. Each center sets up its refit adata from the global gene list.
    #    The shared states produced by this step are not consumed afterwards.
    local_states, _, round_idx = _local(
        self.loc_set_refit_adata,
        local_states,
        genes_to_replace_state,
        round_idx,
        "Set the refit adata with the genes to replace",
    )

    # 4. Compute the imputation values (trimmed means), on genes to refit only.
    local_states, trimmed_means_state, round_idx = self.compute_trim_mean(
        train_data_nodes,
        aggregation_node,
        local_states,
        round_idx,
        clean_models=clean_models,
        layer_used="normed_counts",
        trim_ratio=0.2,
        mode="normal",
        n_iter=self.trimmed_mean_num_iter,
        refit=True,
    )

    # 5. Replace the outliers locally, in replaceable samples.
    local_states, zero_flag_states, round_idx = _local(
        self.loc_replace_cooks_outliers,
        local_states,
        trimmed_means_state,
        round_idx,
        "Replace Cooks outliers locally",
    )

    # 6. Find the genes that only have zero counts because of the imputation.
    new_all_zeros_state, round_idx = _aggregate(
        self.aggregate_new_all_zeros,
        zero_flag_states,
        round_idx,
        "Find new all zero genes",
    )

    # 7. Record the new all-zero genes and get the local features vectors.
    local_states, shared_states, round_idx = _local(
        self.local_set_new_all_zeros_get_features,
        local_states,
        new_all_zeros_state,
        round_idx,
        "Set new all zero genes and get features vector",
    )

    return local_states, shared_states, round_idx

`substeps`

`AggMergeOutlierGenes`

Build the global list of genes to replace.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

class AggMergeOutlierGenes:
    """Build the global list of genes to replace."""

    @remote
    @log_remote
    def agg_merge_outlier_genes(
        self,
        shared_states: list[dict],
    ) -> dict:
        """Merge the local sets of genes to replace into a global one.

        Parameters
        ----------
        shared_states : list
            List of dictionaries, one per center, containing:
            - "local_genes_to_replace": the set of genes with Cook's distance
              above the threshold,
            - "replaceable_samples": a boolean indicating whether there is at
              least one sample with enough replicates to replace it.

        Returns
        -------
        dict
            A dictionary with a unique key: "genes_to_replace". Its value is the
            union of the local sets, or an empty set when no center reports
            replaceable samples.
        """
        # Nothing can be replaced anywhere: skip the merge altogether.
        if not any(state["replaceable_samples"] for state in shared_states):
            return {"genes_to_replace": set()}

        # Otherwise a gene flagged by any center is replaced everywhere.
        local_gene_sets = [state["local_genes_to_replace"] for state in shared_states]
        return {"genes_to_replace": set.union(*local_gene_sets)}

`agg_merge_outlier_genes(shared_states)`

Merge the lists of genes to replace.

Parameters:

Name	Type	Description	Default
`shared_states`	`list`	List of dictionaries containing: - "local_genes_to_replace": genes with Cook's distance above the threshold, - "replaceable_samples": a boolean indicating whether there is at least one sample with enough replicates to replace it.	required

Returns:

Type	Description
`dict`	A dictionary with a unique key: "genes_to_replace" containing the list of genes for which to replace outlier values.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

@remote
@log_remote
def agg_merge_outlier_genes(
    self,
    shared_states: list[dict],
) -> dict:
    """Merge the local sets of genes to replace into a global one.

    Parameters
    ----------
    shared_states : list
        List of dictionaries, one per center, containing:
        - "local_genes_to_replace": the set of genes with Cook's distance
          above the threshold,
        - "replaceable_samples": a boolean indicating whether there is at
          least one sample with enough replicates to replace it.

    Returns
    -------
    dict
        A dictionary with a unique key: "genes_to_replace". Its value is the
        union of the local sets, or an empty set when no center reports
        replaceable samples.
    """
    # Nothing can be replaced anywhere: skip the merge altogether.
    if not any(state["replaceable_samples"] for state in shared_states):
        return {"genes_to_replace": set()}

    # Otherwise a gene flagged by any center is replaced everywhere.
    local_gene_sets = [state["local_genes_to_replace"] for state in shared_states]
    return {"genes_to_replace": set.union(*local_gene_sets)}

`AggNewAllZeros`

Mixin to compute the new all zeros and share to the centers.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

class AggNewAllZeros:
    """Mixin to compute the new all zeros and share to the centers."""

    @remote
    @log_remote
    def aggregate_new_all_zeros(self, shared_states: list) -> dict:
        """Combine the local all-zero flags into global ones.

        Parameters
        ----------
        shared_states : list
            List of shared states from the training nodes. Each one holds a
            "loc_new_all_zeroes" entry with the local all-zero flags.

        Returns
        -------
        dict
            A dictionary with a "new_all_zeroes" key: the element-wise logical
            AND of the local flags across centers.
        """
        # A gene is globally all-zero only if every center reports it as such.
        local_flags = [state["loc_new_all_zeroes"] for state in shared_states]
        global_flags = np.all(local_flags, axis=0)

        return {"new_all_zeroes": global_flags}

`aggregate_new_all_zeros(shared_states)`

Combine the local all-zero flags from every center into the global new all-zero genes.

Parameters:

Name	Type	Description	Default
`shared_states`	`list`	List of shared states from the training nodes, each containing the local all-zero flags in a "loc_new_all_zeroes" key.	required

Returns:

Type	Description
`dict`	New all-zero genes.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

@remote
@log_remote
def aggregate_new_all_zeros(self, shared_states: list) -> dict:
    """Combine the local all-zero flags into global ones.

    Parameters
    ----------
    shared_states : list
        List of shared states from the training nodes. Each one holds a
        "loc_new_all_zeroes" entry with the local all-zero flags.

    Returns
    -------
    dict
        A dictionary with a "new_all_zeroes" key: the element-wise logical
        AND of the local flags across centers.
    """
    # A gene is globally all-zero only if every center reports it as such.
    local_flags = [state["loc_new_all_zeroes"] for state in shared_states]
    global_flags = np.all(local_flags, axis=0)

    return {"new_all_zeroes": global_flags}

`LocFindCooksOutliers`

Find local Cooks outliers.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

class LocFindCooksOutliers:
    """Find local Cooks outliers."""

    local_adata: AnnData
    min_replicates: int

    @remote_data
    @log_remote_data
    @reconstruct_adatas
    def loc_find_cooks_outliers(
        self,
        data_from_opener,
        shared_state: dict,
    ) -> dict:
        """Flag the genes whose Cook's distance exceeds the cutoff in this center.

        The cutoff is the 0.99 quantile of an F distribution with
        (n_params, tot_num_samples - n_params) degrees of freedom. As side
        effects, the method stores the replaceable-sample flags in
        ``local_adata.obsm["replaceable"]`` and the positions above the cutoff
        in ``local_adata.uns["_where_cooks_g_cutoff"]``.

        Parameters
        ----------
        data_from_opener : ad.AnnData
            AnnData returned by the opener. Not used.

        shared_state : dict, optional
            Not used.

        Returns
        -------
        dict
            Shared state containing:
            - "local_genes_to_replace": the set of genes with Cook's distance
              above the threshold in at least one local sample,
            - "replaceable_samples": a boolean indicating whether there is at
              least one sample with enough replicates to replace it.
        """
        adata = self.local_adata

        # Samples are replaceable when they have at least min_replicates replicates
        enough_replicates = adata.uns["num_replicates"] >= self.min_replicates
        adata.obsm["replaceable"] = enough_replicates[adata.obs["cells"]].values

        # Cutoff on the Cook's distances
        n_params = adata.uns["n_params"]
        cooks_cutoff = f.ppf(0.99, n_params, adata.uns["tot_num_samples"] - n_params)

        # Compare once, and reuse the mask for both the stored positions and
        # the per-gene reduction.
        above_cutoff = adata.layers["cooks"] > cooks_cutoff
        adata.uns["_where_cooks_g_cutoff"] = np.where(above_cutoff)

        # A gene is a local outlier gene as soon as one sample is above the cutoff
        outlier_genes = adata.var_names[above_cutoff.any(axis=0)]

        return {
            "local_genes_to_replace": set(outlier_genes),
            "replaceable_samples": adata.obsm["replaceable"].any(),
        }

`loc_find_cooks_outliers(data_from_opener, shared_state)`

Find local Cooks outliers by comparing the cooks distance to a threshold.

Parameters:

Name	Type	Description	Default
`data_from_opener`	`AnnData`	AnnData returned by the opener. Not used.	required
`shared_state`	`dict`	Not used.	required

Returns:

Type	Description
`dict`	Shared state containing: - "local_genes_to_replace": genes with Cook's distance above the threshold, - "replaceable_samples": a boolean indicating whether there is at least one sample with enough replicates to replace it.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

@remote_data
@log_remote_data
@reconstruct_adatas
def loc_find_cooks_outliers(
    self,
    data_from_opener,
    shared_state: dict,
) -> dict:
    """Flag the genes whose Cook's distance exceeds the cutoff in this center.

    The cutoff is the 0.99 quantile of an F distribution with
    (n_params, tot_num_samples - n_params) degrees of freedom. As side
    effects, the method stores the replaceable-sample flags in
    ``local_adata.obsm["replaceable"]`` and the positions above the cutoff
    in ``local_adata.uns["_where_cooks_g_cutoff"]``.

    Parameters
    ----------
    data_from_opener : ad.AnnData
        AnnData returned by the opener. Not used.

    shared_state : dict, optional
        Not used.

    Returns
    -------
    dict
        Shared state containing:
        - "local_genes_to_replace": the set of genes with Cook's distance
          above the threshold in at least one local sample,
        - "replaceable_samples": a boolean indicating whether there is at
          least one sample with enough replicates to replace it.
    """
    adata = self.local_adata

    # Samples are replaceable when they have at least min_replicates replicates
    enough_replicates = adata.uns["num_replicates"] >= self.min_replicates
    adata.obsm["replaceable"] = enough_replicates[adata.obs["cells"]].values

    # Cutoff on the Cook's distances
    n_params = adata.uns["n_params"]
    cooks_cutoff = f.ppf(0.99, n_params, adata.uns["tot_num_samples"] - n_params)

    # Compare once, and reuse the mask for both the stored positions and
    # the per-gene reduction.
    above_cutoff = adata.layers["cooks"] > cooks_cutoff
    adata.uns["_where_cooks_g_cutoff"] = np.where(above_cutoff)

    # A gene is a local outlier gene as soon as one sample is above the cutoff
    outlier_genes = adata.var_names[above_cutoff.any(axis=0)]

    return {
        "local_genes_to_replace": set(outlier_genes),
        "replaceable_samples": adata.obsm["replaceable"].any(),
    }

`LocReplaceCooksOutliers`

Mixin to replace cooks outliers locally.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

class LocReplaceCooksOutliers:
    """Mixin to replace cooks outliers locally."""

    local_adata: AnnData
    refit_adata: AnnData

    @remote_data
    @log_remote_data
    @reconstruct_adatas
    def loc_replace_cooks_outliers(
        self,
        data_from_opener,
        shared_state: dict,
    ) -> dict:
        """Replace outlier counts with imputed values.

        Parameters
        ----------
        data_from_opener : ad.AnnData
            AnnData returned by the opener. Not used.

        shared_state : dict
            A dictionary with a "trimmed_mean_normed_counts" key, containing the
            trimmed means to use to compute the imputed values.

        Returns
        -------
        dict
            A dictionary containing:
            - "loc_new_all_zeroes": flags marking the genes of the refit AnnData
              whose counts sum to zero locally after imputation.
        """
        # Set the trimmed mean normed counts in the varm
        self.refit_adata.varm["_trimmed_mean_normed_counts"] = shared_state[
            "trimmed_mean_normed_counts"
        ]

        # Helper defined elsewhere; presumably writes the imputed counts into
        # refit_adata.X from the trimmed means stored above.
        set_imputed_counts_refit_adata(self)

        # Find new all-zero columns
        new_all_zeroes = self.refit_adata.X.sum(axis=0) == 0

        # No arithmetic happens past this point, so no floating-point error
        # state needs to be overridden around the return.
        return {
            "loc_new_all_zeroes": new_all_zeroes,
        }

`loc_replace_cooks_outliers(data_from_opener, shared_state)`

Replace outlier counts with imputed values.

Parameters:

Name	Type	Description	Default
`data_from_opener`	`AnnData`	AnnData returned by the opener. Not used.	required
`shared_state`	`dict`	A dictionary with a "trimmed_mean_normed_counts" key, containing the trimmed means to use to compute the imputed values.	required

Returns:

Type	Description
`dict`	A dictionary containing: - "loc_new_all_zeroes": a boolean array indicating which genes are now all-zero.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

@remote_data
@log_remote_data
@reconstruct_adatas
def loc_replace_cooks_outliers(
    self,
    data_from_opener,
    shared_state: dict,
) -> dict:
    """Replace outlier counts with imputed values.

    Parameters
    ----------
    data_from_opener : ad.AnnData
        AnnData returned by the opener. Not used.

    shared_state : dict
        A dictionary with a "trimmed_mean_normed_counts" key, containing the
        trimmed means to use to compute the imputed values.

    Returns
    -------
    dict
        A dictionary containing:
        - "loc_new_all_zeroes": flags marking the genes of the refit AnnData
          whose counts sum to zero locally after imputation.
    """
    # Set the trimmed mean normed counts in the varm
    self.refit_adata.varm["_trimmed_mean_normed_counts"] = shared_state[
        "trimmed_mean_normed_counts"
    ]

    # Helper defined elsewhere; presumably writes the imputed counts into
    # refit_adata.X from the trimmed means stored above.
    set_imputed_counts_refit_adata(self)

    # Find new all-zero columns
    new_all_zeroes = self.refit_adata.X.sum(axis=0) == 0

    # No arithmetic happens past this point, so no floating-point error
    # state needs to be overridden around the return.
    return {
        "loc_new_all_zeroes": new_all_zeroes,
    }

`LocSetNewAllZerosAndGetFeatures`

Mixin to set the new all zeros and return local features.

This Mixin implements the method to perform the transition towards the compute_rough_dispersions steps after refitting. It sets the new all zeros genes in the local AnnData and computes the local features to be shared to the aggregation node.

Methods:

Name	Description
`local_set_new_all_zeros_get_features`	The method to set the new all zeros genes and compute the local features.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

class LocSetNewAllZerosAndGetFeatures:
    """Mixin to set the new all zeros and return local features.

    This Mixin implements the method to perform the transition towards the
    compute_rough_dispersions steps after refitting. It sets the new all zeros
    genes in the local AnnData and computes the local features to be shared
    to the aggregation node.

    Methods
    -------
    local_set_new_all_zeros_get_features
        The method to set the new all zeros genes and compute the local features.
    """

    local_adata: ad.AnnData
    refit_adata: ad.AnnData

    @remote_data
    @log_remote_data
    @reconstruct_adatas
    def local_set_new_all_zeros_get_features(
        self,
        data_from_opener,
        shared_state,
    ) -> dict:
        """Set the new_all_zeros field and get the features.

        This method stores, in the local_adata uns field, the names of the genes
        that are all zero after outlier replacement ("new_all_zeroes_genes" key),
        and records in ``local_adata.varm["refitted"]`` which genes remain to be
        refitted.

        It then restricts the refit_adata to the genes which are not all_zero.

        Finally, it computes the local features to be shared via shared_state to the
        aggregation node.

        Parameters
        ----------
        data_from_opener : ad.AnnData
            AnnData returned by the opener. Not used.

        shared_state : dict
            Shared state containing the "new_all_zeroes" key.

        Returns
        -------
        dict
            Dictionary with a "local_features" key, holding the product of the
            transposed design matrix with the normed counts of the restricted
            refit_adata, to be shared to the aggregation node.
        """
        # Take all-zero genes into account.
        # The mask is used below to index the genes (columns) of refit_adata.
        new_all_zeroes = shared_state["new_all_zeroes"]

        self.local_adata.uns["new_all_zeroes_genes"] = self.refit_adata.var_names[
            new_all_zeroes
        ]

        # Start from the "replaced" flags, then update them in place below.
        self.local_adata.varm["refitted"] = self.local_adata.varm["replaced"].copy()
        # Only replace if genes are not all zeroes after outlier replacement.
        # The entries that are True in "refitted" are overwritten with
        # ~new_all_zeroes, so there must be as many of them as entries in
        # new_all_zeroes.
        # NOTE(review): presumably refit_adata holds exactly the "replaced"
        # genes, in the same order -- confirm against set_basic_refit_adata.
        self.local_adata.varm["refitted"][
            self.local_adata.varm["refitted"]
        ] = ~new_all_zeroes

        # RESTRICT REFIT ADATA TO NOT NEW ALL ZEROES
        self.refit_adata = self.refit_adata[:, ~new_all_zeroes].copy()

        # Update normed counts
        set_normed_counts(self.refit_adata)

        #### ---- Compute the feature vector ---- ####
        # (only design.T @ normed_counts is computed here; no Gram matrix)

        design = self.refit_adata.obsm["design_matrix"].values

        return {
            "local_features": design.T @ self.refit_adata.layers["normed_counts"],
        }

`local_set_new_all_zeros_get_features(data_from_opener, shared_state)`

Set the new_all_zeros field and get the features.

This method is used to set the new_all_zeros field in the local_adata uns field. This is the set of genes that are all zero after outlier replacement.

It then restricts the refit_adata to the genes which are not all_zero.

Finally, it computes the local features to be shared via shared_state to the aggregation node.

Parameters:

Name	Type	Description	Default
`data_from_opener`	`AnnData`	AnnData returned by the opener. Not used.	required
`shared_state`	`dict`	Shared state containing the "new_all_zeroes" key.	required

Returns:

Type	Description
`dict`	Local feature vector to be shared via shared_state to the aggregation node.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

@remote_data
@log_remote_data
@reconstruct_adatas
def local_set_new_all_zeros_get_features(
    self,
    data_from_opener,
    shared_state,
) -> dict:
    """Set the new_all_zeros field and get the features.

    This method stores, in the local_adata uns field, the names of the genes
    that are all zero after outlier replacement ("new_all_zeroes_genes" key),
    and records in ``local_adata.varm["refitted"]`` which genes remain to be
    refitted.

    It then restricts the refit_adata to the genes which are not all_zero.

    Finally, it computes the local features to be shared via shared_state to the
    aggregation node.

    Parameters
    ----------
    data_from_opener : ad.AnnData
        AnnData returned by the opener. Not used.

    shared_state : dict
        Shared state containing the "new_all_zeroes" key.

    Returns
    -------
    dict
        Dictionary with a "local_features" key, holding the product of the
        transposed design matrix with the normed counts of the restricted
        refit_adata, to be shared to the aggregation node.
    """
    # Take all-zero genes into account.
    # The mask is used below to index the genes (columns) of refit_adata.
    new_all_zeroes = shared_state["new_all_zeroes"]

    self.local_adata.uns["new_all_zeroes_genes"] = self.refit_adata.var_names[
        new_all_zeroes
    ]

    # Start from the "replaced" flags, then update them in place below.
    self.local_adata.varm["refitted"] = self.local_adata.varm["replaced"].copy()
    # Only replace if genes are not all zeroes after outlier replacement.
    # The entries that are True in "refitted" are overwritten with
    # ~new_all_zeroes, so there must be as many of them as entries in
    # new_all_zeroes.
    # NOTE(review): presumably refit_adata holds exactly the "replaced"
    # genes, in the same order -- confirm against set_basic_refit_adata.
    self.local_adata.varm["refitted"][
        self.local_adata.varm["refitted"]
    ] = ~new_all_zeroes

    # RESTRICT REFIT ADATA TO NOT NEW ALL ZEROES
    self.refit_adata = self.refit_adata[:, ~new_all_zeroes].copy()

    # Update normed counts
    set_normed_counts(self.refit_adata)

    #### ---- Compute the feature vector ---- ####
    # (only design.T @ normed_counts is computed here; no Gram matrix)

    design = self.refit_adata.obsm["design_matrix"].values

    return {
        "local_features": design.T @ self.refit_adata.layers["normed_counts"],
    }

`LocSetRefitAdata`

Mixin to set the refit AnnData with the genes to replace.

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

class LocSetRefitAdata:
    """Mixin to set the refit adata with the genes to replace."""

    local_adata: AnnData
    refit_adata: AnnData

    @remote_data
    @log_remote_data
    @reconstruct_adatas
    def loc_set_refit_adata(
        self,
        data_from_opener,
        shared_state: dict,
    ) -> None:
        """Flag the genes to replace and set the refit adata accordingly.

        The flags are stored, one per gene of ``local_adata``, in
        ``local_adata.varm["replaced"]``.

        Parameters
        ----------
        data_from_opener : ad.AnnData
            AnnData returned by the opener. Not used.

        shared_state : dict
            A dictionary with a "genes_to_replace" key, containing the genes
            for which to replace outlier values.
        """
        adata = self.local_adata
        requested_genes = list(shared_state["genes_to_replace"])

        # One flag per local gene, switched on for the genes to replace
        replaced_flags = pd.Series(False, index=adata.var_names)
        replaced_flags[requested_genes] = True
        adata.varm["replaced"] = replaced_flags.values

        # Copy the values corresponding to the genes to refit in the refit_adata
        set_basic_refit_adata(self)

`loc_set_refit_adata(data_from_opener, shared_state)`

Set a refit adata containing the counts of the genes to replace.

Parameters:

Name	Type	Description	Default
`data_from_opener`	`AnnData`	AnnData returned by the opener. Not used.	required
`shared_state`	`dict`	A dictionary with a "genes_to_replace" key, containing the list of genes for which to replace outlier values.	required

Source code in fedpydeseq2/core/deseq2_core/replace_outliers/substeps.py

@remote_data
@log_remote_data
@reconstruct_adatas
def loc_set_refit_adata(
    self,
    data_from_opener,
    shared_state: dict,
) -> None:
    """Flag the genes to replace and set the refit adata accordingly.

    The flags are stored, one per gene of ``local_adata``, in
    ``local_adata.varm["replaced"]``.

    Parameters
    ----------
    data_from_opener : ad.AnnData
        AnnData returned by the opener. Not used.

    shared_state : dict
        A dictionary with a "genes_to_replace" key, containing the genes
        for which to replace outlier values.
    """
    adata = self.local_adata
    requested_genes = list(shared_state["genes_to_replace"])

    # One flag per local gene, switched on for the genes to replace
    replaced_flags = pd.Series(False, index=adata.var_names)
    replaced_flags[requested_genes] = True
    adata.varm["replaced"] = replaced_flags.values

    # Copy the values corresponding to the genes to refit in the refit_adata
    set_basic_refit_adata(self)