Skip to content

Core of the fed-pydeseq2 package: deseq2_core

The core of the fed-pydeseq2 package is encapsulated in the following Mixin class.

DESeq2FullPipe

Bases: BuildDesignMatrix, ComputeSizeFactors, DESeq2LFCDispersions, ComputeCookDistances, ReplaceCooksOutliers, ReplaceRefittedValues, DESeq2Stats, SavePipelineResults

A Mixin class to run the full DESeq2 pipeline.

Methods:

Name Description
run_deseq_pipe

The method to run the full DESeq2 pipeline.

Source code in fedpydeseq2/core/deseq2_core/deseq2_full_pipe.py
class DESeq2FullPipe(
    BuildDesignMatrix,
    ComputeSizeFactors,
    DESeq2LFCDispersions,
    ComputeCookDistances,
    ReplaceCooksOutliers,
    ReplaceRefittedValues,
    DESeq2Stats,
    SavePipelineResults,
):
    """Mixin assembling the complete federated DESeq2 pipeline.

    Each inherited mixin contributes one pipeline stage; this class only
    sequences them.

    Methods
    -------
    run_deseq_pipe
        The method to run the full DESeq2 pipeline.
    """

    def run_deseq_pipe(
        self,
        train_data_nodes: list[TrainDataNode],
        aggregation_node: AggregationNode,
        local_states: dict[str, LocalStateRef],
        round_idx: int = 0,
        clean_models: bool = True,
        clean_last_model: bool = False,
    ):
        """Run the DESeq2 pipeline.

        Parameters
        ----------
        train_data_nodes : list[TrainDataNode]
            List of the train nodes.
        aggregation_node : AggregationNode
            Aggregation node.
        local_states : dict[str, LocalStateRef]
            Local states.
        round_idx : int
            Round index.
        clean_models : bool
            Whether to clean the models after the computation. (default: ``True``).
            Note that as intermediate steps are very memory consuming, it is
            recommended to clean the models after each step.
        clean_last_model : bool
            Whether to clean the last model. (default: ``False``).
            NOTE(review): this flag is not referenced anywhere in this method
            body — confirm whether it should be forwarded to the final step.
        """
        # Stage 1: design matrices.
        logger.info("Building design matrices...")

        local_states, log_mean_states, round_idx = self.build_design_matrix(
            train_data_nodes,
            aggregation_node,
            local_states,
            round_idx,
            clean_models=clean_models,
        )

        logger.info("Finished building design matrices.")

        # Stage 2: size factors. In refit mode this step recomputes only the
        # log features, not the size factors themselves.
        logger.info("Computing size factors...")

        local_states, gram_features_states, round_idx = self.compute_size_factors(
            train_data_nodes,
            aggregation_node,
            local_states,
            shared_states=log_mean_states,
            round_idx=round_idx,
            clean_models=clean_models,
        )

        logger.info("Finished computing size factors.")

        # Stage 3: log fold changes and dispersions.
        logger.info("Running LFC and dispersions.")

        local_states, round_idx = self.run_deseq2_lfc_dispersions(
            train_data_nodes=train_data_nodes,
            aggregation_node=aggregation_node,
            local_states=local_states,
            gram_features_shared_states=gram_features_states,
            round_idx=round_idx,
            clean_models=clean_models,
        )

        logger.info("Finished running LFC and dispersions.")

        # Stage 4: Cook's distances, used below to detect outliers.
        logger.info("Computing Cook distances...")

        local_states, cooks_state, round_idx = self.compute_cook_distance(
            train_data_nodes,
            aggregation_node,
            local_states,
            round_idx,
            clean_models=clean_models,
        )

        logger.info("Finished computing Cook distances.")

        # Stage 5 (optional): replace Cook's outliers, refit LFCs/dispersions
        # on the trimmed data, then splice the refitted values back in.
        if self.refit_cooks:
            logger.info("Refitting Cook outliers...")

            local_states, gram_features_states, round_idx = self.replace_outliers(
                train_data_nodes,
                aggregation_node,
                local_states,
                cooks_state,
                round_idx,
                clean_models=clean_models,
            )

            local_states, round_idx = self.run_deseq2_lfc_dispersions(
                train_data_nodes=train_data_nodes,
                aggregation_node=aggregation_node,
                local_states=local_states,
                gram_features_shared_states=gram_features_states,
                round_idx=round_idx,
                clean_models=clean_models,
                refit_mode=True,
            )

            # Write the refitted quantities back into the main ``local_adata``.
            local_states, round_idx = self.replace_refitted_values(
                train_data_nodes=train_data_nodes,
                aggregation_node=aggregation_node,
                local_states=local_states,
                round_idx=round_idx,
                clean_models=clean_models,
            )

            logger.info("Finished refitting Cook outliers.")

        # Stage 6: DESeq2 statistics.
        logger.info("Running DESeq2 statistics.")

        local_states, round_idx = self.run_deseq2_stats(
            train_data_nodes,
            aggregation_node,
            local_states,
            round_idx,
            clean_models=clean_models,
        )

        logger.info("Finished running DESeq2 statistics.")

        # Stage 7: assemble the results downloaded at the end of the pipeline.
        logger.info("Saving pipeline results.")

        self.save_pipeline_results(
            train_data_nodes,
            aggregation_node,
            local_states,
            round_idx,
            clean_models=clean_models,
        )

        logger.info("Finished saving pipeline results.")

run_deseq_pipe(train_data_nodes, aggregation_node, local_states, round_idx=0, clean_models=True, clean_last_model=False)

Run the DESeq2 pipeline.

Parameters:

Name Type Description Default
train_data_nodes list[TrainDataNode]

List of the train nodes.

required
aggregation_node AggregationNode

Aggregation node.

required
local_states dict[str, LocalStateRef]

Local states.

required
round_idx int

Round index.

0
clean_models bool

Whether to clean the models after the computation. (default: True). Note that as intermediate steps are very memory consuming, it is recommended to clean the models after each step.

True
clean_last_model bool

Whether to clean the last model. (default: False).

False
Source code in fedpydeseq2/core/deseq2_core/deseq2_full_pipe.py
def run_deseq_pipe(
    self,
    train_data_nodes: list[TrainDataNode],
    aggregation_node: AggregationNode,
    local_states: dict[str, LocalStateRef],
    round_idx: int = 0,
    clean_models: bool = True,
    clean_last_model: bool = False,
):
    """Run the DESeq2 pipeline.

    Sequences the federated stages: design matrix, size factors, LFCs and
    dispersions, Cook's distances, optional outlier refitting, statistics,
    and result saving.

    Parameters
    ----------
    train_data_nodes : list[TrainDataNode]
        List of the train nodes.
    aggregation_node : AggregationNode
        Aggregation node.
    local_states : dict[str, LocalStateRef]
        Local states.
    round_idx : int
        Round index.
    clean_models : bool
        Whether to clean the models after the computation. (default: ``True``).
        Note that as intermediate steps are very memory consuming, it is
        recommended to clean the models after each step.
    clean_last_model : bool
        Whether to clean the last model. (default: ``False``).
        NOTE(review): not referenced in this method body — confirm whether it
        should be forwarded to the final step.
    """
    # --- Design matrices -------------------------------------------------
    logger.info("Building design matrices...")

    local_states, lm_shared_states, round_idx = self.build_design_matrix(
        train_data_nodes,
        aggregation_node,
        local_states,
        round_idx,
        clean_models=clean_models,
    )

    logger.info("Finished building design matrices.")

    # --- Size factors -----------------------------------------------------
    # In refit mode, size factors are not recomputed; only the log features.
    logger.info("Computing size factors...")

    local_states, gram_feat_states, round_idx = self.compute_size_factors(
        train_data_nodes,
        aggregation_node,
        local_states,
        shared_states=lm_shared_states,
        round_idx=round_idx,
        clean_models=clean_models,
    )

    logger.info("Finished computing size factors.")

    # --- LFCs and dispersions --------------------------------------------
    logger.info("Running LFC and dispersions.")

    local_states, round_idx = self.run_deseq2_lfc_dispersions(
        train_data_nodes=train_data_nodes,
        aggregation_node=aggregation_node,
        local_states=local_states,
        gram_features_shared_states=gram_feat_states,
        round_idx=round_idx,
        clean_models=clean_models,
    )

    logger.info("Finished running LFC and dispersions.")

    # --- Cook's distances -------------------------------------------------
    logger.info("Computing Cook distances...")

    local_states, cooks_shared, round_idx = self.compute_cook_distance(
        train_data_nodes,
        aggregation_node,
        local_states,
        round_idx,
        clean_models=clean_models,
    )

    logger.info("Finished computing Cook distances.")

    # --- Optional outlier refitting ---------------------------------------
    if self.refit_cooks:
        logger.info("Refitting Cook outliers...")

        local_states, gram_feat_states, round_idx = self.replace_outliers(
            train_data_nodes,
            aggregation_node,
            local_states,
            cooks_shared,
            round_idx,
            clean_models=clean_models,
        )

        local_states, round_idx = self.run_deseq2_lfc_dispersions(
            train_data_nodes=train_data_nodes,
            aggregation_node=aggregation_node,
            local_states=local_states,
            gram_features_shared_states=gram_feat_states,
            round_idx=round_idx,
            clean_models=clean_models,
            refit_mode=True,
        )

        # Push the refitted values back into the main ``local_adata`` object.
        local_states, round_idx = self.replace_refitted_values(
            train_data_nodes=train_data_nodes,
            aggregation_node=aggregation_node,
            local_states=local_states,
            round_idx=round_idx,
            clean_models=clean_models,
        )

        logger.info("Finished refitting Cook outliers.")

    # --- DESeq2 statistics ------------------------------------------------
    logger.info("Running DESeq2 statistics.")

    local_states, round_idx = self.run_deseq2_stats(
        train_data_nodes,
        aggregation_node,
        local_states,
        round_idx,
        clean_models=clean_models,
    )

    logger.info("Finished running DESeq2 statistics.")

    # --- Results downloaded at the end of the pipeline --------------------
    logger.info("Saving pipeline results.")
    self.save_pipeline_results(
        train_data_nodes,
        aggregation_node,
        local_states,
        round_idx,
        clean_models=clean_models,
    )

    logger.info("Finished saving pipeline results.")

Steps of fedpydeseq2 and corresponding Mixin classes.

Building the design matrix: build_design_matrix

Computing the size factors: compute_size_factors

Computing log fold changes and dispersions: deseq2_lfc_dispersions

Computing the Cook's distances: compute_cook_distance

Replacing outliers: replace_outliers

Replace refitted values: replace_refitted_values

Computing statistics: deseq2_stats

Saving the results: save_pipeline_results