Module cellex.metrics.ep
Expand source code
import numpy as np
import pandas as pd
import time
import datetime
from .esw_star import esw_star
from ..utils.compute_pvalues import compute_pvalues
from ..summarydata import SummaryData
def _ep(mean: pd.DataFrame, verbose: bool=False):
"""Computes Expression Proportion ES weights for each gene / cell-type
Parameters
----------
mean : DataFrame
Mean expression per gene / annotation group.
verbose : bool, optional (default: False)
Print progress report.
Returns
-------
specificity : ndarray
ES weights
"""
n_genes = mean.shape[0]
# n_genes x n_annotations
expr_mean = mean.values
# Scale expression values by sum of cell expression:
# every column should sum to 1.
# axis=0, i.e. sum column values
# Shape: n_genes x n_annotations
expr_mean = expr_mean / np.sum(expr_mean, axis=0)
# n_genes x 1
# axis=1, i.e. sum row values. Reshape to ensure vector-shape
expr_mean_sum = np.sum(expr_mean,axis=1).reshape((n_genes,1))
# Compute Specificity
# mean_x / (sum_means_all + eps)
specificity = expr_mean / (expr_mean_sum + 1e-12)
return specificity
def ep(stats: SummaryData, verbose: bool=False, compute_meta: bool=False):
    """Compute Expression Proportion

    EP is based on the specificity calculations described in:
    Skene, et al. Genetic identification of brain cell types underlying
    schizophrenia. Nat. Genet. 50, 825–833 (2018)
    and implemented in the EWCE R package. Code available at:
    github(.)com/NathanSkene/EWCE

    Parameters
    ----------
    stats : SummaryData
        Summary data computed from raw data using specified annotation.
        Its ``mean`` attribute is read, and ``mean_null`` as well when
        ``compute_meta`` is set.
    verbose : bool, optional (default: False)
        Print progress report.
    compute_meta : bool, optional (default: False)
        Compute meta results.

    Returns
    -------
    results : dict
        Dictionary containing all computed ESw and meta results, e.g. pvals.
        Always holds "ep.esw"; with ``compute_meta`` it also holds
        "ep.esw_null", "ep.pvals" and "ep.esw_s".
    """
    start = 0
    if verbose:
        start = time.time()
        print("Computing EP ...")

    df = stats.mean
    idx_labels = df.index
    col_labels = stats.mean.columns.values
    key = "ep."
    results = {}

    if verbose:
        print(" esw ...")

    esw = _ep(df, verbose)
    esw_df = pd.DataFrame(esw, idx_labels, col_labels)
    results[(key + "esw")] = esw_df

    if compute_meta:
        esw_null = _ep(stats.mean_null, verbose)
        pvals = compute_pvalues(esw, esw_null, verbose)
        pvals_df = pd.DataFrame(pvals, idx_labels, col_labels)

        # The null weights are labelled with the same index / columns as
        # the observed weights.
        results[(key + "esw_null")] = pd.DataFrame(esw_null, idx_labels, col_labels)
        results[(key + "pvals")] = pvals_df
        results[(key + "esw_s")] = esw_star(esw_df, pvals_df, verbose)

    if verbose:
        td = datetime.timedelta(seconds=(time.time() - start))
        # total_seconds() rather than .seconds: the latter excludes whole
        # days, so the report would wrap around after 24 hours.
        print(" finished in %d min %d sec" % (divmod(int(td.total_seconds()), 60)))

    return results
Functions
def ep(stats: SummaryData, verbose: bool = False, compute_meta: bool = False)
-
Compute Expression Proportion
EP is based on the specificity calculations described in:
Skene, et al. Genetic identification of brain cell types underlying schizophrenia. Nat. Genet. 50, 825–833 (2018)
and implemented in the EWCE R package. Code available at:
github(.)com/NathanSkene/EWCE
Parameters
stats : SummaryData
- Summary data computed from raw data using specified annotation.
verbose : bool, optional (default: False)
- Print progress report.
compute_meta : bool, optional (default: False)
- Compute meta results.
Returns
results : dict
- Dictionary containing all computed ESw and meta results, e.g. pvals
Expand source code
def ep(stats: SummaryData, verbose: bool=False, compute_meta: bool=False):
    """Compute Expression Proportion

    EP is based on the specificity calculations described in:
    Skene, et al. Genetic identification of brain cell types underlying
    schizophrenia. Nat. Genet. 50, 825–833 (2018)
    and implemented in the EWCE R package. Code available at:
    github(.)com/NathanSkene/EWCE

    Parameters
    ----------
    stats : SummaryData
        Summary data computed from raw data using specified annotation.
        Its ``mean`` attribute is read, and ``mean_null`` as well when
        ``compute_meta`` is set.
    verbose : bool, optional (default: False)
        Print progress report.
    compute_meta : bool, optional (default: False)
        Compute meta results.

    Returns
    -------
    results : dict
        Dictionary containing all computed ESw and meta results, e.g. pvals.
        Always holds "ep.esw"; with ``compute_meta`` it also holds
        "ep.esw_null", "ep.pvals" and "ep.esw_s".
    """
    start = 0
    if verbose:
        start = time.time()
        print("Computing EP ...")

    df = stats.mean
    idx_labels = df.index
    col_labels = stats.mean.columns.values
    key = "ep."
    results = {}

    if verbose:
        print(" esw ...")

    esw = _ep(df, verbose)
    esw_df = pd.DataFrame(esw, idx_labels, col_labels)
    results[(key + "esw")] = esw_df

    if compute_meta:
        esw_null = _ep(stats.mean_null, verbose)
        pvals = compute_pvalues(esw, esw_null, verbose)
        pvals_df = pd.DataFrame(pvals, idx_labels, col_labels)

        # The null weights are labelled with the same index / columns as
        # the observed weights.
        results[(key + "esw_null")] = pd.DataFrame(esw_null, idx_labels, col_labels)
        results[(key + "pvals")] = pvals_df
        results[(key + "esw_s")] = esw_star(esw_df, pvals_df, verbose)

    if verbose:
        td = datetime.timedelta(seconds=(time.time() - start))
        # total_seconds() rather than .seconds: the latter excludes whole
        # days, so the report would wrap around after 24 hours.
        print(" finished in %d min %d sec" % (divmod(int(td.total_seconds()), 60)))

    return results