Module cellex.summarydata
Expand source code
import os
import numpy as np
import pandas as pd
class SummaryData(object):
    """Lazily computed, cached summary data for computing ES metrics.

    The SummaryData object allows summary data to be shared between
    computations, thus reducing the number of computations needed,
    e.g. the mean only has to be computed once. Each statistic is
    computed on first access and then stored on the instance.

    Attributes
    ----------
    data : DataFrame
        Original expression data. Its columns are grouped by the annotation.
    annotation : array
        Annotation to group cells by.
    _mean : DataFrame
        Mean expression for the groups specified by the annotation.
    _n_cells_per_anno : Series
        Number of cells per group specified by the annotation.
    _n_nonzero : DataFrame
        Number of nonzero expression values per group specified by the
        annotation.
    _variance : DataFrame
        Variance of expression values per group specified by the annotation.
    _annotation_null : np.ndarray
        Null annotation to group cells by (a permutation of ``annotation``).
    _mean_null : DataFrame
        Mean expression for the groups specified by the null annotation.
    _n_cells_per_anno_null : Series
        Number of cells per group specified by the null annotation.
    _n_nonzero_null : DataFrame
        Number of nonzero expression values per group specified by the
        null annotation.
    _variance_null : DataFrame
        Variance of expression values per group specified by the null
        annotation.

    Properties
    ----------
    mean, n_cells_per_anno, n_nonzero, variance
        Lazily evaluate and return the matching cached attribute.
    annotation_null, mean_null, n_cells_per_anno_null, n_nonzero_null,
    variance_null
        Same as above, for the null (permuted) annotation.

    Methods
    -------
    save(dir_name: str=None, verbose: bool=False)
        Save the DataFrame-valued statistics, excluding the original data.
    """

    def __init__(self, data: pd.DataFrame, annotation: np.ndarray):
        """
        Parameters
        ----------
        data : DataFrame
            Original expression data.
        annotation : array
            Annotation to group cells by.
        """
        self.data = data
        self.annotation = annotation

        # Caches, filled in by the properties on first access.
        self._mean = None
        self._n_cells_per_anno = None  # tstat, ges (just one line)
        self._n_nonzero = None
        self._variance = None  # tstat

        # null attributes
        self._annotation_null = None
        self._mean_null = None
        self._n_nonzero_null = None
        self._n_cells_per_anno_null = None
        self._variance_null = None

    def _grouped(self, labels):
        """Return ``data`` transposed and grouped by ``labels``.

        ``DataFrame.groupby(..., axis=1)`` is deprecated in pandas, so the
        columns are grouped by transposing first. Callers transpose
        frame-shaped results back to the original orientation.
        """
        return self.data.T.groupby(labels)

    def _count_nonzero(self, labels):
        """Count, per row of ``data``, the nonzero values in each group."""
        # Cast the boolean mask to integers so the group sums are counts.
        is_nonzero = self.data.ne(0).astype("int64")
        return is_nonzero.T.groupby(labels).sum().T

    @property
    def mean(self):
        """Compute (once) and return the per-group mean expression."""
        if self._mean is None:
            self._mean = self._grouped(self.annotation).mean().T
        return self._mean

    @property
    def n_cells_per_anno(self):
        """Compute (once) and return the number of cells per group."""
        if self._n_cells_per_anno is None:
            self._n_cells_per_anno = self._grouped(self.annotation).size()
        return self._n_cells_per_anno

    @property
    def n_nonzero(self):
        """Compute (once) and return the per-group count of nonzero values."""
        if self._n_nonzero is None:
            self._n_nonzero = self._count_nonzero(self.annotation)
        return self._n_nonzero

    @property
    def variance(self):
        """Compute (once) and return the per-group expression variance."""
        if self._variance is None:
            self._variance = self._grouped(self.annotation).var().T
        return self._variance

    ### Null summary data
    @property
    def annotation_null(self):
        """Compute (once) and return a permuted copy of the annotation."""
        if self._annotation_null is None:
            # NOTE: this reseeds NumPy's global RNG with a fixed seed, so the
            # permutation is the same on every run.
            np.random.seed(1)
            self._annotation_null = np.random.permutation(self.annotation)
        return self._annotation_null

    @property
    def mean_null(self):
        """Compute (once) and return the mean per null-annotation group."""
        if self._mean_null is None:
            self._mean_null = self._grouped(self.annotation_null).mean().T
        return self._mean_null

    @property
    def n_cells_per_anno_null(self):
        """Compute (once) and return the cell count per null group."""
        if self._n_cells_per_anno_null is None:
            self._n_cells_per_anno_null = self._grouped(self.annotation_null).size()
        return self._n_cells_per_anno_null

    @property
    def n_nonzero_null(self):
        """Compute (once) and return nonzero counts per null group."""
        if self._n_nonzero_null is None:
            self._n_nonzero_null = self._count_nonzero(self.annotation_null)
        return self._n_nonzero_null

    @property
    def variance_null(self):
        """Compute (once) and return the variance per null group."""
        if self._variance_null is None:
            self._variance_null = self._grouped(self.annotation_null).var().T
        return self._variance_null

    def save(self, dir_name: str = None, verbose: bool = False) -> None:
        """Save summary statistics to disk.

        Every public attribute that is a DataFrame, except ``data``, is
        written to ``<dir_name>/summarystat.<attribute>.csv.gz``. Reading the
        attributes triggers computation of statistics not yet cached.

        Parameters
        ----------
        dir_name : str, optional
            Output directory, created if missing. Defaults to ``"out"``.
        verbose : bool, optional
            Print progress messages.
        """
        if verbose:
            print("Saving results to disk ...")
        if dir_name is None:
            dir_name = "out"
        os.makedirs(dir_name, exist_ok=True)  # make dir if it doesn't already exist

        ### Loop over SummaryData attributes and save stats to disk
        for stat_name in dir(self):
            # Private names are the caches behind the public properties;
            # skipping them avoids writing the same frame twice.
            if stat_name.startswith("_") or stat_name == "data":
                continue
            stat = getattr(self, stat_name)
            if not isinstance(stat, pd.DataFrame):
                continue
            fp = "{}/summarystat.{}.csv.gz".format(dir_name, stat_name)
            stat.to_csv(fp, compression="gzip")
            if verbose:
                print(" Saved: {}".format(fp))
Classes
class SummaryData (data: pandas.core.frame.DataFrame, annotation: np.array)
A class that contains summary data for computing ES metrics
The SummaryData object allows sharing of summary data between computations, thus reducing the number of computations needed, e.g. we need only compute the mean once.
Attributes
data
:DataFrame
- Original expression data.
annotation
- Annotation to group cells by.
_mean
:DataFrame
- Mean expression for the groups specified by the annotation.
_n_cells_per_anno
:DataFrame
- Number of cells per groups specified by the annotation.
_n_nonzero
:DataFrame
- Number of nonzero expression values per groups specified by the annotation.
_variance
:DataFrame
- Variance of expression values per groups specified by the annotation.
annotation_null
:np.ndarray
- Null annotation to group cells by.
_mean_null
:DataFrame
- Mean expression for the groups specified by the null annotation.
_n_cells_per_anno_null
:DataFrame
- Number of cells per groups specified by the null annotation.
_n_nonzero_null
:DataFrame
- Number of nonzero expression values per groups specified by the null annotation.
_variance_null
:DataFrame
- Variance of expression values per groups specified by the null annotation.
Methods
mean(self) Lazily evaluate and return _mean.
n_cells_per_anno(self) Lazily evaluate and return _n_cells_per_anno.
n_nonzero(self) Lazily evaluate and return _n_nonzero.
variance(self) Lazily evaluate and return _variance.
annotation_null(self) Lazily evaluate and return _annotation_null.
mean_null(self) Lazily evaluate and return _mean_null.
n_cells_per_anno_null(self) Lazily evaluate and return _n_cells_per_anno_null.
n_nonzero_null(self) Lazily evaluate and return _n_nonzero_null.
variance_null(self) Lazily evaluate and return _variance_null.
save(dir_name: str=None, verbose: bool=False) Save the object attributes, excluding the original data.
Parameters
data
:DataFrame
- Original expression data.
annotation
:array
- Annotation to group cells by.
Expand source code
class SummaryData(object):
    """Lazily computed, cached summary data for computing ES metrics.

    The SummaryData object allows summary data to be shared between
    computations, thus reducing the number of computations needed,
    e.g. the mean only has to be computed once. Each statistic is
    computed on first access and then stored on the instance.

    Attributes
    ----------
    data : DataFrame
        Original expression data. Its columns are grouped by the annotation.
    annotation : array
        Annotation to group cells by.
    _mean : DataFrame
        Mean expression for the groups specified by the annotation.
    _n_cells_per_anno : Series
        Number of cells per group specified by the annotation.
    _n_nonzero : DataFrame
        Number of nonzero expression values per group specified by the
        annotation.
    _variance : DataFrame
        Variance of expression values per group specified by the annotation.
    _annotation_null : np.ndarray
        Null annotation to group cells by (a permutation of ``annotation``).
    _mean_null : DataFrame
        Mean expression for the groups specified by the null annotation.
    _n_cells_per_anno_null : Series
        Number of cells per group specified by the null annotation.
    _n_nonzero_null : DataFrame
        Number of nonzero expression values per group specified by the
        null annotation.
    _variance_null : DataFrame
        Variance of expression values per group specified by the null
        annotation.

    Properties
    ----------
    mean, n_cells_per_anno, n_nonzero, variance
        Lazily evaluate and return the matching cached attribute.
    annotation_null, mean_null, n_cells_per_anno_null, n_nonzero_null,
    variance_null
        Same as above, for the null (permuted) annotation.

    Methods
    -------
    save(dir_name: str=None, verbose: bool=False)
        Save the DataFrame-valued statistics, excluding the original data.
    """

    def __init__(self, data: pd.DataFrame, annotation: np.ndarray):
        """
        Parameters
        ----------
        data : DataFrame
            Original expression data.
        annotation : array
            Annotation to group cells by.
        """
        self.data = data
        self.annotation = annotation

        # Caches, filled in by the properties on first access.
        self._mean = None
        self._n_cells_per_anno = None  # tstat, ges (just one line)
        self._n_nonzero = None
        self._variance = None  # tstat

        # null attributes
        self._annotation_null = None
        self._mean_null = None
        self._n_nonzero_null = None
        self._n_cells_per_anno_null = None
        self._variance_null = None

    def _grouped(self, labels):
        """Return ``data`` transposed and grouped by ``labels``.

        ``DataFrame.groupby(..., axis=1)`` is deprecated in pandas, so the
        columns are grouped by transposing first. Callers transpose
        frame-shaped results back to the original orientation.
        """
        return self.data.T.groupby(labels)

    def _count_nonzero(self, labels):
        """Count, per row of ``data``, the nonzero values in each group."""
        # Cast the boolean mask to integers so the group sums are counts.
        is_nonzero = self.data.ne(0).astype("int64")
        return is_nonzero.T.groupby(labels).sum().T

    @property
    def mean(self):
        """Compute (once) and return the per-group mean expression."""
        if self._mean is None:
            self._mean = self._grouped(self.annotation).mean().T
        return self._mean

    @property
    def n_cells_per_anno(self):
        """Compute (once) and return the number of cells per group."""
        if self._n_cells_per_anno is None:
            self._n_cells_per_anno = self._grouped(self.annotation).size()
        return self._n_cells_per_anno

    @property
    def n_nonzero(self):
        """Compute (once) and return the per-group count of nonzero values."""
        if self._n_nonzero is None:
            self._n_nonzero = self._count_nonzero(self.annotation)
        return self._n_nonzero

    @property
    def variance(self):
        """Compute (once) and return the per-group expression variance."""
        if self._variance is None:
            self._variance = self._grouped(self.annotation).var().T
        return self._variance

    ### Null summary data
    @property
    def annotation_null(self):
        """Compute (once) and return a permuted copy of the annotation."""
        if self._annotation_null is None:
            # NOTE: this reseeds NumPy's global RNG with a fixed seed, so the
            # permutation is the same on every run.
            np.random.seed(1)
            self._annotation_null = np.random.permutation(self.annotation)
        return self._annotation_null

    @property
    def mean_null(self):
        """Compute (once) and return the mean per null-annotation group."""
        if self._mean_null is None:
            self._mean_null = self._grouped(self.annotation_null).mean().T
        return self._mean_null

    @property
    def n_cells_per_anno_null(self):
        """Compute (once) and return the cell count per null group."""
        if self._n_cells_per_anno_null is None:
            self._n_cells_per_anno_null = self._grouped(self.annotation_null).size()
        return self._n_cells_per_anno_null

    @property
    def n_nonzero_null(self):
        """Compute (once) and return nonzero counts per null group."""
        if self._n_nonzero_null is None:
            self._n_nonzero_null = self._count_nonzero(self.annotation_null)
        return self._n_nonzero_null

    @property
    def variance_null(self):
        """Compute (once) and return the variance per null group."""
        if self._variance_null is None:
            self._variance_null = self._grouped(self.annotation_null).var().T
        return self._variance_null

    def save(self, dir_name: str = None, verbose: bool = False) -> None:
        """Save summary statistics to disk.

        Every public attribute that is a DataFrame, except ``data``, is
        written to ``<dir_name>/summarystat.<attribute>.csv.gz``. Reading the
        attributes triggers computation of statistics not yet cached.

        Parameters
        ----------
        dir_name : str, optional
            Output directory, created if missing. Defaults to ``"out"``.
        verbose : bool, optional
            Print progress messages.
        """
        if verbose:
            print("Saving results to disk ...")
        if dir_name is None:
            dir_name = "out"
        os.makedirs(dir_name, exist_ok=True)  # make dir if it doesn't already exist

        ### Loop over SummaryData attributes and save stats to disk
        for stat_name in dir(self):
            # Private names are the caches behind the public properties;
            # skipping them avoids writing the same frame twice.
            if stat_name.startswith("_") or stat_name == "data":
                continue
            stat = getattr(self, stat_name)
            if not isinstance(stat, pd.DataFrame):
                continue
            fp = "{}/summarystat.{}.csv.gz".format(dir_name, stat_name)
            stat.to_csv(fp, compression="gzip")
            if verbose:
                print(" Saved: {}".format(fp))
Instance variables
var annotation_null
-
Expand source code
@property def annotation_null(self): if self._annotation_null is None: np.random.seed(1) self._annotation_null = np.random.permutation(self.annotation) return self._annotation_null
var mean
-
Compute or return mean
Expand source code
@property def mean(self): """Compute or return mean""" if self._mean is None: self._mean = self.data.groupby(self.annotation, axis=1).mean() return self._mean
var mean_null
-
Expand source code
@property def mean_null(self): if self._mean_null is None: self._mean_null = self.data.groupby(self.annotation_null, axis=1).mean() return self._mean_null
var n_cells_per_anno
-
Expand source code
@property def n_cells_per_anno(self): if self._n_cells_per_anno is None: self._n_cells_per_anno = self.data.groupby(self.annotation, axis=1).size() return self._n_cells_per_anno
var n_cells_per_anno_null
-
Expand source code
@property def n_cells_per_anno_null(self): if self._n_cells_per_anno_null is None: self._n_cells_per_anno_null = self.data.groupby(self.annotation_null, axis=1).size() return self._n_cells_per_anno_null
var n_nonzero
-
Expand source code
@property def n_nonzero(self): if self._n_nonzero is None: self._n_nonzero = self.data.groupby(self.annotation, axis=1).agg(lambda x: x.ne(0).sum(axis=1)) return self._n_nonzero
var n_nonzero_null
-
Expand source code
@property def n_nonzero_null(self): if self._n_nonzero_null is None: self._n_nonzero_null = self.data.groupby(self.annotation_null, axis=1).agg(lambda x: x.ne(0).sum(axis=1)) return self._n_nonzero_null
var variance
-
Expand source code
@property def variance(self): if self._variance is None: self._variance = self.data.groupby(self.annotation, axis=1).var() return self._variance
var variance_null
-
Expand source code
@property def variance_null(self): if self._variance_null is None: self._variance_null = self.data.groupby(self.annotation_null, axis=1).var() return self._variance_null
Methods
def save(self, dir_name: str = None, verbose: bool = False) -> NoneType
-
Save summary statistics to disk.
Expand source code
def save(self, dir_name: str=None, verbose: bool=False) -> None: """ Save summary statistics to disk. """ if verbose: print("Saving results to disk ...") if dir_name == None: dir_name = "out" os.makedirs(dir_name, exist_ok=True) # make dir if it doesn't already exist ### Loop over SummaryStats attributes and save stats to disk for s in dir(self): att = getattr(self, s) if isinstance(att, pd.DataFrame) and s != "data": fp = "{}/summarystat.{}.csv.gz".format(dir_name, self.name, s) att.to_csv(fp, compression="gzip") if verbose: print(" Saved: {}".format(fp))