Cross-species modality prediction using scEve#

[1]:
import scanpy as sc
import scparadise
import os
from scipy import sparse
from mousipy import translate
import muon as mu
import numpy as np
import pandas as pd
import warnings

warnings.simplefilter('ignore')
sc.set_figure_params(dpi = 150, dpi_save = 600)
C:\Users\vadim\anaconda3\envs\scrna\lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

Recommendations about dataset#

Our models are trained using shifted logarithm normalized data. We recommend the shifted logarithm data normalization method for the proper use of our models. sc.pp.normalize_total(adata, target_sum=None) sc.pp.log1p(adata) adata.raw = adata

[2]:
# Load ADT data from 10x Genomics
url = "https://cf.10xgenomics.com/samples/cell-exp/7.2.0/4plex_mouse_LymphNode_Spleen_TotalSeqC_multiplex_LymphNode1_BC1_AB1/4plex_mouse_LymphNode_Spleen_TotalSeqC_multiplex_LymphNode1_BC1_AB1_count_sample_filtered_feature_bc_matrix.h5"
mdata = mu.read_10x_h5("dataset.h5", backup_url = url)
mdata.var_names_make_unique()
# RNA data
adata = mdata.mod['rna'].copy()
# Protein data
adata_adt = mdata.mod['prot'].copy()
del mdata
print('RNA data', adata, sep='\n')
print('Protein data', adata_adt, sep='\n')
RNA data
AnnData object with n_obs × n_vars = 7579 × 19059
    var: 'gene_ids', 'feature_types', 'genome'
Protein data
AnnData object with n_obs × n_vars = 7579 × 122
    var: 'gene_ids', 'feature_types', 'genome'

Gene expression data processing#

[3]:
# mitochondrial genes, "MT-" for human, "Mt-" for mouse
adata.var["mt"] = adata.var_names.str.startswith("mt-")
[4]:
sc.pp.calculate_qc_metrics(
    adata, qc_vars=["mt"], inplace=True, log1p=True
)
[5]:
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts", "pct_counts_mt"],
    jitter=0.4,
    multi_panel=True,
)
../../../_images/tutorials_notebooks_scEve_Cross_species_modality_prediction_using_scEve_7_0.png
[6]:
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
[7]:
# Doublet detection using scrublet
sc.external.pp.scrublet(adata)
Automatically set threshold at doublet score = 0.24
Detected doublet rate = 3.0%
Estimated detectable doublet fraction = 39.0%
Overall doublet rate:
        Expected   = 5.0%
        Estimated  = 7.6%
[8]:
# Remove doublets + other QC metrics
adata = adata[adata.obs['predicted_doublet'] == False]
sc.pp.filter_cells(adata, max_genes = 6000)
sc.pp.filter_cells(adata, max_counts = 35000)
adata = adata[adata.obs['pct_counts_mt'] < 5]

Normalization, HVG, neighbors, PCA, UMAP#

[9]:
# Saving count data
adata.layers["counts"] = adata.X.copy()
# Normalization (Shifted logarithm)
sc.pp.normalize_total(adata, target_sum=None)
sc.pp.log1p(adata)
# scParadise use normalized data in adata.raw!!!
adata.raw = adata
# HVG
sc.pp.highly_variable_genes(adata, n_top_genes=2000)
# PCA
sc.tl.pca(adata)
# Nearest neighbors analysis
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=20)
# UMAP
sc.tl.umap(adata)
# Clusterization
sc.tl.leiden(adata, resolution=0.5, key_added='leiden_0.5')
[10]:
# Visualise predicted cell types levels and prediction probabilities
sc.pl.embedding(adata,
                color = [
                    'leiden_0.5'
                ],
                basis = 'X_umap',
                frameon = False,
                add_outline = True,
                legend_loc = 'on data',
                legend_fontsize = 9,
                legend_fontoutline = 2,
                save='clusterization.png')
WARNING: saving figure to file figures\X_umapclusterization.png
../../../_images/tutorials_notebooks_scEve_Cross_species_modality_prediction_using_scEve_13_1.png
[11]:
adata.write_h5ad('mouse_clusterization.h5ad')

Observed ADT preprocessing#

[12]:
mu.prot.pp.clr(adata_adt)
[13]:
adata_adt = adata_adt[adata.obs_names]
adata_adt.obsm['X_umap'] = adata.obsm['X_umap']
[15]:
# Visualise naive T cells markers
sc.pl.embedding(adata_adt,
                color = [
                    'Ms.CD3',
                    'Ms.CD4',
                    'Ms.CD8a',
                    'Ms.CD19',
                    'Ms.IgD',
                    'Ms.NK.1.1'
                ],
                basis = 'X_umap',
                frameon = False,
                add_outline = True,
                cmap = 'bwr',
                legend_loc = 'on data',
                legend_fontsize = 9,
                legend_fontoutline = 2,
                ncols = 3,
                save='_observed_proteins.png'
               )
WARNING: saving figure to file figures\X_umap_observed_proteins.png
../../../_images/tutorials_notebooks_scEve_Cross_species_modality_prediction_using_scEve_18_1.png

scParadise prediction (scEve)#

To use the model trained on human cells, it is necessary to replace the names of mouse genes with their orthologs from the human genome. Orthologs - genes in different species that evolved from a common ancestor by speciation (Example: Acta2 in mouse, ACTA2 in human).

[16]:
# Find orthologs of mouse genes in human genome
humanized_adata = translate(adata)
100%|██████████| 13167/13167 [00:18<00:00, 709.75it/s]
Found direct orthologs for 12219 genes.
Found multiple orthologs for 235 genes.
Found no orthologs for 442 genes.
Found no index in biomart for 271 genes.
100%|██████████| 235/235 [00:13<00:00, 17.83it/s]

[17]:
# Check matrix type and add it to raw
humanized_adata.X = sparse.csc_matrix(humanized_adata.X)
humanized_adata.raw = humanized_adata
[18]:
# Available models for cell type annotation
scparadise.sceve.available_models()
WARNING: RMSE, MASE, MSE are error metrics. Lower error metric value -> Better prediction.
RMSE - Root Mean Squared Error
MSE - Mean Squared Error
MAE - Mean Absolute Error

[18]:
Tissue/Model name Description Suspension RMSE MAE Number of Proteins
0 Human_PBMC_3p Peripheral blood mononuclear cells of healthy ... cells 0.305 0.226 224
1 Human_PBMC_5p Peripheral blood mononuclear cells of healthy ... cells 0.308 0.225 54
2 Human_BMMC Bone marrow mononuclear cell of healthy adults cells 0.706 0.454 134
[19]:
# Download model for modality prediction
scparadise.sceve.download_model(model_name = 'Human_PBMC_3p',
                                save_path = '')
[20]:
# Predict surface proteins using trained model
adata_adt_pred = scparadise.sceve.predict(humanized_adata,
                                          path_model = 'Human_PBMC_3p_scEve/',
                                          return_mdata = False)
Successfully loaded list of genes used for training model

Successfully loaded list of proteins used for training model

Successfully loaded model

[21]:
# Add UMAP from RNA data
adata_adt_pred.obsm['X_umap'] = adata.obsm['X_umap']
[22]:
# Visualise predicted proteins
sc.pl.embedding(adata_adt_pred,
                color = [
                    'adt_CD3-1_pred',
                    'adt_CD4-1_pred',
                    'adt_CD8_pred',
                    'adt_CD19_pred',
                    'adt_IgD_pred',
                    'adt_CD56-2_pred'
                ],
                basis = 'X_umap',
                frameon = False,
                add_outline = True,
                cmap = 'bwr',
                legend_loc = 'on data',
                legend_fontsize = 9,
                legend_fontoutline = 2,
                ncols = 3,
                save='_predicted_proteins.png'
               )
WARNING: saving figure to file figures\X_umap_predicted_proteins.png
../../../_images/tutorials_notebooks_scEve_Cross_species_modality_prediction_using_scEve_26_1.png
[ ]:

[23]:
pip list
Package                       Version
----------------------------- ---------------
absl-py                       2.1.0
aiohttp                       3.9.3
aiosignal                     1.3.1
alabaster                     0.7.16
alembic                       1.13.2
anndata                       0.10.5.post1
anndata2ri                    1.3
annotated-types               0.6.0
annoy                         1.17.3
ansicon                       1.89.0
anyio                         4.2.0
argon2-cffi                   21.3.0
argon2-cffi-bindings          21.2.0
array_api_compat              1.4.1
arrow                         1.3.0
astroid                       2.14.2
asttokens                     2.0.5
async-lru                     2.0.4
async-timeout                 4.0.3
atomicwrites                  1.4.0
attrs                         23.1.0
autopep8                      2.0.4
Babel                         2.11.0
backcall                      0.2.0
backoff                       2.2.1
bbknn                         1.6.0
bcrypt                        3.2.0
beautifulsoup4                4.12.2
binaryornot                   0.4.4
black                         24.8.0
bleach                        4.1.0
blessed                       1.20.0
boto3                         1.34.37
botocore                      1.34.37
celltypist                    1.6.3
certifi                       2024.8.30
cffi                          1.16.0
chardet                       4.0.0
charset-normalizer            2.0.4
chex                          0.1.7
click                         8.1.7
cloudpickle                   3.0.0
colorama                      0.4.6
colorlog                      6.8.2
comm                          0.1.2
commonmark                    0.9.1
contextlib2                   21.6.0
contourpy                     1.2.0
cookiecutter                  2.6.0
croniter                      1.4.1
cryptography                  43.0.0
cycler                        0.12.1
Cython                        3.0.8
dateutils                     0.6.12
debugpy                       1.6.7
decorator                     5.1.1
deepdiff                      6.7.1
defusedxml                    0.7.1
Deprecated                    1.2.14
diff-match-patch              20200713
dill                          0.3.8
dm-tree                       0.1.8
docrep                        0.3.2
docstring-to-markdown         0.11
docutils                      0.18.1
editor                        1.6.6
et-xmlfile                    1.1.0
etils                         1.5.2
exceptiongroup                1.2.0
executing                     0.8.3
fastapi                       0.109.2
fastjsonschema                2.16.2
fbpca                         1.0
filelock                      3.13.1
flake8                        7.0.0
flax                          0.8.1
fonttools                     4.48.1
frozenlist                    1.4.1
fsspec                        2024.2.0
geosketch                     1.2
get-annotations               0.1.2
gprofiler-official            1.0.0
greenlet                      3.0.3
h11                           0.14.0
h5py                          3.10.0
harmonypy                     0.0.9
idna                          3.4
igraph                        0.10.8
imageio                       2.36.0
imagesize                     1.4.1
imbalanced-learn              0.12.3
importlib-metadata            7.0.1
importlib-resources           6.1.1
inflection                    0.5.1
inquirer                      3.2.3
intervaltree                  3.1.0
ipykernel                     6.28.0
ipython                       8.15.0
isort                         5.13.2
itsdangerous                  2.1.2
jaraco.classes                3.2.1
jax                           0.4.23
jaxlib                        0.4.23
jedi                          0.18.1
jellyfish                     1.0.1
Jinja2                        3.1.3
jinxed                        1.2.1
jmespath                      1.0.1
joblib                        1.3.2
json5                         0.9.6
jsonschema                    4.19.2
jsonschema-specifications     2023.7.1
jupyter_client                8.6.0
jupyter_core                  5.5.0
jupyter-events                0.8.0
jupyter-lsp                   2.2.0
jupyter_server                2.10.0
jupyter_server_terminals      0.4.4
jupyterlab                    4.0.11
jupyterlab-pygments           0.1.2
jupyterlab_server             2.25.1
jupytext                      1.16.4
keyring                       24.3.1
kiwisolver                    1.4.5
lazy_loader                   0.4
lazy-object-proxy             1.10.0
leidenalg                     0.10.2
lightning                     2.0.9.post0
lightning-cloud               0.5.64
lightning-utilities           0.10.1
llvmlite                      0.42.0
louvain                       0.8.1
Mako                          1.3.5
markdown-it-py                2.2.0
MarkupSafe                    2.1.3
matplotlib                    3.8.2
matplotlib-inline             0.1.6
mccabe                        0.7.0
mdit-py-plugins               0.4.2
mdurl                         0.1.0
mistune                       2.0.4
ml-collections                0.1.1
ml-dtypes                     0.3.2
more-itertools                10.3.0
mousipy                       0.1.6
mpmath                        1.3.0
msgpack                       1.0.7
mudata                        0.2.3
multidict                     6.0.5
multipledispatch              1.0.0
muon                          0.1.6
mypy-extensions               1.0.0
natsort                       8.4.0
nbclient                      0.8.0
nbconvert                     7.10.0
nbformat                      5.9.2
nest-asyncio                  1.5.6
networkx                      3.2.1
notebook_shim                 0.2.3
numba                         0.59.0
numpy                         1.24.4
numpydoc                      1.7.0
numpyro                       0.13.2
openpyxl                      3.1.5
opt-einsum                    3.3.0
optax                         0.1.9
optuna                        3.6.1
orbax-checkpoint              0.5.3
ordered-set                   4.1.0
overrides                     7.4.0
packaging                     23.1
pandas                        1.5.3
pandocfilters                 1.5.0
paramiko                      2.8.1
parso                         0.8.3
pathspec                      0.10.3
patsy                         0.5.6
pexpect                       4.8.0
pickleshare                   0.7.5
pillow                        10.2.0
pip                           23.3.1
platformdirs                  3.10.0
pluggy                        1.0.0
ply                           3.11
prometheus-client             0.14.1
prompt-toolkit                3.0.43
protobuf                      4.25.2
psutil                        5.9.0
ptyprocess                    0.7.0
pure-eval                     0.2.2
pycodestyle                   2.11.1
pycparser                     2.21
pydantic                      2.1.1
pydantic_core                 2.4.0
pydocstyle                    6.3.0
pydot                         2.0.0
pyflakes                      3.2.0
Pygments                      2.15.1
PyJWT                         2.8.0
pylint                        2.16.2
pylint-venv                   3.0.3
pyls-spyder                   0.4.0
PyNaCl                        1.5.0
pynndescent                   0.5.11
pyparsing                     3.1.1
PyQt5                         5.15.10
PyQt5-sip                     12.13.0
PyQtWebEngine                 5.15.6
pyro-api                      0.1.2
pyro-ppl                      1.8.6
python-dateutil               2.8.2
python-igraph                 0.11.3
python-json-logger            2.0.7
python-lsp-black              2.0.0
python-lsp-jsonrpc            1.1.2
python-lsp-server             1.10.0
python-multipart              0.0.7
python-slugify                5.0.2
pytoolconfig                  1.2.6
pytorch-lightning             2.2.0
pytorch-tabnet                4.1.0
pytz                          2023.3.post1
pywin32                       305.1
pywin32-ctypes                0.2.2
pywinpty                      2.0.10
PyYAML                        6.0.1
pyzmq                         25.1.2
QDarkStyle                    3.2.3
qstylizer                     0.2.2
QtAwesome                     1.3.1
qtconsole                     5.5.1
QtPy                          2.4.1
readchar                      4.0.5
referencing                   0.30.2
requests                      2.31.0
rfc3339-validator             0.1.4
rfc3986-validator             0.1.1
rich                          13.0.0
rope                          1.12.0
rpds-py                       0.10.6
rpy2                          3.5.3
Rtree                         1.0.1
runs                          1.2.2
s3transfer                    0.10.0
scanorama                     1.7.4
scanpy                        1.9.8
scib                          1.1.4
scikit-image                  0.24.0
scikit-learn                  1.4.0
scikit-misc                   0.3.1
scipy                         1.12.0
scparadise                    0.1.1b0
scrublet                      0.2.3
scvi-tools                    1.0.4
seaborn                       0.13.2
Send2Trash                    1.8.2
session-info                  1.0.0
setuptools                    68.2.2
sip                           6.7.12
six                           1.16.0
sniffio                       1.3.0
snowballstemmer               2.2.0
sortedcontainers              2.4.0
soupsieve                     2.5
sparse                        0.15.1
SpeechRecognition             3.10.1
Sphinx                        7.3.7
sphinxcontrib-applehelp       1.0.2
sphinxcontrib-devhelp         1.0.2
sphinxcontrib-htmlhelp        2.0.0
sphinxcontrib-jsmath          1.0.1
sphinxcontrib-qthelp          1.0.3
sphinxcontrib-serializinghtml 1.1.10
spyder                        5.5.1
spyder-kernels                2.5.0
SQLAlchemy                    2.0.31
stack-data                    0.2.0
starlette                     0.36.3
starsessions                  1.3.0
statsmodels                   0.14.1
stdlib-list                   0.10.0
sympy                         1.12
tabulate                      0.9.0
tensorstore                   0.1.53
terminado                     0.17.1
text-unidecode                1.3
textdistance                  4.2.1
texttable                     1.7.0
threadpoolctl                 3.2.0
three-merge                   0.1.1
tifffile                      2024.8.30
tinycss2                      1.2.1
tomli                         2.0.1
tomlkit                       0.13.2
toolz                         0.12.1
torch                         2.2.0
torchmetrics                  1.3.0.post0
tornado                       6.3.3
tqdm                          4.66.1
traitlets                     5.7.1
types-python-dateutil         2.8.19.20240106
typing_extensions             4.9.0
tzdata                        2023.4
tzlocal                       5.2
ujson                         5.10.0
umap-learn                    0.5.5
Unidecode                     1.3.8
urllib3                       1.26.18
uvicorn                       0.27.0.post1
watchdog                      4.0.1
wcwidth                       0.2.5
webencodings                  0.5.1
websocket-client              0.58.0
websockets                    12.0
whatthepatch                  1.0.2
wheel                         0.41.2
wrapt                         1.16.0
xarray                        2024.1.1
xmod                          1.8.1
yapf                          0.40.2
yarl                          1.9.4
zipp                          3.17.0
Note: you may need to restart the kernel to use updated packages.
[ ]: