---
license: afl-3.0
---

## Suri: Develop new algorithms that are more efficient at training models on scRNA-seq data

## Data

## Usage

- Pretrain on single-cell RNA-seq data

```bash
python pretrain.py --data_path "data_path"
```

## Time cost

Typical install time on a "normal" desktop computer is about 30 minutes.

Expected run time for inferring 10,000 cells on a "normal" desktop computer is about 25 minutes.

## Disclaimer

This project is used for academic research purposes.

## Citations

You can find more information in these citations if you are interested in the technical details.

```bibtex
@article{yang2022scbert,
  author    = {Yang, Fan and Wang, Wenchuan and Wang, Fang and Fang, Yuan and Tang, Duyu and Huang, Junzhou and Lu, Hui and Yao, Jianhua},
  title     = {{scBERT} as a large-scale pretrained deep language model for cell type annotation of single-cell {RNA-seq} data},
  journal   = {Nature Machine Intelligence},
  volume    = {4},
  number    = {10},
  pages     = {852--866},
  year      = {2022},
  publisher = {Nature Publishing Group UK},
  address   = {London},
}
```

```bibtex
@inproceedings{choromanski2020rethinking,
  author    = {Choromanski, Krzysztof and Likhosherstov, Valerii and Dohan, David and Song, Xingyou and Gane, Andreea and Sarlos, Tamas and Hawkins, Peter and Davis, Jared and Mohiuddin, Afroz and Kaiser, Lukasz and Belanger, David and Colwell, Lucy and Weller, Adrian},
  title     = {Rethinking Attention with {Performers}},
  booktitle = {International Conference on Learning Representations},
  year      = {2021},
}
```

```bibtex
@misc{liu2023sophia,
  author        = {Liu, Hong and Li, Zhiyuan and Hall, David and Liang, Percy and Ma, Tengyu},
  title         = {{Sophia}: A Scalable Stochastic Second-order Optimizer for Language Model Pre-training},
  year          = {2023},
  eprint        = {2305.14342},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2305.14342},
}
```