<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">ellibs</journal-id><journal-title-group><journal-title xml:lang="ru">Электронные библиотеки</journal-title><trans-title-group xml:lang="en"><trans-title>Russian Digital Libraries Journal</trans-title></trans-title-group></journal-title-group><issn pub-type="epub">1562-5419</issn><publisher><publisher-name>Казанский (Приволжский) федеральный университет</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.26907/1562-5419-2025-28-5-1253-1266</article-id><article-id custom-type="elpub" pub-id-type="custom">ellibs-618</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>Статьи</subject></subj-group></article-categories><title-group><article-title>Искусственный интеллект в решении проблемы онкопрофилактики: ретроспективное исследование</article-title><trans-title-group xml:lang="en"><trans-title>AI in Cancer Prevention: a Retrospective Study</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Филоненко</surname><given-names>Петр Александрович</given-names></name><name name-style="western" xml:lang="en"><surname>Philonenko</surname><given-names>Petr Aleksandrovich</given-names></name></name-alternatives><email xlink:type="simple">petr-filonenko@mail.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Кох</surname><given-names>Владимир Николаевич</given-names></name><name name-style="western" 
xml:lang="en"><surname>Kokh</surname><given-names>Vladimir Nikolaevich</given-names></name></name-alternatives><email xlink:type="simple">kokh.v.n@sber.ru</email><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Блинов</surname><given-names>Павел Дмитриевич</given-names></name><name name-style="western" xml:lang="en"><surname>Blinov</surname><given-names>Pavel Dmitrievich</given-names></name></name-alternatives><email xlink:type="simple">blinov.p.d@sber.ru</email><xref ref-type="aff" rid="aff-1"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru"><institution>Sber AI Lab</institution></aff><aff xml:lang="en"><institution>Sber AI Lab</institution></aff></aff-alternatives><aff-alternatives id="aff-2"><aff xml:lang="ru"><institution>Sber AI</institution></aff><aff xml:lang="en"><institution>Sber AI</institution></aff></aff-alternatives><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>19</day><month>12</month><year>2025</year></pub-date><volume>28</volume><issue>5</issue><fpage>1253</fpage><lpage>1266</lpage><permissions><copyright-statement>Copyright © Филоненко П.А., Кох В.Н., Блинов П.Д., 2025</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="ru">Филоненко П.А., Кох В.Н., Блинов П.Д.</copyright-holder><copyright-holder xml:lang="en">Philonenko P.A., Kokh V.N., Blinov P.D.</copyright-holder><license xml:lang="ru" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>Данная работа распространяется под лицензией Creative Commons Attribution 4.0.</license-p></license><license xml:lang="en" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons 
Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://ellibs.elpub.ru/jour/article/view/618">https://ellibs.elpub.ru/jour/article/view/618</self-uri><abstract><p>Исследована возможность эффективного решения задачи популяционной онкопрофилактики с помощью методов искусственного интеллекта (ИИ), прогнозирующих риск злокачественных новообразований (ЗНО) на основе минимального набора данных из электронной медицинской карты (ЭМК) – кодов медицинских диагнозов и услуг. Для решения поставленной задачи рассмотрен широкий спектр современных подходов, включающих методы классического машинного обучения, анализа выживаемости, глубокого обучения и больших языковых моделей (LLM). Численные эксперименты показали, что наилучшей способностью ранжирования пациентов по уровню риска ЗНО обладает градиентный бустинг, использующий модели анализа выживаемости в качестве дополнительных предикторов, что позволяет учитывать как популяционные, так и индивидуальные факторы риска ЗНО. Из данных ЭМК были сконструированы предикторы, включающие демографические характеристики, паттерны обращений за медицинской помощью и клинические маркеры. Это решение было протестировано в ретроспективных экспериментах под контролем профильных врачей-онкологов. В ретроспективном эксперименте с участием более 1.9 млн пациентов установлено, что в группу риска попадает до 5.4 раза больше пациентов с ЗНО при том же уровне медицинских обследований. Предложенный метод представляет собой масштабируемое решение, использующее исключительно коды диагнозов и услуг, не требующее специализированной инфраструктуры и интегрируемое в процесс онконастороженности, что делает его применимым для решения задач популяционной онкопрофилактики.
</p></abstract><trans-abstract xml:lang="en"><p>This study investigates the feasibility of effectively solving population-scale cancer screening problems using artificial intelligence (AI) methods that predict malignant neoplasm risk based on minimal electronic health record (EHR) data – medical diagnosis and service codes. To address the formulated problem, we considered a broad spectrum of modern approaches, including classical machine learning methods, survival analysis, deep learning, and large language models (LLMs). Numerical experiments demonstrated that gradient boosting using survival analysis models as additional predictors possesses the best ability to rank patients by cancer risk level, enabling consideration of both population-level and individual risk factors for malignant neoplasms. Predictors constructed from EHR data include demographic characteristics, healthcare utilization patterns, and clinical markers. This solution was tested in retrospective experiments under the supervision of specialized oncologists. In the retrospective experiment involving more than 1.9 million patients, we established that the risk group captures up to 5.4 times more patients with cancer at the same level of medical examinations. The investigated method represents a scalable solution using exclusively diagnosis and service codes, requiring no specialized infrastructure and integrable into oncological vigilance processes, making it applicable for population-scale cancer screening.
</p></trans-abstract><kwd-group xml:lang="ru"><kwd>ИИ в медицине</kwd><kwd>популяционная онкопрофилактика</kwd><kwd>ретроспективные эксперименты</kwd></kwd-group><kwd-group xml:lang="en"><kwd>AI in medicine</kwd><kwd>cancer prevention</kwd><kwd>retrospective experiments</kwd></kwd-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Kaprin A. D., Starinskiy V. V., Shakhzadova A. O. Malignant neoplasms in Russia in 2023 (incidence and mortality) / Ed. by A. D. Kaprin, V. V. Starinskiy, A. O. Shakhzadova. Moscow: P. A. Herzen Moscow Oncology Research Institute — Branch of the National Medical Research Radiological Centre of the Ministry of Health of Russia, 2024. 276 p. ISBN 978-5-85502-298-8. (In Russian).</mixed-citation><mixed-citation xml:lang="en">Kaprin A. D., Starinskiy V. V., Shakhzadova A. O. Malignant neoplasms in Russia in 2023 (incidence and mortality) / Ed. by A. D. Kaprin, V. V. Starinskiy, A. O. Shakhzadova. Moscow: P. A. Herzen Moscow Oncology Research Institute — Branch of the National Medical Research Radiological Centre of the Ministry of Health of Russia, 2024. 276 p. ISBN 978-5-85502-298-8. (In Russian).</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Cenin D. R., Tinmouth J., Naber S. K., Khalaf N., Rabeneck L., Tinmouth J. M., Earle C. C., Hilsden R. J., Leddin D., Rostom A., Issaka R. B., Heitman S. J., Lansdorp-Vogelaar I. Calculation of stop ages for colorectal cancer screening based on comorbidities and screening history. Clinical Gastroenterology and Hepatology, 2021, vol. 19, no. 3, pp. 547–555. https://doi.org/10.1016/j.cgh.2020.05.038</mixed-citation><mixed-citation xml:lang="en">Cenin D. R., Tinmouth J., Naber S. K., Khalaf N., Rabeneck L., Tinmouth J. M., Earle C. C., Hilsden R. J., Leddin D., Rostom A., Issaka R. B., Heitman S. J., Lansdorp-Vogelaar I. 
Calculation of stop ages for colorectal cancer screening based on comorbidities and screening history. Clinical Gastroenterology and Hepatology, 2021, vol. 19, no. 3, pp. 547–555. https://doi.org/10.1016/j.cgh.2020.05.038</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Ratushnyak S., Hoogendoorn M., van Baal P. H. M. Cost-effectiveness of cancer screening: health and costs in life years gained. American Journal of Preventive Medicine, 2019, vol. 57, no. 6, pp. 792–799. https://doi.org/10.1016/j.amepre.2019.07.027</mixed-citation><mixed-citation xml:lang="en">Ratushnyak S., Hoogendoorn M., van Baal P. H. M. Cost-effectiveness of cancer screening: health and costs in life years gained. American Journal of Preventive Medicine, 2019, vol. 57, no. 6, pp. 792–799. https://doi.org/10.1016/j.amepre.2019.07.027</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Alexander M., Burbury K. A systematic review of biomarkers for the prediction of thromboembolism in lung cancer — Results, practical issues and proposed strategies for future risk prediction models. Thrombosis Research, 2016, vol. 148, pp. 63–69. https://doi.org/10.1016/j.thromres.2016.10.020</mixed-citation><mixed-citation xml:lang="en">Alexander M., Burbury K. A systematic review of biomarkers for the prediction of thromboembolism in lung cancer — Results, practical issues and proposed strategies for future risk prediction models. Thrombosis Research, 2016, vol. 148, pp. 63–69. https://doi.org/10.1016/j.thromres.2016.10.020</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Jacobs M. F. Predicting cancer risk based on family history. eLife, 2021, vol. 10, e73380. https://doi.org/10.7554/eLife.73380</mixed-citation><mixed-citation xml:lang="en">Jacobs M. F. 
Predicting cancer risk based on family history. eLife, 2021, vol. 10, e73380. https://doi.org/10.7554/eLife.73380</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Wang X., Oldani M. J., Zhao X., Huang X., Qian D. A review of cancer risk prediction models with genetic variants. Cancer Informatics, 2014, vol. 13, suppl. 2, pp. 19–28. https://doi.org/10.4137/CIN.S13788</mixed-citation><mixed-citation xml:lang="en">Wang X., Oldani M. J., Zhao X., Huang X., Qian D. A review of cancer risk prediction models with genetic variants. Cancer Informatics, 2014, vol. 13, suppl. 2, pp. 19–28. https://doi.org/10.4137/CIN.S13788</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Zhu M. Recall, precision and average precision. Technical Report, Department of Statistics and Actuarial Science, University of Waterloo, Waterloo, 2004, 6 p.</mixed-citation><mixed-citation xml:lang="en">Zhu M. Recall, precision and average precision. Technical Report, Department of Statistics and Actuarial Science, University of Waterloo, Waterloo, 2004, 6 p.</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Lee C., Zame W. R., Yoon J., van der Schaar M. DeepHit: A deep learning approach to survival analysis with competing risks. Proceedings of the AAAI Conference on Artificial Intelligence, 2018, vol. 32, no. 1, pp. 2314–2321. https://doi.org/10.1609/aaai.v32i1.11842</mixed-citation><mixed-citation xml:lang="en">Lee C., Zame W. R., Yoon J., van der Schaar M. DeepHit: A deep learning approach to survival analysis with competing risks. Proceedings of the AAAI Conference on Artificial Intelligence, 2018, vol. 32, no. 1, pp. 2314–2321. 
https://doi.org/10.1609/aaai.v32i1.11842</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Nagpal C., Li X., Dubrawski A. Deep survival machines: Fully parametric survival regression and representation learning for censored data with competing risks. IEEE Journal of Biomedical and Health Informatics, 2021, vol. 25, no. 8, pp. 3163–3175. https://doi.org/10.1109/JBHI.2021.3052441</mixed-citation><mixed-citation xml:lang="en">Nagpal C., Li X., Dubrawski A. Deep survival machines: Fully parametric survival regression and representation learning for censored data with competing risks. IEEE Journal of Biomedical and Health Informatics, 2021, vol. 25, no. 8, pp. 3163–3175. https://doi.org/10.1109/JBHI.2021.3052441</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Babaev D., Ovsov N., Kireev I., Ivanova M., Gusev G., Nazarov I., Tuzhilin A. CoLES: Contrastive learning for event sequences with self-supervision. Proceedings of the 2022 International Conference on Management of Data (SIGMOD '22), New York, NY, USA, ACM, 2022, pp. 1190–1199. https://doi.org/10.1145/3514221.3526129</mixed-citation><mixed-citation xml:lang="en">Babaev D., Ovsov N., Kireev I., Ivanova M., Gusev G., Nazarov I., Tuzhilin A. CoLES: Contrastive learning for event sequences with self-supervision. Proceedings of the 2022 International Conference on Management of Data (SIGMOD '22), New York, NY, USA, ACM, 2022, pp. 1190–1199. https://doi.org/10.1145/3514221.3526129</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Blinov P., Kokh V. Medical profile model: scientific and practical applications in healthcare. IEEE Journal of Biomedical and Health Informatics, 2023, vol. 28, no. 1, pp. 450–458. 
https://doi.org/10.1109/JBHI.2023.3295631</mixed-citation><mixed-citation xml:lang="en">Blinov P., Kokh V. Medical profile model: scientific and practical applications in healthcare. IEEE Journal of Biomedical and Health Informatics, 2023, vol. 28, no. 1, pp. 450–458. https://doi.org/10.1109/JBHI.2023.3295631</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Yalunin A., Nesterov A., Umerenkov D. RuBioRoBERTa: a pre-trained biomedical language model for Russian language biomedical text mining. arXiv preprint, 2022, arXiv:2204.03951. https://doi.org/10.48550/arXiv.2204.03951</mixed-citation><mixed-citation xml:lang="en">Yalunin A., Nesterov A., Umerenkov D. RuBioRoBERTa: a pre-trained biomedical language model for Russian language biomedical text mining. arXiv preprint, 2022, arXiv:2204.03951. https://doi.org/10.48550/arXiv.2204.03951</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Philonenko P., Postovalov S. The new robust two-sample test for randomly right-censored data. Journal of Statistical Computation and Simulation, 2019, vol. 89, no. 8, pp. 1357–1375. https://doi.org/10.1080/00949655.2019.1577858</mixed-citation><mixed-citation xml:lang="en">Philonenko P., Postovalov S. The new robust two-sample test for randomly right-censored data. Journal of Statistical Computation and Simulation, 2019, vol. 89, no. 8, pp. 1357–1375. https://doi.org/10.1080/00949655.2019.1577858</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
