<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">ellibs</journal-id><journal-title-group><journal-title xml:lang="ru">Электронные библиотеки</journal-title><trans-title-group xml:lang="en"><trans-title>Russian Digital Libraries Journal</trans-title></trans-title-group></journal-title-group><issn pub-type="epub">1562-5419</issn><publisher><publisher-name>Казанский (Приволжский) федеральный университет</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.26907/1562-5419-2025-28-5-1165-1185</article-id><article-id custom-type="elpub" pub-id-type="custom">ellibs-614</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>Статьи</subject></subj-group></article-categories><title-group><article-title>Сокрытие в смысле: семантическое кодирование для генеративно-текстовой стеганографии</article-title><trans-title-group xml:lang="en"><trans-title>Hiding in Meaning: Semantic Encoding for Generative Text Steganography</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Рогов</surname><given-names>Олег Юрьевич</given-names></name><name name-style="western" xml:lang="en"><surname>Rogov</surname><given-names>Oleg Yurievich</given-names></name></name-alternatives><email xlink:type="simple">rogov@airi.net</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Инденбом</surname><given-names>Дмитрий Евгеньевич</given-names></name><name name-style="western" 
xml:lang="en"><surname>Indenbom</surname><given-names>Dmitrii Evgenievich</given-names></name></name-alternatives><email xlink:type="simple">indenbom.de@phystech.edu</email><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Корж</surname><given-names>Дмитрий Сергеевич</given-names></name><name name-style="western" xml:lang="en"><surname>Korzh</surname><given-names>Dmitrii Sergeevich</given-names></name></name-alternatives><email xlink:type="simple">korzh@airi.net</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Пугачёва</surname><given-names>Дарья Валерьевна</given-names></name><name name-style="western" xml:lang="en"><surname>Pugacheva</surname><given-names>Darya Valeryevna</given-names></name></name-alternatives><email xlink:type="simple">Daria.Pugacheva@skoltech.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Воронов</surname><given-names>Всеволод Александрович</given-names></name><name name-style="western" xml:lang="en"><surname>Voronov</surname><given-names>Vsevolod Alexandrovich</given-names></name></name-alternatives><email xlink:type="simple">v-vor@yandex.ru</email><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Тутубалина</surname><given-names>Елена Викторовна</given-names></name><name name-style="western" xml:lang="en"><surname>Tutubalina</surname><given-names>Elena Viktorovna</given-names></name></name-alternatives><email xlink:type="simple">tutubalina@airi.net</email><xref ref-type="aff" rid="aff-1"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru"><institution>Институт 
искусственного интеллекта</institution></aff><aff xml:lang="en"><institution>Artificial Intelligence Research Institute</institution></aff></aff-alternatives><aff-alternatives id="aff-2"><aff xml:lang="ru"><institution>Московский физико-технический институт</institution></aff><aff xml:lang="en"><institution>Moscow Institute of Physics and Technology</institution></aff></aff-alternatives><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>19</day><month>12</month><year>2025</year></pub-date><volume>28</volume><issue>5</issue><fpage>1165</fpage><lpage>1185</lpage><permissions><copyright-statement>Copyright © Рогов О.Ю., Инденбом Д.Е., Корж Д.С., Пугачёва Д.В., Воронов В.А., Тутубалина Е.В., 2025</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="ru">Рогов О.Ю., Инденбом Д.Е., Корж Д.С., Пугачёва Д.В., Воронов В.А., Тутубалина Е.В.</copyright-holder><copyright-holder xml:lang="en">Rogov O.Y., Indenbom D.E., Korzh D.S., Pugacheva D.V., Voronov V.A., Tutubalina E.V.</copyright-holder><license xml:lang="ru" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>Данная работа распространяется под лицензией Creative Commons Attribution 4.0.</license-p></license><license xml:lang="en" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://ellibs.elpub.ru/jour/article/view/614">https://ellibs.elpub.ru/jour/article/view/614</self-uri><abstract><p>В статье предложена новая система для генерации стеганографического текста, скрывающая двоичные сообщения в семантически связном естественном языке с помощью скрытого пространства, обусловливающего большие языковые модели (LLM). 
Секретные сообщения сначала кодируются в непрерывные векторы с помощью обученного отображения двоичного кода в скрытое пространство, которое используется для управления генерацией текста посредством донастройки префикса. В отличие от предыдущих методов стеганографии на уровне токенов или синтаксиса, наш метод позволяет избежать явной манипуляции словами и вместо этого работает полностью в скрытом семантическом пространстве, что обеспечивает более плавные и менее заметные результаты. На стороне получателя скрытое представление восстанавливается из сгенерированного текста и декодируется обратно в исходное сообщение. 
В качестве ключевого теоретического вклада мы предоставляем гарантию надежности: если восстановленный скрытый вектор находится в пределах ограниченного расстояния от изначального, обеспечивается точное восстановление сообщения, причем граница определяется константой Липшица декодера и минимальным отступом логитов. Этот формальный результат предлагает принципиальный подход к компромиссу между надежностью и емкостью в скрытых стеганографических системах. Эмпирическая оценка как на синтетических данных, так и в практических предметных областях, таких как отзывы на Amazon, показывает, что наш метод достигает высокой точности восстановления сообщений (выше 91%), высокой плавности текста и конкурентоспособной емкости до 6 бит на элемент предложения, сохраняя при этом устойчивость к нейронному стегоанализу. Эти результаты демонстрируют, что генерация со скрытым условием предлагает безопасный и практичный путь для встраивания информации в современные LLM.
</p></abstract><trans-abstract xml:lang="en"><p>We propose a novel framework for steganographic text generation that hides binary messages within semantically coherent natural language using latent-space conditioning of large language models (LLMs). Secret messages are first encoded into continuous vectors via a learned binary-to-latent mapping, which is used to guide text generation through prefix tuning. Unlike prior token-level or syntactic steganography, our method avoids explicit word manipulation and instead operates entirely within the latent semantic space, enabling more fluent and less detectable outputs. On the receiver side, the latent representation is recovered from the generated text and decoded back into the original message. As a key theoretical contribution, we provide a robustness guarantee: if the recovered latent vector lies within a bounded distance of the original, exact message reconstruction is ensured, with the bound determined by the decoder’s Lipschitz continuity and the minimum logit margin. This formal result offers a principled view of the reliability–capacity trade-off in latent steganographic systems. Empirical evaluation on both synthetic data and real-world domains such as Amazon reviews shows that our method achieves high message recovery accuracy (above 91%), strong text fluency and competitive capacity up to 6 bits per sentence element while maintaining resilience against neural steganalysis. These findings demonstrate that latent conditioned generation offers a secure and practical pathway for embedding information in modern LLMs.
</p></trans-abstract><kwd-group xml:lang="ru"><kwd>стеганография</kwd><kwd>семантическое кодирование</kwd><kwd>языковые модели</kwd><kwd>донастройка префиксов</kwd><kwd>граф знаний</kwd><kwd>генерация естественного языка</kwd><kwd>скрытое обусловливание</kwd><kwd>нейронный стегоанализ</kwd></kwd-group><kwd-group xml:lang="en"><kwd>steganography</kwd><kwd>semantic encoding</kwd><kwd>language models</kwd><kwd>prefix tuning</kwd><kwd>knowledge graphs</kwd><kwd>natural language generation</kwd><kwd>latent conditioning</kwd><kwd>neural steganalysis</kwd></kwd-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Karimov E., Varlamov A., Ivanov D., Korzh D., and Rogov O.Y. Novel Loss-Enhanced Universal Adversarial Patches for Sustainable Speaker Privacy. — 2025. — arXiv:2505.19951.</mixed-citation><mixed-citation xml:lang="en">Karimov E., Varlamov A., Ivanov D., Korzh D., and Rogov O.Y. Novel Loss-Enhanced Universal Adversarial Patches for Sustainable Speaker Privacy. — 2025. — arXiv:2505.19951.</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Moraldo H.H. An Approach for Text Steganography Based on Markov Chains // ArXiv. 2014. Vol. abs/1409.0915.</mixed-citation><mixed-citation xml:lang="en">Moraldo H.H. An Approach for Text Steganography Based on Markov Chains // ArXiv. 2014. Vol. abs/1409.0915.</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Fang T., Jaggi M., Argyraki K. Generating steganographic text with LSTMs // arXiv preprint arXiv:1705.10742. 2017.</mixed-citation><mixed-citation xml:lang="en">Fang T., Jaggi M., Argyraki K. Generating steganographic text with LSTMs // arXiv preprint arXiv:1705.10742. 
2017.</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Yang Z.-L., Guo X.-Q., Chen Z.-M., Huang Y.-F., Zhang Y.-J. RNN-stega: Linguistic steganography based on recurrent neural networks // IEEE Transactions on Information Forensics and Security. 2018. Vol. 14, No. 5. P. 1280–1295.</mixed-citation><mixed-citation xml:lang="en">Yang Z.-L., Guo X.-Q., Chen Z.-M., Huang Y.-F., Zhang Y.-J. RNN-stega: Linguistic steganography based on recurrent neural networks // IEEE Transactions on Information Forensics and Security. 2018. Vol. 14, No. 5. P. 1280–1295.</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Yang Z.-L., Zhang S.-Y., Hu Y.-T., Hu Z.-W., Huang Y.-F. VAE-Stega: linguistic steganography based on variational auto-encoder // IEEE Transactions on Information Forensics and Security. 2020. Vol. 16. P. 880–895.</mixed-citation><mixed-citation xml:lang="en">Yang Z.-L., Zhang S.-Y., Hu Y.-T., Hu Z.-W., Huang Y.-F. VAE-Stega: linguistic steganography based on variational auto-encoder // IEEE Transactions on Information Forensics and Security. 2020. Vol. 16. P. 880–895.</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Ziegler Z., Deng Y., Rush A. M. Neural Linguistic Steganography // Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). 2019. P. 1210–1215.</mixed-citation><mixed-citation xml:lang="en">Ziegler Z., Deng Y., Rush A. M. Neural Linguistic Steganography // Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). 2019. P. 
1210–1215.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Dai F.Z., Cai Z. Towards near-imperceptible steganographic text // arXiv preprint arXiv:1907.06679. 2019.</mixed-citation><mixed-citation xml:lang="en">Dai F.Z., Cai Z. Towards near-imperceptible steganographic text // arXiv preprint arXiv:1907.06679. 2019.</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Zhang S., Yang Z., Yang J., Huang Y. Provably Secure Generative Linguistic Steganography // Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021. 2021. P. 3046–3055.</mixed-citation><mixed-citation xml:lang="en">Zhang S., Yang Z., Yang J., Huang Y. Provably Secure Generative Linguistic Steganography // Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021. 2021. P. 3046–3055.</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Ding J., Chen K., Wang Y., Zhao N., Zhang W., Yu N. Discop: Provably Secure Steganography in Practice Based on “Distribution Copies” // 2023 IEEE Symposium on Security and Privacy (SP) / IEEE Computer Society. 2023. P. 2238–2255.</mixed-citation><mixed-citation xml:lang="en">Ding J., Chen K., Wang Y., Zhao N., Zhang W., Yu N. Discop: Provably Secure Steganography in Practice Based on “Distribution Copies” // 2023 IEEE Symposium on Security and Privacy (SP) / IEEE Computer Society. 2023. P. 2238–2255.</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Borisov V., Seßler K., Leemann T., Pawelczyk M., Kasneci G. Language models are realistic tabular data generators // arXiv preprint arXiv:2210.06280. 2022.</mixed-citation><mixed-citation xml:lang="en">Borisov V., Seßler K., Leemann T., Pawelczyk M., Kasneci G. 
Language models are realistic tabular data generators // arXiv preprint arXiv:2210.06280. 2022.</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Chia Y.K., Bing L., Poria S., Si L. RelationPrompt: Leveraging prompts to generate synthetic data for zero-shot relation triplet extraction // arXiv preprint arXiv:2203.09101. 2022.</mixed-citation><mixed-citation xml:lang="en">Chia Y.K., Bing L., Poria S., Si L. RelationPrompt: Leveraging prompts to generate synthetic data for zero-shot relation triplet extraction // arXiv preprint arXiv:2203.09101. 2022.</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Schick T., Schütze H. Generating datasets with pretrained language models // arXiv preprint arXiv:2104.07540. 2021.</mixed-citation><mixed-citation xml:lang="en">Schick T., Schütze H. Generating datasets with pretrained language models // arXiv preprint arXiv:2104.07540. 2021.</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Meng Y., Huang J., Zhang Y., Han J. Generating training data with language models: Towards zero-shot language understanding // Advances in Neural Information Processing Systems. 2022. Vol. 35. P. 462–477.</mixed-citation><mixed-citation xml:lang="en">Meng Y., Huang J., Zhang Y., Han J. Generating training data with language models: Towards zero-shot language understanding // Advances in Neural Information Processing Systems. 2022. Vol. 35. P. 462–477.</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Ye J., Gao J., Li Q., Xu H., Feng J., Wu Z., Yu T., Kong L. Zerogen: Efficient zero-shot learning via dataset generation // arXiv preprint arXiv:2202.07922. 
2022.</mixed-citation><mixed-citation xml:lang="en">Ye J., Gao J., Li Q., Xu H., Feng J., Wu Z., Yu T., Kong L. Zerogen: Efficient zero-shot learning via dataset generation // arXiv preprint arXiv:2202.07922. 2022.</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">Wang Y., Ma X., Chen Z., Luo Y., Yi J., Bailey J. Symmetric cross entropy for robust learning with noisy labels // Proceedings of the IEEE/CVF international conference on computer vision. 2019. P. 322–330.</mixed-citation><mixed-citation xml:lang="en">Wang Y., Ma X., Chen Z., Luo Y., Yi J., Bailey J. Symmetric cross entropy for robust learning with noisy labels // Proceedings of the IEEE/CVF international conference on computer vision. 2019. P. 322–330.</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Gao J., Pi R., Yong L., Xu H., Ye J., Wu Z., Zhang W., Liang X., Li Z., Kong L. Self-guided noise-free data generation for efficient zero-shot learning // International Conference on Learning Representations (ICLR 2023). 2023.</mixed-citation><mixed-citation xml:lang="en">Gao J., Pi R., Yong L., Xu H., Ye J., Wu Z., Zhang W., Liang X., Li Z., Kong L. Self-guided noise-free data generation for efficient zero-shot learning // International Conference on Learning Representations (ICLR 2023). 2023.</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Chen D., Lee C., Lu Y., Rosati D., Yu Z. Mixture of Soft Prompts for Controllable Data Generation // arXiv preprint arXiv:2303.01580. 2023.</mixed-citation><mixed-citation xml:lang="en">Chen D., Lee C., Lu Y., Rosati D., Yu Z. Mixture of Soft Prompts for Controllable Data Generation // arXiv preprint arXiv:2303.01580. 
2023.</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Yu Y., Zhuang Y., Zhang J., Meng Y., Ratner A., Krishna R., Shen J., Zhang C. Large language model as attributed training data generator: A tale of diversity and bias // arXiv preprint arXiv:2306.15895. 2023.</mixed-citation><mixed-citation xml:lang="en">Yu Y., Zhuang Y., Zhang J., Meng Y., Ratner A., Krishna R., Shen J., Zhang C. Large language model as attributed training data generator: A tale of diversity and bias // arXiv preprint arXiv:2306.15895. 2023.</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
