From a30566e1bf8c865cdca76ef5d367d7e179d4b02a Mon Sep 17 00:00:00 2001
From: hariprasathys22
Date: Thu, 11 Dec 2025 14:58:27 +0530
Subject: [PATCH 1/2] Insert the system instruction at the beginning of the
 user prompt

---
 .../0036_openaiagent_merge_system_prompt.py | 18 ++++++++++
 core/models/agent.py                        | 33 +++++++++++++++----
 2 files changed, 45 insertions(+), 6 deletions(-)
 create mode 100644 core/migrations/0036_openaiagent_merge_system_prompt.py

diff --git a/core/migrations/0036_openaiagent_merge_system_prompt.py b/core/migrations/0036_openaiagent_merge_system_prompt.py
new file mode 100644
index 0000000..cb5d56c
--- /dev/null
+++ b/core/migrations/0036_openaiagent_merge_system_prompt.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.5 on 2025-12-11 09:22
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0035_alter_openaiagent_max_tokens_digest'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='openaiagent',
+            name='merge_system_prompt',
+            field=models.BooleanField(default=False, help_text="Enable for models that don't support system instructions (e.g., Gemma 3)", verbose_name='Merge System Prompt to User Message'),
+        ),
+    ]
diff --git a/core/models/agent.py b/core/models/agent.py
index 6d0a7d8..013b37d 100644
--- a/core/models/agent.py
+++ b/core/models/agent.py
@@ -93,6 +93,11 @@ class OpenAIAgent(Agent):
         default=0,
         help_text=_("Maximum requests per minute (0 = no limit)"),
     )
+    merge_system_prompt = models.BooleanField(
+        _("Merge System Prompt to User Message"),
+        default=False,
+        help_text=_("Enable for models that don't support system instructions (e.g., Gemma 3)")
+    )
     EXTRA_HEADERS = {
         "HTTP-Referer": "https://www.rssbox.app",
         "X-Title": "RSSBox",
@@ -260,9 +265,28 @@ def completions(
 
         # Apply rate limiting
         self._wait_for_rate_limit()
-
+
+        if self.merge_system_prompt:
+            merged_content = f"{system_prompt}\n\n{text}"
+            messages = [
+                {"role": "user", "content": merged_content}
+            ]
+            system_prompt_tokens = 0
+            input_tokens = get_token_count(merged_content)
+        else:
+            messages = [
+                {
+                    "role": "system",
+                    "content": system_prompt,
+                },
+                {
+                    "role": "user",
+                    "content": text
+                }
+            ]
         # Count the tokens consumed by the system prompt
-        system_prompt_tokens = get_token_count(system_prompt)
+            system_prompt_tokens = get_token_count(system_prompt)
+            input_tokens = get_token_count(system_prompt) + get_token_count(text)
         # Get the maximum usable token count (reserving a buffer)
         if self.max_tokens == 0:
             task_manager.submit_task(
@@ -328,10 +352,7 @@ def completions(
         res = client.with_options(max_retries=3).chat.completions.create(
             extra_headers=self.EXTRA_HEADERS,
             model=self.model,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": text},
-            ],
+            messages=messages,
             **call_kwargs,
         )
         if (

From 072763aa877eab313a611e29a8334e6c7459db77 Mon Sep 17 00:00:00 2001
From: Versun
Date: Thu, 11 Dec 2025 20:27:26 +0800
Subject: [PATCH 2/2] refactor: Improve token calculation logic in OpenAIAgent
 for merged system prompts

---
 core/models/agent.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/core/models/agent.py b/core/models/agent.py
index 013b37d..550d838 100644
--- a/core/models/agent.py
+++ b/core/models/agent.py
@@ -271,6 +271,7 @@ def completions(
             messages = [
                 {"role": "user", "content": merged_content}
             ]
+            # When merging the system prompt, system_prompt_tokens is 0 (it is already part of merged_content)
             system_prompt_tokens = 0
             input_tokens = get_token_count(merged_content)
         else:
             messages = [
                 {
                     "role": "system",
                     "content": system_prompt,
                 },
                 {
                     "role": "user",
                     "content": text
                 }
             ]
-        # Count the tokens consumed by the system prompt
+            # Count the tokens consumed by the system prompt
             system_prompt_tokens = get_token_count(system_prompt)
             input_tokens = get_token_count(system_prompt) + get_token_count(text)
+
         # Get the maximum usable token count (reserving a buffer)
         if self.max_tokens == 0:
             task_manager.submit_task(
@@ -298,9 +300,19 @@ def completions(
                 "max_tokens is not set, please wait for the model limit detection to complete"
             )
 
-        max_usable_tokens = (
-            self.max_tokens - system_prompt_tokens - 100
-        )  # 100 token buffer
+        # Compute the maximum usable token count
+        # When merge_system_prompt=True, the system prompt is already part of merged_content, so only the text tokens need to fit
+        if self.merge_system_prompt:
+            # In merge mode, subtract the system prompt tokens from the total limit (they still consume input space)
+            system_prompt_token_cost = get_token_count(system_prompt)
+            max_usable_tokens = (
+                self.max_tokens - system_prompt_token_cost - 100
+            )  # 100 token buffer
+        else:
+            max_usable_tokens = (
+                self.max_tokens - system_prompt_tokens - 100
+            )  # 100 token buffer
+
         # Check whether the text needs to be chunked
         if get_token_count(text) > max_usable_tokens:
             logger.info(
@@ -332,7 +344,7 @@ def completions(
             return {"text": result_text, "tokens": tokens}
 
         # Compute a reasonable output token limit
-        input_tokens = get_token_count(system_prompt) + get_token_count(text)
+        # input_tokens was already computed above according to merge_system_prompt, so use it directly
         # Output token limit = model total limit - input tokens - safety buffer
         output_token_limit = int(max(4096, (self.max_tokens - input_tokens) * 0.8))
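
Reviewer note (not part of the patches): a minimal standalone sketch of the
message construction and token budgeting that the two commits introduce, for
sanity-checking both branches outside Django. count_tokens and
MODEL_MAX_TOKENS are illustrative stand-ins for the project's get_token_count
helper and self.max_tokens; the numbers are assumptions, not the real
tokenizer or model limit.

    def count_tokens(s: str) -> int:
        # Crude stand-in tokenizer: roughly 4 characters per token
        # (assumption; the project uses its own get_token_count helper).
        return max(1, len(s) // 4)

    MODEL_MAX_TOKENS = 8192  # assumed context limit, for illustration only

    def build_request(system_prompt: str, text: str, merge_system_prompt: bool):
        if merge_system_prompt:
            # Models without system-role support (e.g., Gemma 3) get the
            # system prompt prepended to the user message instead.
            merged_content = f"{system_prompt}\n\n{text}"
            messages = [{"role": "user", "content": merged_content}]
            input_tokens = count_tokens(merged_content)
        else:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ]
            input_tokens = count_tokens(system_prompt) + count_tokens(text)
        # Both branches budget the input the same way: the total limit minus
        # the system prompt cost minus a 100-token buffer.
        max_usable_tokens = MODEL_MAX_TOKENS - count_tokens(system_prompt) - 100
        # Output budget: at least 4096 tokens, else 80% of what remains.
        output_token_limit = int(max(4096, (MODEL_MAX_TOKENS - input_tokens) * 0.8))
        return messages, max_usable_tokens, output_token_limit

With merge_system_prompt=True the request carries a single user message, so a
provider that rejects the system role still receives the instructions, and the
token budget is unchanged because the merged prompt occupies the same input
space either way.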