diff --git a/core/migrations/0036_openaiagent_merge_system_prompt.py b/core/migrations/0036_openaiagent_merge_system_prompt.py
new file mode 100644
index 0000000..cb5d56c
--- /dev/null
+++ b/core/migrations/0036_openaiagent_merge_system_prompt.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.5 on 2025-12-11 09:22
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0035_alter_openaiagent_max_tokens_digest'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='openaiagent',
+            name='merge_system_prompt',
+            field=models.BooleanField(default=False, help_text="Enable for models that don't support system instructions (e.g., Gemma 3)", verbose_name='Merge System Prompt to User Message'),
+        ),
+    ]
diff --git a/core/models/agent.py b/core/models/agent.py
index 6d0a7d8..550d838 100644
--- a/core/models/agent.py
+++ b/core/models/agent.py
@@ -93,6 +93,11 @@ class OpenAIAgent(Agent):
         default=0,
         help_text=_("Maximum requests per minute (0 = no limit)"),
     )
+    merge_system_prompt = models.BooleanField(
+        _("Merge System Prompt to User Message"),
+        default=False,
+        help_text=_("Enable for models that don't support system instructions (e.g., Gemma 3)")
+    )
     EXTRA_HEADERS = {
         "HTTP-Referer": "https://www.rssbox.app",
         "X-Title": "RSSBox",
@@ -260,9 +265,30 @@ def completions(
 
         # 应用速率限制
         self._wait_for_rate_limit()
-
-        # 计算系统提示的token占用
-        system_prompt_tokens = get_token_count(system_prompt)
+
+        if self.merge_system_prompt:
+            merged_content = f"{system_prompt}\n\n{text}"
+            messages = [
+                {"role": "user", "content": merged_content}
+            ]
+            # 当合并 system prompt 时,system_prompt_tokens 为 0(因为已包含在 merged_content 中)
+            system_prompt_tokens = 0
+            input_tokens = get_token_count(merged_content)
+        else:
+            messages = [
+                {
+                    "role": "system",
+                    "content": system_prompt,
+                },
+                {
+                    "role": "user",
+                    "content": text
+                }
+            ]
+            # 计算系统提示的token占用
+            system_prompt_tokens = get_token_count(system_prompt)
+            input_tokens = get_token_count(system_prompt) + get_token_count(text)
+
         # 获取最大可用token数(保留buffer)
         if self.max_tokens == 0:
             task_manager.submit_task(
@@ -274,9 +300,19 @@ def completions(
                 "max_tokens is not set, Please wait for the model limit detection to complete"
             )
 
-        max_usable_tokens = (
-            self.max_tokens - system_prompt_tokens - 100
-        )  # 100 token buffer
+        # 计算最大可用token数
+        # 当 merge_system_prompt=True 时,system_prompt 已包含在 merged_content 中,所以只需要考虑 text 的 token
+        if self.merge_system_prompt:
+            # 合并模式下,需要从总限制中减去 system_prompt 的 token(因为它会占用输入空间)
+            system_prompt_token_cost = get_token_count(system_prompt)
+            max_usable_tokens = (
+                self.max_tokens - system_prompt_token_cost - 100
+            )  # 100 token buffer
+        else:
+            max_usable_tokens = (
+                self.max_tokens - system_prompt_tokens - 100
+            )  # 100 token buffer
+
         # 检查文本长度是否需要分块
         if get_token_count(text) > max_usable_tokens:
             logger.info(
@@ -308,7 +344,7 @@ def completions(
             return {"text": result_text, "tokens": tokens}
 
         # 计算合理的输出token限制
-        input_tokens = get_token_count(system_prompt) + get_token_count(text)
+        # input_tokens 已经在上面根据 merge_system_prompt 的情况正确计算了,直接使用
         # 输出token限制 = 模型总限制 - 输入token - 安全缓冲
         output_token_limit = int(max(4096, (self.max_tokens - input_tokens) * 0.8))
@@ -328,10 +364,7 @@ def completions(
         res = client.with_options(max_retries=3).chat.completions.create(
             extra_headers=self.EXTRA_HEADERS,
             model=self.model,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": text},
-            ],
+            messages=messages,
             **call_kwargs,
         )
         if (