18 changes: 18 additions & 0 deletions core/migrations/0036_openaiagent_merge_system_prompt.py
@@ -0,0 +1,18 @@
# Generated by Django 5.2.5 on 2025-12-11 09:22

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0035_alter_openaiagent_max_tokens_digest'),
    ]

    operations = [
        migrations.AddField(
            model_name='openaiagent',
            name='merge_system_prompt',
            field=models.BooleanField(default=False, help_text="Enable for models that don't support system instructions (e.g., Gemma 3)", verbose_name='Merge System Prompt to User Message'),
        ),
    ]
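For context, a minimal sketch of what the new flag changes in the outgoing chat payload (names taken from the agent.py diff below; the two-message shape is the standard OpenAI-compatible format):

# merge_system_prompt = False (default): system and user messages stay separate
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": text},
]

# merge_system_prompt = True: a single user message carries both parts,
# for models such as Gemma 3 that don't support a system role
messages = [
    {"role": "user", "content": f"{system_prompt}\n\n{text}"},
]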
55 changes: 44 additions & 11 deletions core/models/agent.py
@@ -93,6 +93,11 @@ class OpenAIAgent(Agent):
        default=0,
        help_text=_("Maximum requests per minute (0 = no limit)"),
    )
    merge_system_prompt = models.BooleanField(
        _("Merge System Prompt to User Message"),
        default=False,
        help_text=_("Enable for models that don't support system instructions (e.g., Gemma 3)"),
    )
    EXTRA_HEADERS = {
        "HTTP-Referer": "https://www.rssbox.app",
        "X-Title": "RSSBox",
@@ -260,9 +265,30 @@ def completions(

        # Apply rate limiting
        self._wait_for_rate_limit()

        if self.merge_system_prompt:
            merged_content = f"{system_prompt}\n\n{text}"
            messages = [
                {"role": "user", "content": merged_content}
            ]
            # When merging, system_prompt_tokens is 0: the system prompt is
            # already counted as part of merged_content
            system_prompt_tokens = 0
            input_tokens = get_token_count(merged_content)
        else:
            messages = [
                {
                    "role": "system",
                    "content": system_prompt,
                },
                {
                    "role": "user",
                    "content": text
                }
            ]
            # Count the tokens taken up by the system prompt
            system_prompt_tokens = get_token_count(system_prompt)
            input_tokens = get_token_count(system_prompt) + get_token_count(text)

        # Get the maximum number of usable tokens (reserving a buffer)
        if self.max_tokens == 0:
task_manager.submit_task(
Expand All @@ -274,9 +300,19 @@ def completions(
"max_tokens is not set, Please wait for the model limit detection to complete"
)

        # Compute the maximum number of usable tokens.
        # When merge_system_prompt=True, the system prompt is already part of
        # merged_content, so only the text tokens remain to budget for.
        if self.merge_system_prompt:
            # In merge mode, subtract the system prompt's tokens from the total
            # limit (they still take up input space inside merged_content)
            system_prompt_token_cost = get_token_count(system_prompt)
            max_usable_tokens = (
                self.max_tokens - system_prompt_token_cost - 100
            )  # 100 token buffer
        else:
            max_usable_tokens = (
                self.max_tokens - system_prompt_tokens - 100
            )  # 100 token buffer

        # Check whether the text is long enough to need chunking
        if get_token_count(text) > max_usable_tokens:
            logger.info(
@@ -308,7 +344,7 @@ def completions(
            return {"text": result_text, "tokens": tokens}

        # Compute a reasonable output token limit.
        # input_tokens was already computed above according to the
        # merge_system_prompt branch, so it can be used directly here.
        # Output token limit = model total limit - input tokens - safety buffer
        output_token_limit = int(max(4096, (self.max_tokens - input_tokens) * 0.8))
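        # Worked example of the budget math with hypothetical numbers (only the
        # 100-token buffer and the 4096 floor come from this diff):
        #   max_tokens = 8192, system prompt = 200 tokens, text = 3000 tokens
        #   max_usable_tokens  = 8192 - 200 - 100 = 7892  -> 3000 fits, no chunking
        #   input_tokens       = 200 + 3000 = 3200
        #   output_token_limit = int(max(4096, (8192 - 3200) * 0.8))
        #                      = int(max(4096, 3993.6)) = 4096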

@@ -328,10 +364,7 @@ def completions(
        res = client.with_options(max_retries=3).chat.completions.create(
            extra_headers=self.EXTRA_HEADERS,
            model=self.model,
            messages=messages,
            **call_kwargs,
        )
if (
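A minimal usage sketch of the new flag, assuming a saved OpenAIAgent row and that completions() accepts system_prompt and text keyword arguments (only the {"text", "tokens"} return shape is confirmed by this diff):

# Hypothetical lookup; "name" as an agent field is an assumption
agent = OpenAIAgent.objects.get(name="gemma3")
agent.merge_system_prompt = True  # e.g. Gemma 3, which lacks a system role
agent.save(update_fields=["merge_system_prompt"])

result = agent.completions(
    system_prompt="Summarize the feed entry in one paragraph.",
    text=entry_text,  # assumed variable holding the article text
)
print(result["text"], result["tokens"])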