4 changes: 3 additions & 1 deletion .gitignore
@@ -8,4 +8,6 @@ htmlcov
.venv
.DS_Store
.env
.env.test
.env.test
metrics.json
predictions.csv
12 changes: 7 additions & 5 deletions backend/app/api/routes/guardrails.py
@@ -162,15 +162,17 @@ async def _validate_with_guard(
)

def add_validator_logs(guard: Guard, request_log_id: UUID, validator_log_crud: ValidatorLogCrud):
if not guard or not guard.history or not guard.history.last:
history = getattr(guard, "history", None)
if not history:
return

call = guard.history.last
if not call.iterations:
last_call = getattr(history, "last", None)
if not last_call or not getattr(last_call, "iterations", None):
return

iteration = call.iterations[-1]
if not iteration.outputs or not iteration.outputs.validator_logs:
iteration = last_call.iterations[-1]
outputs = getattr(iteration, "outputs", None)
if not outputs or not getattr(outputs, "validator_logs", None):
return

for log in iteration.outputs.validator_logs:
14 changes: 14 additions & 0 deletions backend/app/eval/common/io.py
@@ -0,0 +1,14 @@
from pathlib import Path
import json
import pandas as pd


def write_csv(df: pd.DataFrame, path: Path):
path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(path, index=False)


def write_json(obj: dict, path: Path):
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "w") as f:
json.dump(obj, f, indent=2)
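
A quick usage sketch for these helpers (illustrative only; the paths and sample frame below are made up):

from pathlib import Path
import pandas as pd
from app.eval.common.io import write_csv, write_json

df = pd.DataFrame({"text": ["hello"], "label": [0]})
write_csv(df, Path("outputs/example/predictions.csv"))          # parent dirs are created automatically
write_json({"num_samples": len(df)}, Path("outputs/example/metrics.json"))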
19 changes: 19 additions & 0 deletions backend/app/eval/common/metrics.py
@@ -0,0 +1,19 @@
def compute_binary_metrics(y_true, y_pred):
tp = sum((yt == 1 and yp == 1) for yt, yp in zip(y_true, y_pred))
tn = sum((yt == 0 and yp == 0) for yt, yp in zip(y_true, y_pred))
fp = sum((yt == 0 and yp == 1) for yt, yp in zip(y_true, y_pred))
fn = sum((yt == 1 and yp == 0) for yt, yp in zip(y_true, y_pred))
Comment on lines +1 to +5
⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

git ls-files | grep -E "(backend/app|setup|pyproject|Pipfile)" | head -20

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 727


🏁 Script executed:

cat backend/app/eval/common/metrics.py 2>/dev/null || echo "File not found, searching for it..."
fd -t f -name "metrics.py"

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 1003


🏁 Script executed:

fd -t f -g 'pyproject.toml' -o -g 'setup.cfg' -o -g 'setup.py' -o -g '.python-version' -o -g 'Pipfile'

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 194


🏁 Script executed:

find . -type f \( -name "pyproject.toml" -o -name "setup.cfg" -o -name "setup.py" -o -name ".python-version" \) | head -10

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 102


🏁 Script executed:

cat pyproject.toml 2>/dev/null || cat setup.py 2>/dev/null || cat setup.cfg 2>/dev/null || echo "No config files found"

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 99


🏁 Script executed:

rg "python|requires.python" -i --type toml --type txt --max-count 5

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 164


🏁 Script executed:

cat backend/pyproject.toml

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 2352


Add strict=True to zip() calls to prevent silent truncation.

The zip() calls on lines 2–5 will silently drop items if y_true and y_pred have different lengths, producing incorrect metrics. Since the project requires Python ≥3.10, use strict=True to raise a ValueError immediately on length mismatch.

Proposed fix
-    tp = sum((yt == 1 and yp == 1) for yt, yp in zip(y_true, y_pred))
-    tn = sum((yt == 0 and yp == 0) for yt, yp in zip(y_true, y_pred))
-    fp = sum((yt == 0 and yp == 1) for yt, yp in zip(y_true, y_pred))
-    fn = sum((yt == 1 and yp == 0) for yt, yp in zip(y_true, y_pred))
+    tp = sum((yt == 1 and yp == 1) for yt, yp in zip(y_true, y_pred, strict=True))
+    tn = sum((yt == 0 and yp == 0) for yt, yp in zip(y_true, y_pred, strict=True))
+    fp = sum((yt == 0 and yp == 1) for yt, yp in zip(y_true, y_pred, strict=True))
+    fn = sum((yt == 1 and yp == 0) for yt, yp in zip(y_true, y_pred, strict=True))
🧰 Tools
🪛 Ruff (0.14.14)

2-2: zip() without an explicit strict= parameter

Add explicit value for parameter strict=

(B905)


3-3: zip() without an explicit strict= parameter

Add explicit value for parameter strict=

(B905)


4-4: zip() without an explicit strict= parameter

Add explicit value for parameter strict=

(B905)


5-5: zip() without an explicit strict= parameter

Add explicit value for parameter strict=

(B905)

🤖 Prompt for AI Agents
In `@backend/app/eval/common/metrics.py` around lines 1 - 5,
compute_binary_metrics currently uses zip(y_true, y_pred) which silently
truncates on length mismatch; update each zip call in compute_binary_metrics
(the lines computing tp, tn, fp, fn) to use zip(y_true, y_pred, strict=True) so
a ValueError is raised if lengths differ, preserving correct metric
calculations.


precision = tp / (tp + fp) if tp + fp else 0.0
recall = tp / (tp + fn) if tp + fn else 0.0
f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0

return {
"tp": tp,
"tn": tn,
"fp": fp,
"fn": fn,
"precision": precision,
"recall": recall,
"f1": f1,
}
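
For context, a minimal sketch of how compute_binary_metrics behaves and what the suggested strict=True change adds (the sample lists are made up):

from app.eval.common.metrics import compute_binary_metrics

y_true = [1, 0, 1, 1, 0]
y_pred = [1, 0, 0, 1, 1]
print(compute_binary_metrics(y_true, y_pred))
# tp=2, tn=1, fp=1, fn=1 -> precision, recall and f1 are all 2/3

# With the suggested fix applied, a length mismatch fails loudly instead of silently truncating:
try:
    list(zip(y_true, y_pred[:-1], strict=True))
except ValueError as exc:
    print(exc)  # e.g. "zip() argument 2 is shorter than argument 1"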
19 changes: 19 additions & 0 deletions backend/app/eval/common/profiling.py
@@ -0,0 +1,19 @@
import time
import tracemalloc

class Profiler:
def __enter__(self):
self.latencies = []
tracemalloc.start()
return self

def record(self, fn, *args):
start = time.perf_counter()
result = fn(*args)
self.latencies.append((time.perf_counter() - start) * 1000)
return result

def __exit__(self, *args):
_, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
self.peak_memory_mb = peak / (1024 * 1024)
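
For reference, a minimal usage sketch of the Profiler context manager (the profiled function below is made up):

from app.eval.common.profiling import Profiler

def shout(text: str) -> str:
    return text.upper()

with Profiler() as p:
    for sample in ["hello", "world"]:
        p.record(shout, sample)   # times each call in milliseconds

print(p.latencies)        # per-call latencies collected inside the block
print(p.peak_memory_mb)   # peak traced memory, set when the block exits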
602 changes: 602 additions & 0 deletions backend/app/eval/datasets/lexical_slur_testing_dataset.csv

Large diffs are not rendered by default.

420 changes: 420 additions & 0 deletions backend/app/eval/datasets/pii_detection_testing_dataset.csv

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions backend/app/eval/lexical_slur/run.py
@@ -0,0 +1,45 @@
from pathlib import Path
import pandas as pd
from guardrails.validators import FailResult

from app.core.validators.lexical_slur import LexicalSlur
from app.eval.common.metrics import compute_binary_metrics
from app.eval.common.profiling import Profiler
from app.eval.common.io import write_csv, write_json

BASE_DIR = Path(__file__).resolve().parent.parent
OUT_DIR = BASE_DIR / "outputs" / "lexical_slur"

df = pd.read_csv(BASE_DIR / "datasets" / "lexical_slur_testing_dataset.csv")

validator = LexicalSlur()

with Profiler() as p:
df["result"] = df["commentText"].astype(str).apply(
lambda x: p.record(lambda t: validator.validate(t, metadata=None), x)
)

df["y_pred"] = df["result"].apply(lambda r: int(isinstance(r, FailResult)))
df["y_true"] = df["label"]

metrics = compute_binary_metrics(df["y_true"], df["y_pred"])

# ---- Save outputs ----
write_csv(df.drop(columns=["result"]), OUT_DIR / "predictions.csv")

write_json(
{
"guardrail": "lexical_slur",
"num_samples": len(df),
"metrics": metrics,
"performance": {
"latency_ms": {
"mean": sum(p.latencies) / len(p.latencies),
"p95": sorted(p.latencies)[int(len(p.latencies) * 0.95)],
"max": max(p.latencies),
},
Comment on lines +35 to +40
⚠️ Potential issue | 🟡 Minor

Guard latency stats for empty datasets.
If the dataset is empty, mean, p95, and max will raise. A small guard avoids failures in edge cases.

🛠 Proposed guard
+latencies = p.latencies
+if latencies:
+    latency_stats = {
+        "mean": sum(latencies) / len(latencies),
+        "p95": sorted(latencies)[int(len(latencies) * 0.95)],
+        "max": max(latencies),
+    }
+else:
+    latency_stats = {"mean": 0.0, "p95": 0.0, "max": 0.0}
+
 write_json(
     {
         "guardrail": "lexical_slur",
         "num_samples": len(df),
         "metrics": metrics,
         "performance": {
             "latency_ms": {
-                "mean": sum(p.latencies) / len(p.latencies),
-                "p95": sorted(p.latencies)[int(len(p.latencies) * 0.95)],
-                "max": max(p.latencies),
+                **latency_stats,
             },
             "memory_mb": p.peak_memory_mb,
         },
     },
     OUT_DIR / "metrics.json",
 )
🤖 Prompt for AI Agents
In `@backend/app/eval/lexical_slur/run.py` around lines 35 - 40, the performance
latency computation in run.py assumes p.latencies is non-empty and will throw on
empty datasets; update the "performance" block to guard p.latencies (e.g., check
if p.latencies truthy) and only compute mean, p95 (sorted index), and max when
there are values, otherwise set those fields to a safe default such as None (or
0) so empty datasets don't raise; locate the code using p.latencies in the
"performance": {"latency_ms": ...} block and wrap or inline-conditional the
mean, p95, and max calculations accordingly.

"memory_mb": p.peak_memory_mb,
},
},
OUT_DIR / "metrics.json",
)
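
On the p95 computation above: nearest-rank indexing into the sorted list works for moderately sized datasets, but combined with the empty-dataset guard suggested in the review, a standard-library alternative could look like this (a sketch, not what the PR uses):

import statistics

def p95(latencies: list[float]) -> float:
    if not latencies:
        return 0.0                                    # empty dataset: nothing to report
    if len(latencies) == 1:
        return latencies[0]                           # quantiles() needs at least two points
    return statistics.quantiles(latencies, n=20)[-1]  # last of 19 cut points ~ 95th percentile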
83 changes: 83 additions & 0 deletions backend/app/eval/pii/entity_metrics.py
@@ -0,0 +1,83 @@
import re
from collections import defaultdict
from typing import Iterable, Dict, Set

# Matches placeholders like [PHONE_NUMBER], <IN_PAN>, etc.
ENTITY_PATTERN = re.compile(r"[\[<]([A-Z0-9_]+)[\]>]")


def extract_entities(text: str) -> Set[str]:
"""
Extract entity labels from a masked/anonymized string.
Examples:
"Call me at [PHONE_NUMBER]" -> {"PHONE_NUMBER"}
"<IN_PAN> <PHONE_NUMBER>" -> {"IN_PAN", "PHONE_NUMBER"}
"""
if not isinstance(text, str):
return set()
return set(ENTITY_PATTERN.findall(text))


def compare_entities(gold: Set[str], pred: Set[str]):
"""
Compare gold vs predicted entity sets.
"""
tp = gold & pred # correctly detected
fn = gold - pred # missed entities
fp = pred - gold # hallucinated entities
return tp, fp, fn


def compute_entity_metrics(
gold_texts: Iterable[str],
pred_texts: Iterable[str],
) -> Dict[str, dict]:
"""
Compute per-entity TP / FP / FN counts across the dataset.
"""
stats = defaultdict(lambda: {"tp": 0, "fp": 0, "fn": 0})

for gold_txt, pred_txt in zip(gold_texts, pred_texts):
⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

# Find Python version requirement
fd -type f -name "setup.py" -o -name "pyproject.toml" -o -name ".python-version" -o -name "requirements.txt" | head -20

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 248


🏁 Script executed:

# Read the file and surrounding context
cat -n backend/app/eval/pii/entity_metrics.py | head -80

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 2861


🏁 Script executed:

# Also check if there are any other zip calls to understand the pattern
rg "zip\(" backend/app/eval/pii/ -A 2 -B 2

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 495


🏁 Script executed:

# Search for Python version requirement
rg "python_requires|version.*python|3\.[0-9]+" -t py --max-count=10 | head -20

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 60


🏁 Script executed:

# Look for pyproject.toml or setup files
cat pyproject.toml 2>/dev/null | head -40

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 60


🏁 Script executed:

# Check for any test files or examples showing usage of compute_entity_metrics
rg "compute_entity_metrics" --type py -B 3 -A 3

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 1305


🏁 Script executed:

# Search all config files
find . -name "setup.py" -o -name "pyproject.toml" -o -name "setup.cfg" -o -name ".python-version" 2>/dev/null | head -10

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 102


🏁 Script executed:

# Check if pyproject.toml exists in repo root
ls -la | grep -E "pyproject|setup|requirements"

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 60


🏁 Script executed:

# Check the actual run.py usage context
cat -n backend/app/eval/pii/run.py | grep -A 10 "entity_report = compute_entity_metrics"

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 365


🏁 Script executed:

# Check the pyproject.toml for Python version
cat backend/pyproject.toml

Repository: ProjectTech4DevAI/kaapi-guardrails

Length of output: 2352


Add strict=True to zip to catch length mismatches.
zip() without strict=True silently truncates when iterables have unequal lengths, which would produce incorrect entity metrics. Since the project requires Python 3.10+, use strict=True.

🔧 Proposed fix
-    for gold_txt, pred_txt in zip(gold_texts, pred_texts):
+    for gold_txt, pred_txt in zip(gold_texts, pred_texts, strict=True):
🧰 Tools
🪛 Ruff (0.14.14)

41-41: zip() without an explicit strict= parameter

Add explicit value for parameter strict=

(B905)

🤖 Prompt for AI Agents
In `@backend/app/eval/pii/entity_metrics.py` at line 41, the loop pairing gold and
predicted texts uses zip without strict checking; update the loop "for gold_txt,
pred_txt in zip(gold_texts, pred_texts):" in entity_metrics.py to use
strict=True (i.e., zip(gold_texts, pred_texts, strict=True)) so a length
mismatch raises immediately; ensure any callers that rely on silent truncation
are adjusted or tests updated if needed.

gold_entities = extract_entities(gold_txt)
pred_entities = extract_entities(pred_txt)

tp, fp, fn = compare_entities(gold_entities, pred_entities)

for e in tp:
stats[e]["tp"] += 1
for e in fp:
stats[e]["fp"] += 1
for e in fn:
stats[e]["fn"] += 1

return finalize_entity_metrics(stats)


def finalize_entity_metrics(stats: Dict[str, dict]) -> Dict[str, dict]:
"""
Convert raw counts into precision / recall / F1 per entity.
"""
report = {}

for entity, s in stats.items():
tp, fp, fn = s["tp"], s["fp"], s["fn"]

precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
f1 = (
2 * precision * recall / (precision + recall)
if (precision + recall)
else 0.0
)

report[entity] = {
"tp": tp,
"fp": fp,
"fn": fn,
"precision": precision,
"recall": recall,
"f1": f1,
}

return report
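
A minimal usage sketch for these helpers (the gold/pred strings below are made up):

from app.eval.pii.entity_metrics import compute_entity_metrics

gold = ["Call me at [PHONE_NUMBER]", "PAN is <IN_PAN>"]
pred = ["Call me at [PHONE_NUMBER]", "PAN is ABCDE1234F"]   # second entity missed

report = compute_entity_metrics(gold, pred)
print(report["PHONE_NUMBER"]["f1"])   # 1.0 - detected
print(report["IN_PAN"]["recall"])     # 0.0 - counted as a false negative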
39 changes: 39 additions & 0 deletions backend/app/eval/pii/run.py
@@ -0,0 +1,39 @@
from pathlib import Path
import pandas as pd
from guardrails.validators import FailResult

from app.core.validators.pii_remover import PIIRemover
from app.eval.pii.entity_metrics import compute_entity_metrics
from app.eval.common.io import write_csv, write_json

BASE_DIR = Path(__file__).resolve().parent.parent
OUT_DIR = BASE_DIR / "outputs" / "pii_remover"

df = pd.read_csv(BASE_DIR / "datasets" / "pii_detection_testing_dataset.csv")

validator = PIIRemover()

def run_pii(text: str) -> str:
result = validator._validate(text)
if isinstance(result, FailResult):
return result.fix_value
return text

df["anonymized"] = df["source_text"].astype(str).apply(run_pii)

entity_report = compute_entity_metrics(
df["target_text"],
df["anonymized"],
)

# ---- Save outputs ----
write_csv(df, OUT_DIR / "predictions.csv")

Comment on lines +29 to +31
⚠️ Potential issue | 🟠 Major

Avoid exporting raw PII in predictions artifacts.
predictions.csv currently includes source_text, which likely contains raw PII. If these artifacts are shared or stored, this becomes a compliance/privacy risk. Consider excluding raw text (or gating it behind an explicit flag).

🔒 Proposed safer export
-# ---- Save outputs ----
-write_csv(df, OUT_DIR / "predictions.csv")
+# ---- Save outputs ----
+# Avoid exporting raw PII by default
+write_csv(df.drop(columns=["source_text"]), OUT_DIR / "predictions.csv")
🤖 Prompt for AI Agents
In `@backend/app/eval/pii/run.py` around lines 29 - 31, the export currently
writes raw text including potential PII to predictions.csv; before calling
write_csv(df, OUT_DIR / "predictions.csv") remove or mask the source_text column
(or any columns named source_text, text, raw_text, etc.) from df, or gate
inclusion behind an explicit flag (e.g., a keep_raw_text boolean) that defaults
to false; update the code path that prepares df (the variable named df) so
write_csv only receives non-PII columns, and ensure the change is applied where
write_csv and OUT_DIR / "predictions.csv" are used to prevent accidental export
of raw PII.

write_json(
{
"guardrail": "pii_remover",
"num_samples": len(df),
"entity_metrics": entity_report,
},
OUT_DIR / "metrics.json",
)
40 changes: 24 additions & 16 deletions backend/app/tests/conftest.py
@@ -1,22 +1,23 @@
import os
from unittest.mock import MagicMock
# Set environment before importing ANYTHING else
os.environ["ENVIRONMENT"] = "testing"

import pytest
from fastapi.testclient import TestClient

# MUST be set before app import
os.environ["ENVIRONMENT"] = "testing"

from app.api.deps import SessionDep, verify_bearer_token
from app.api.routes import guardrails
from app.main import app

@pytest.fixture(scope="function", autouse=True)
def override_dependencies():
def override_dependencies(monkeypatch):
"""
Override ALL external dependencies:
- Auth
- DB session
- RequestLogCrud
- CRUDs
"""

# ---- Auth override ----
@@ -27,26 +28,33 @@ def override_dependencies():
app.dependency_overrides[SessionDep] = lambda: mock_session

# ---- CRUD override ----
mock_crud = MagicMock()
mock_crud.create.return_value = MagicMock(id=1)
mock_crud.update_success.return_value = None
mock_crud.update_error.return_value = None

guardrails.RequestLogCrud = lambda session: mock_crud
mock_request_log_crud = MagicMock()
mock_request_log_crud.create.return_value = MagicMock(id=1)
mock_request_log_crud.update.return_value = None

mock_validator_log_crud = MagicMock()
mock_validator_log_crud.create.return_value = None

monkeypatch.setattr(
guardrails,
"RequestLogCrud",
lambda session: mock_request_log_crud,
)
monkeypatch.setattr(
guardrails,
"ValidatorLogCrud",
lambda session: mock_validator_log_crud,
)

yield

app.dependency_overrides.clear()


@pytest.fixture(scope="function")
def client():
with TestClient(app) as c:
yield c


@pytest.fixture(scope="function")
def integration_client():
# Same app, just semantic distinction
with TestClient(app) as c:
yield c
def integration_client(client):
yield client
2 changes: 1 addition & 1 deletion backend/app/tests/test_guardrails_api.py
@@ -70,7 +70,7 @@ def validate(self, data):
body = response.json()
assert body["success"] is False
assert body["data"]["safe_input"] is None
assert body["error"] == "PII detected"
assert body["error"] == "Validation failed"


def test_output_guardrails_success(client, mock_crud):
9 changes: 7 additions & 2 deletions backend/app/tests/test_validate_with_guard.py
@@ -9,6 +9,7 @@


mock_request_log_crud = MagicMock()
mock_validator_log_crud = MagicMock()
mock_request_log_id = uuid4()


@@ -28,6 +29,7 @@ def validate(self, data):
response_field="safe_input",
request_log_crud=mock_request_log_crud,
request_log_id=mock_request_log_id,
validator_log_crud=mock_validator_log_crud,
)

assert isinstance(response, APIResponse)
@@ -55,12 +57,13 @@ def validate(self, data):
response_field="safe_input",
request_log_crud=mock_request_log_crud,
request_log_id=mock_request_log_id,
validator_log_crud=mock_validator_log_crud,
)

assert isinstance(response, APIResponse)
assert response.success is False
assert response.data["safe_input"] is None
assert response.error == "PII detected"
assert response.error == "Validation failed"


@pytest.mark.asyncio
@@ -79,12 +82,13 @@ def validate(self, data):
response_field="safe_output",
request_log_crud=mock_request_log_crud,
request_log_id=mock_request_log_id,
validator_log_crud=mock_validator_log_crud,
)

assert isinstance(response, APIResponse)
assert response.success is False
assert response.data["safe_output"] is None
assert response.error == ""
assert response.error == "Validation failed"


@pytest.mark.asyncio
@@ -99,6 +103,7 @@ async def test_validate_with_guard_exception():
response_field="safe_input",
request_log_crud=mock_request_log_crud,
request_log_id=mock_request_log_id,
validator_log_crud=mock_validator_log_crud,
)

assert isinstance(response, APIResponse)