Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions src/neo4j_graphrag/experimental/components/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,92 @@ def _filter_invalid_constraints(
filtered_constraints.append(constraint)
return filtered_constraints

def _filter_properties_required_field(
self, node_types: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
"""Sanitize the 'required' field in node type properties. Ensures 'required' is a valid boolean.
converts known string values (true, yes, 1, false, no, 0) to booleans and removes unrecognized values.
"""
for node_type in node_types:
properties = node_type.get("properties", [])
if not properties:
continue
for prop in properties:
if not isinstance(prop, dict):
continue

required_value = prop.get("required")

# Not provided - will use Pydantic default (false)
if required_value is None:
continue

# already a valid boolean
if isinstance(required_value, bool):
continue

prop_name = prop.get("name", "unknown")
node_label = node_type.get("label", "unknown")

# Convert to string to handle int values like 1 or 0
required_str = str(required_value).lower()

if required_str in ("true", "yes", "1"):
prop["required"] = True
logging.info(
f"Converted 'required' value '{required_value}' to True "
f"for property '{prop_name}' on node '{node_label}'"
)
elif required_str in ("false", "no", "0"):
prop["required"] = False
logging.info(
f"Converted 'required' value '{required_value}' to False "
f"for property '{prop_name}' on node '{node_label}'"
)
else:
logging.info(
f"Removing unrecognized 'required' value '{required_value}' "
f"for property '{prop_name}' on node '{node_label}'. "
f"Using default (False)."
)
prop.pop("required", None)

return node_types

def _enforce_required_for_constraint_properties(
self,
node_types: List[Dict[str, Any]],
constraints: List[Dict[str, Any]],
) -> None:
"""Ensure properties with UNIQUENESS constraints are marked as required."""
if not constraints:
return

# Build a lookup for property_names and constraints
constraint_props: Dict[str, set[str]] = {}
for c in constraints:
if c.get("type") == "UNIQUENESS":
label = c.get("node_type")
prop = c.get("property_name")
if label and prop:
constraint_props.setdefault(label, set()).add(prop)

# Skip node_types without constraints
for node_type in node_types:
label = node_type.get("label")
if label not in constraint_props:
continue

props_to_fix = constraint_props[label]
for prop in node_type.get("properties", []):
if isinstance(prop, dict) and prop.get("name") in props_to_fix:
if prop.get("required") is not True:
logging.info(
f"Auto-setting 'required' as True for property '{prop.get('name')}' "
f"on node '{label}' (has UNIQUENESS constraint)."
)
prop["required"] = True

def _clean_json_content(self, content: str) -> str:
content = content.strip()

Expand Down Expand Up @@ -746,12 +832,22 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema
extracted_relationship_types
)

extracted_node_types = self._filter_properties_required_field(
extracted_node_types
)

# Filter out invalid patterns before validation
if extracted_patterns:
extracted_patterns = self._filter_invalid_patterns(
extracted_patterns, extracted_node_types, extracted_relationship_types
)

# Enforce required=true for properties with UNIQUENESS constraints
if extracted_constraints:
self._enforce_required_for_constraint_properties(
extracted_node_types, extracted_constraints
)

# Filter out invalid constraints
if extracted_constraints:
extracted_constraints = self._filter_invalid_constraints(
Expand Down
16 changes: 14 additions & 2 deletions src/neo4j_graphrag/generation/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,13 @@ class SchemaExtractionTemplate(PromptTemplate):
8.2 Only use properties that seem to not have too many missing values in the sample.
8.3 Constraints reference node_types by label and specify which property is unique.
8.4 If a property appears in a uniqueness constraint it MUST also appear in the corresponding node_type as a property.

9. REQUIRED PROPERTIES:
9.1 Mark a property as "required": true if every instance of that node/relationship type MUST have this property (non-nullable).
9.2 Mark a property as "required": false if the property is optional and may be absent on some instances.
9.3 Properties that are identifiers, names, or essential characteristics are typically required.
9.4 Properties that are supplementary information (phone numbers, descriptions, metadata) are typically optional.
9.5 When uncertain, default to "required": false.
9.6 If a property has a UNIQUENESS constraint, it MUST be marked as "required": true.

Accepted property types are: BOOLEAN, DATE, DURATION, FLOAT, INTEGER, LIST,
LOCAL_DATETIME, LOCAL_TIME, POINT, STRING, ZONED_DATETIME, ZONED_TIME.
Expand All @@ -236,7 +242,13 @@ class SchemaExtractionTemplate(PromptTemplate):
"properties": [
{{
"name": "name",
"type": "STRING"
"type": "STRING",
"required": true
}},
{{
"name": "email",
"type": "STRING",
"required": false
}}
]
}}
Expand Down
Loading