From 1bdebbffccc38016199940736ee3ecd10360da00 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Wed, 17 Dec 2025 15:50:17 +0100 Subject: [PATCH 1/6] Add required property support to SchemaFromTextExtractor --- .../experimental/components/schema.py | 64 +++++++++++++++++++ src/neo4j_graphrag/generation/prompts.py | 15 ++++- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index d7c45694..bfb597f8 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -666,6 +666,66 @@ def _filter_invalid_constraints( filtered_constraints.append(constraint) return filtered_constraints + def _filter_properties_required_field( + self, node_types: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """Sanitize the 'required' field in node type properties. Ensures 'required' is a valid boolean. + converts known string values (true, yes, 1, false, no, 0) to booleans and removes unrecognized values. + """ + for node_type in node_types: + properties = node_type.get("properties", []) + if not properties: + continue + for prop in properties: + if not isinstance(prop, dict): + continue + + required_value = prop.get("required") + + # Not provided - will use Pydantic default (false) + if required_value is None: + continue + + # already a valid boolean + if isinstance(required_value, bool): + continue + + prop_name = prop.get("name", "unknown") + node_label = node_type.get("label", "unknown") + + if isinstance(required_value, str): + if required_value.lower() in ("true", "yes", "1"): + prop["required"] = True + logging.info( + f"Converted 'required' value {required_value} to True " + f"for property '{prop_name}' on node '{node_label}'" + ) + elif required_value.lower() in ("false", "no", "0"): + prop["required"] = False + logging.info( + f"Converted 'required' value '{required_value}' to False " + f"for property '{prop_name}' on node '{node_label}' " + ) + # Unknown string values + else: + logging.info( + f"Removing unrecognized 'required' value '{required_value}' " + f"for property '{prop_name}' on node '{node_label}'. " + f"Using default (False) " # TODO: Not sure if we have to convert it to the default value - double check! + ) + prop.pop("required", None) + else: + # Non-string, non-boolean - remove + logging.info( + f"Removing invalid 'required' value '{required_value}' (type: {type(required_value).__name__}) " + f"for property '{prop_name}' on node '{node_label}'. " + f"Using default (False). " + ) + prop.pop("required", None) + + return node_types + + def _clean_json_content(self, content: str) -> str: content = content.strip() @@ -746,6 +806,10 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema extracted_relationship_types ) + extracted_node_types = self._filter_properties_required_field( + extracted_node_types + ) + # Filter out invalid patterns before validation if extracted_patterns: extracted_patterns = self._filter_invalid_patterns( diff --git a/src/neo4j_graphrag/generation/prompts.py b/src/neo4j_graphrag/generation/prompts.py index 6fedb511..44818132 100644 --- a/src/neo4j_graphrag/generation/prompts.py +++ b/src/neo4j_graphrag/generation/prompts.py @@ -223,7 +223,12 @@ class SchemaExtractionTemplate(PromptTemplate): 8.2 Only use properties that seem to not have too many missing values in the sample. 8.3 Constraints reference node_types by label and specify which property is unique. 8.4 If a property appears in a uniqueness constraint it MUST also appear in the corresponding node_type as a property. - +9. REQUIRED PROPERTIES: +9.1 Mark a property as "required": true if every instance of that node/relationship type MUST have this property (non-nullable). +9.2 Mark a property as "required": false if the property is optional and may be absent on some instances. +9.3 Properties that are identifiers, names, or essential characteristics are typically required. +9.4 Properties that are supplementary information (phone numbers, descriptions, metadata) are typically optional. +9.5 When uncertain, default to "required": false. Accepted property types are: BOOLEAN, DATE, DURATION, FLOAT, INTEGER, LIST, LOCAL_DATETIME, LOCAL_TIME, POINT, STRING, ZONED_DATETIME, ZONED_TIME. @@ -236,7 +241,13 @@ class SchemaExtractionTemplate(PromptTemplate): "properties": [ {{ "name": "name", - "type": "STRING" + "type": "STRING", + "required": true + }}, + {{ + "name": "email", + "type": "STRING", + "required": false }} ] }} From 844a15534c73a26b501c4647f6a7994759d9661f Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Wed, 17 Dec 2025 15:53:20 +0100 Subject: [PATCH 2/6] add required property support --- src/neo4j_graphrag/experimental/components/schema.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index bfb597f8..d1c6c524 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -670,7 +670,7 @@ def _filter_properties_required_field( self, node_types: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: """Sanitize the 'required' field in node type properties. Ensures 'required' is a valid boolean. - converts known string values (true, yes, 1, false, no, 0) to booleans and removes unrecognized values. + converts known string values (true, yes, 1, false, no, 0) to booleans and removes unrecognized values. """ for node_type in node_types: properties = node_type.get("properties", []) @@ -711,7 +711,7 @@ def _filter_properties_required_field( logging.info( f"Removing unrecognized 'required' value '{required_value}' " f"for property '{prop_name}' on node '{node_label}'. " - f"Using default (False) " # TODO: Not sure if we have to convert it to the default value - double check! + f"Using default (False) " # TODO: Not sure if we have to convert it to the default value - double check! ) prop.pop("required", None) else: @@ -725,7 +725,6 @@ def _filter_properties_required_field( return node_types - def _clean_json_content(self, content: str) -> str: content = content.strip() From cf365e8f93829626ddfb770e0d4394e5854d1789 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Thu, 18 Dec 2025 10:42:49 +0100 Subject: [PATCH 3/6] unit tests --- .../experimental/components/test_schema.py | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py index 98bb3fe5..0705f201 100644 --- a/tests/unit/experimental/components/test_schema.py +++ b/tests/unit/experimental/components/test_schema.py @@ -79,6 +79,28 @@ def test_node_type_additional_properties_default() -> None: assert node_type.additional_properties is True +def test_property_type_initalization() -> None: + prop = PropertyType(name="email", type="STRING") + assert prop.name == "email" + assert prop.type == "STRING" + assert prop.required is False + + +def test_property_type_with_required_true() -> None: + prop = PropertyType(name="id", type="INTEGER", required=True) + assert prop.required is True + + +def test_property_type_is_frozen() -> None: + prop = PropertyType(name="email", type="STRING", required=False) + + with pytest.raises(ValidationError): + prop.name = "other" + + with pytest.raises(ValidationError): + prop.required = True + + def test_relationship_type_initialization_from_string() -> None: relationship_type = RelationshipType.model_validate("REL") assert isinstance(relationship_type, RelationshipType) @@ -730,6 +752,55 @@ def schema_json_with_null_constraints() -> str: """ +@pytest.fixture +def schema_json_with_required_properties() -> str: + return """ + { + "node_types": [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING", "required": true}, + {"name": "email", "type": "STRING", "required": false}, + {"name": "phone", "type": "STRING"} + ] + } + ], + "relationship_types": [ + {"label": "KNOWS"} + ], + "patterns": [ + ["Person", "KNOWS", "Person"] + ] + } + """ + + +@pytest.fixture +def schema_json_with_string_required_values() -> str: + return """ + { + "node_types": [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING", "required": "true"}, + {"name": "email", "type": "STRING", "required": "yes"}, + {"name": "phone", "type": "STRING", "required": "false"}, + {"name": "address", "type": "STRING", "required": "no"} + ] + } + ], + "relationship_types": [ + {"label": "KNOWS"} + ], + "patterns": [ + ["Person", "KNOWS", "Person"] + ] + } + """ + + @pytest.fixture def invalid_schema_json() -> str: return """ @@ -1388,6 +1459,114 @@ def test_clean_json_content_plain_json( assert cleaned == '{"node_types": [{"label": "Person"}]}' +def test_filter_properties_required_field_valid_true( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types = [ + { + "label": "Person", + "properties": [{"name": "name", "type": "STRING", "required": True}], + } + ] + result = schema_from_text._filter_properties_required_field(node_types) + assert result[0]["properties"][0]["required"] is True + + +def test_filter_properties_required_field_valid_false( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types = [ + { + "label": "Person", + "properties": [{"name": "name", "type": "STRING", "required": False}], + } + ] + result = schema_from_text._filter_properties_required_field(node_types) + assert result[0]["properties"][0]["required"] is False + + +def test_filter_properties_required_field_string( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types = [ + { + "label": "Person", + "properties": [ + {"name": "prop1", "type": "STRING", "required": "true"}, + {"name": "prop2", "type": "STRING", "required": "yes"}, + {"name": "prop3", "type": "STRING", "required": "1"}, + {"name": "prop4", "type": "STRING", "required": "TRUE"}, + ], + } + ] + result = schema_from_text._filter_properties_required_field(node_types) + for prop in result[0]["properties"]: + assert prop["required"] is True + node_types = [ + { + "label": "Person", + "properties": [ + {"name": "prop1", "type": "STRING", "required": "false"}, + {"name": "prop2", "type": "STRING", "required": "no"}, + {"name": "prop3", "type": "STRING", "required": "0"}, + {"name": "prop4", "type": "STRING", "required": "FALSE"}, + ], + } + ] + result = schema_from_text._filter_properties_required_field(node_types) + for prop in result[0]["properties"]: + assert prop["required"] is False + + +def test_filter_properties_required_field_invalid_string( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types = [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING", "required": "mandatory"}, + {"name": "email", "type": "STRING", "required": "always"}, + ], + } + ] + result = schema_from_text._filter_properties_required_field(node_types) + + assert "required" not in result[0]["properties"][0] + assert "required" not in result[0]["properties"][1] + + +def test_filter_properties_required_field_invalid_type( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types = [ + { + "label": "Person", + "properties": [ + {"name": "prop1", "type": "STRING", "required": 1}, + {"name": "prop2", "type": "STRING", "required": []}, + {"name": "prop3", "type": "STRING", "required": {"value": True}}, + ], + } + ] + result = schema_from_text._filter_properties_required_field(node_types) + for prop in result[0]["properties"]: + assert "required" not in prop + + +def test_filter_properties_required_field_missing( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types = [ + { + "label": "Person", + "properties": [{"name": "name", "type": "STRING"}], + } + ] + result = schema_from_text._filter_properties_required_field(node_types) + assert "required" not in result[0]["properties"][0] + + @pytest.mark.asyncio @patch("neo4j_graphrag.experimental.components.schema.get_structured_schema") async def test_schema_from_existing_graph(mock_get_structured_schema: Mock) -> None: From 0c56db98d24d4af46f3aa232ce95a1ae7875ae27 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Thu, 18 Dec 2025 11:54:22 +0100 Subject: [PATCH 4/6] unit tests --- .../experimental/components/test_schema.py | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py index 0705f201..764c53a1 100644 --- a/tests/unit/experimental/components/test_schema.py +++ b/tests/unit/experimental/components/test_schema.py @@ -1567,6 +1567,77 @@ def test_filter_properties_required_field_missing( assert "required" not in result[0]["properties"][0] +@pytest.mark.asyncio +async def test_schema_from_text_with_required_properties( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, + schema_json_with_required_properties: str, +) -> None: + mock_llm.ainvoke.return_value = LLMResponse( + content=schema_json_with_required_properties + ) + + schema = await schema_from_text.run(text="Sample text for test") + + person = schema.node_type_from_label("Person") + assert person is not None + + # Check required properties + name_prop = next((p for p in person.properties if p.name == "name"), None) + email_prop = next((p for p in person.properties if p.name == "email"), None) + phone_prop = next((p for p in person.properties if p.name == "phone"), None) + + assert name_prop is not None and name_prop.required is True + assert email_prop is not None and email_prop.required is False + assert phone_prop is not None and phone_prop.required is False + + +@pytest.mark.asyncio +async def test_schema_from_text_sanitizes_string_required_values( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, + schema_json_with_string_required_values: str, +) -> None: + mock_llm.ainvoke.return_value = LLMResponse( + content=schema_json_with_string_required_values + ) + + schema = await schema_from_text.run(text="Sample text for test") + + person = schema.node_type_from_label("Person") + assert person is not None + + # true and yes should become True + name_prop = next((p for p in person.properties if p.name == "name"), None) + email_prop = next((p for p in person.properties if p.name == "email"), None) + assert name_prop is not None and name_prop.required is True + assert email_prop is not None and email_prop.required is True + + # false and no should become False + phone_prop = next((p for p in person.properties if p.name == "phone"), None) + address_prop = next((p for p in person.properties if p.name == "address"), None) + assert phone_prop is not None and phone_prop.required is False + assert address_prop is not None and address_prop.required is False + + +@pytest.mark.asyncio +async def test_schema_from_text_handles_missing_required_field( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, + valid_schema_json: str, +) -> None: + mock_llm.ainvoke.return_value = LLMResponse(content=valid_schema_json) + + schema = await schema_from_text.run(text="Sample text") + + person = schema.node_type_from_label("Person") + assert person is not None + + # All properties should have required=False (default) + for prop in person.properties: + assert prop.required is False + + @pytest.mark.asyncio @patch("neo4j_graphrag.experimental.components.schema.get_structured_schema") async def test_schema_from_existing_graph(mock_get_structured_schema: Mock) -> None: From 4ea0f19b7e71b4fda8c9a5346a117dacc869e12f Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Thu, 18 Dec 2025 15:03:56 +0100 Subject: [PATCH 5/6] add required support to SchemaFromTextExtractor --- .../experimental/components/schema.py | 40 +++++ src/neo4j_graphrag/generation/prompts.py | 1 + .../experimental/components/test_schema.py | 160 ++++++++++++++++++ 3 files changed, 201 insertions(+) diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index d1c6c524..05395e0a 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -725,6 +725,40 @@ def _filter_properties_required_field( return node_types + def _enforce_required_for_constraint_properties( + self, + node_types: List[Dict[str, Any]], + constraints: List[Dict[str, Any]], + ) -> None: + """Ensure properties with UNIQUENESS constraints are marked as required.""" + if not constraints: + return + + # Build a lookup for property_names and constraints + constraint_props: Dict[str, set[str]] = {} + for c in constraints: + if c.get("type") == "UNIQUENESS": + label = c.get("node_type") + prop = c.get("property_name") + if label and prop: + constraint_props.setdefault(label, set()).add(prop) + + # Skop node_types without constraints + for node_type in node_types: + label = node_type.get("label") + if label not in constraint_props: + continue + + props_to_fix = constraint_props[label] + for prop in node_type.get("properties", []): + if isinstance(prop, dict) and prop.get("name") in props_to_fix: + if prop.get("required") is not True: + logging.info( + f"Auto-setting 'required' as True for property '{prop.get('name')}' " + f"on node '{label}' (has UNIQUENESS constraint)." + ) + prop["required"] = True + def _clean_json_content(self, content: str) -> str: content = content.strip() @@ -815,6 +849,12 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema extracted_patterns, extracted_node_types, extracted_relationship_types ) + # Enforce required=true for properties with UNIQUENESS constraints + if extracted_constraints: + self._enforce_required_for_constraint_properties( + extracted_node_types, extracted_constraints + ) + # Filter out invalid constraints if extracted_constraints: extracted_constraints = self._filter_invalid_constraints( diff --git a/src/neo4j_graphrag/generation/prompts.py b/src/neo4j_graphrag/generation/prompts.py index 44818132..e7269427 100644 --- a/src/neo4j_graphrag/generation/prompts.py +++ b/src/neo4j_graphrag/generation/prompts.py @@ -229,6 +229,7 @@ class SchemaExtractionTemplate(PromptTemplate): 9.3 Properties that are identifiers, names, or essential characteristics are typically required. 9.4 Properties that are supplementary information (phone numbers, descriptions, metadata) are typically optional. 9.5 When uncertain, default to "required": false. +9.6 If a property has a UNIQUENESS constraint, it MUST be marked as "required": true. Accepted property types are: BOOLEAN, DATE, DURATION, FLOAT, INTEGER, LIST, LOCAL_DATETIME, LOCAL_TIME, POINT, STRING, ZONED_DATETIME, ZONED_TIME. diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py index 764c53a1..82f99f1c 100644 --- a/tests/unit/experimental/components/test_schema.py +++ b/tests/unit/experimental/components/test_schema.py @@ -1567,6 +1567,127 @@ def test_filter_properties_required_field_missing( assert "required" not in result[0]["properties"][0] +def test_enforce_required_for_constraint_properties_sets_required_true( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types: list[dict[str, Any]] = [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING", "required": False}, + {"name": "email", "type": "STRING", "required": False}, + ], + } + ] + constraints = [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"} + ] + + schema_from_text._enforce_required_for_constraint_properties( + node_types, constraints + ) + + # name should now be required=true + assert node_types[0]["properties"][0]["required"] is True + # email should remain required=false + assert node_types[0]["properties"][1]["required"] is False + + +def test_enforce_required_for_constraint_properties_already_true( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types: list[dict[str, Any]] = [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING", "required": True}, + ], + } + ] + constraints = [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"} + ] + + schema_from_text._enforce_required_for_constraint_properties( + node_types, constraints + ) + + assert node_types[0]["properties"][0]["required"] is True + + +def test_enforce_required_for_constraint_properties_missing_required_field( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types: list[dict[str, Any]] = [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING"}, # No required field + ], + } + ] + constraints = [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"} + ] + + schema_from_text._enforce_required_for_constraint_properties( + node_types, constraints + ) + + assert node_types[0]["properties"][0]["required"] is True + + +def test_enforce_required_for_constraint_properties_no_constraints( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types: list[dict[str, Any]] = [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING", "required": False}, + ], + } + ] + constraints: list[dict[str, Any]] = [] + + schema_from_text._enforce_required_for_constraint_properties( + node_types, constraints + ) + + assert node_types[0]["properties"][0]["required"] is False + + +def test_enforce_required_for_constraint_properties_skips_unconstrained_nodes( + schema_from_text: SchemaFromTextExtractor, +) -> None: + node_types: list[dict[str, Any]] = [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING", "required": False}, + ], + }, + { + "label": "Company", + "properties": [ + {"name": "name", "type": "STRING", "required": False}, + ], + }, + ] + constraints = [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"} + ] + + schema_from_text._enforce_required_for_constraint_properties( + node_types, constraints + ) + + # Person.name should be required=true + assert node_types[0]["properties"][0]["required"] is True + # Company.name should remain required=false (no constraint on Company) + assert node_types[1]["properties"][0]["required"] is False + + @pytest.mark.asyncio async def test_schema_from_text_with_required_properties( schema_from_text: SchemaFromTextExtractor, @@ -1638,6 +1759,45 @@ async def test_schema_from_text_handles_missing_required_field( assert prop.required is False +@pytest.mark.asyncio +async def test_schema_from_text_enforces_required_for_constrained_properties( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, +) -> None: + schema_json = """ + { + "node_types": [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING", "required": false}, + {"name": "email", "type": "STRING", "required": false} + ] + } + ], + "relationship_types": [], + "patterns": [], + "constraints": [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"} + ] + } + """ + mock_llm.ainvoke.return_value = LLMResponse(content=schema_json) + + schema = await schema_from_text.run(text="Sample text") + + person = schema.node_type_from_label("Person") + assert person is not None + + name_prop = next((p for p in person.properties if p.name == "name"), None) + email_prop = next((p for p in person.properties if p.name == "email"), None) + + # name should be auto-fixed to required=true + assert name_prop is not None and name_prop.required is True + # email should remain required=false + assert email_prop is not None and email_prop.required is False + + @pytest.mark.asyncio @patch("neo4j_graphrag.experimental.components.schema.get_structured_schema") async def test_schema_from_existing_graph(mock_get_structured_schema: Mock) -> None: From 91f707672c081f036fae1d85af55807d8f22b0e8 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Fri, 19 Dec 2025 11:29:29 +0100 Subject: [PATCH 6/6] handle int values for required property --- .../experimental/components/schema.py | 43 ++++++++----------- .../experimental/components/test_schema.py | 24 +++++++++-- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index 05395e0a..d546feb8 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -693,33 +693,26 @@ def _filter_properties_required_field( prop_name = prop.get("name", "unknown") node_label = node_type.get("label", "unknown") - if isinstance(required_value, str): - if required_value.lower() in ("true", "yes", "1"): - prop["required"] = True - logging.info( - f"Converted 'required' value {required_value} to True " - f"for property '{prop_name}' on node '{node_label}'" - ) - elif required_value.lower() in ("false", "no", "0"): - prop["required"] = False - logging.info( - f"Converted 'required' value '{required_value}' to False " - f"for property '{prop_name}' on node '{node_label}' " - ) - # Unknown string values - else: - logging.info( - f"Removing unrecognized 'required' value '{required_value}' " - f"for property '{prop_name}' on node '{node_label}'. " - f"Using default (False) " # TODO: Not sure if we have to convert it to the default value - double check! - ) - prop.pop("required", None) + # Convert to string to handle int values like 1 or 0 + required_str = str(required_value).lower() + + if required_str in ("true", "yes", "1"): + prop["required"] = True + logging.info( + f"Converted 'required' value '{required_value}' to True " + f"for property '{prop_name}' on node '{node_label}'" + ) + elif required_str in ("false", "no", "0"): + prop["required"] = False + logging.info( + f"Converted 'required' value '{required_value}' to False " + f"for property '{prop_name}' on node '{node_label}'" + ) else: - # Non-string, non-boolean - remove logging.info( - f"Removing invalid 'required' value '{required_value}' (type: {type(required_value).__name__}) " + f"Removing unrecognized 'required' value '{required_value}' " f"for property '{prop_name}' on node '{node_label}'. " - f"Using default (False). " + f"Using default (False)." ) prop.pop("required", None) @@ -743,7 +736,7 @@ def _enforce_required_for_constraint_properties( if label and prop: constraint_props.setdefault(label, set()).add(prop) - # Skop node_types without constraints + # Skip node_types without constraints for node_type in node_types: label = node_type.get("label") if label not in constraint_props: diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py index 82f99f1c..3918ef8a 100644 --- a/tests/unit/experimental/components/test_schema.py +++ b/tests/unit/experimental/components/test_schema.py @@ -1536,16 +1536,34 @@ def test_filter_properties_required_field_invalid_string( assert "required" not in result[0]["properties"][1] -def test_filter_properties_required_field_invalid_type( +def test_filter_properties_required_field_int_values( schema_from_text: SchemaFromTextExtractor, ) -> None: + """Test that int values like 1 and 0 are converted to True/False.""" node_types = [ { "label": "Person", "properties": [ {"name": "prop1", "type": "STRING", "required": 1}, - {"name": "prop2", "type": "STRING", "required": []}, - {"name": "prop3", "type": "STRING", "required": {"value": True}}, + {"name": "prop2", "type": "STRING", "required": 0}, + ], + } + ] + result = schema_from_text._filter_properties_required_field(node_types) + assert result[0]["properties"][0]["required"] is True + assert result[0]["properties"][1]["required"] is False + + +def test_filter_properties_required_field_invalid_type( + schema_from_text: SchemaFromTextExtractor, +) -> None: + """Test that unrecognized types like list and dict are removed.""" + node_types = [ + { + "label": "Person", + "properties": [ + {"name": "prop1", "type": "STRING", "required": []}, + {"name": "prop2", "type": "STRING", "required": {"value": True}}, ], } ]