Skip to content

Commit

Permalink
Merge pull request #770 from neo4j-contrib/task/767-improve-inspection
Browse files Browse the repository at this point in the history
Task/767 improve inspection
  • Loading branch information
mariusconjeaud authored Dec 11, 2023
2 parents 6510f13 + a83b611 commit 70beab2
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 40 deletions.
16 changes: 16 additions & 0 deletions doc/source/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,18 @@ You can inspect an existing Neo4j database to generate a neomodel definition fil
This will generate a file called ``models.py`` in the ``yourapp`` directory. This file can be used as a starting point,
and will contain the necessary module imports, as well as class definitions for nodes and, if relevant, relationships.

Omitting the ``--db`` argument will default to the ``NEO4J_BOLT_URL`` environment variable. This is useful for masking
your credentials.

Note that you can also print the output to the console instead of writing a file by omitting the ``--write-to`` option.

If you have a database with a large number of nodes and relationships,
this script can take a long time to run (during our tests, it took 30 seconds for 500k nodes and 1.3M relationships).
You can speed it up by not scanning for relationship properties and/or relationship cardinality, using these options:
``--no-rel-props`` and ``--no-rel-cardinality``.
Note that this will still add relationship definitions to your nodes, but without relationship models;
and cardinality will be left at the default (ZeroOrMore).

.. note::

This command will only generate the definition for nodes and relationships that are present in the
Expand All @@ -108,6 +118,9 @@ script (:ref:`neomodel_install_labels`) to automate this: ::

It is important to execute this after altering the schema and observe the number of classes it reports.

Omitting the ``--db`` argument will default to the ``NEO4J_BOLT_URL`` environment variable. This is useful for masking
your credentials.

Remove existing constraints and indexes
=======================================
Similarly, ``neomodel`` provides a script (:ref:`neomodel_remove_labels`) to automate the removal of all existing constraints and indexes from
Expand All @@ -117,6 +130,9 @@ the database, when this is required: ::

After executing, it will print all indexes and constraints it has removed.

Omitting the ``--db`` argument will default to the ``NEO4J_BOLT_URL`` environment variable. This is useful for masking
your credentials.

Create, Update, Delete operations
=================================

Expand Down
105 changes: 78 additions & 27 deletions neomodel/scripts/neomodel_inspect_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
If no file is specified, the tool will print the class definitions to stdout.
options:
-h, --help show this help message and exit
--db bolt://neo4j:neo4j@localhost:7687
-h, --help show this help message and exit
--db bolt://neo4j:neo4j@localhost:7687
Neo4j Server URL
-T, --write-to someapp/models.py
-T, --write-to someapp/models.py
File where to write output.
--no-rel-props Do not inspect relationship properties
--no-rel-cardinality Do not infer relationship cardinality
"""

import argparse
Expand Down Expand Up @@ -116,13 +118,20 @@ def get_indexed_properties_for_label(label):

class RelationshipInspector:
@classmethod
def outgoing_relationships(cls, start_label):
query = f"""
MATCH (n:`{start_label}`)-[r]->(m)
WITH DISTINCT type(r) as rel_type, head(labels(m)) AS target_label, keys(r) AS properties, head(collect(r)) AS sampleRel
ORDER BY size(properties) DESC
RETURN rel_type, target_label, apoc.meta.cypher.types(properties(sampleRel)) AS properties LIMIT 1
"""
def outgoing_relationships(cls, start_label, get_properties: bool = True):
if get_properties:
query = f"""
MATCH (n:`{start_label}`)-[r]->(m)
WITH DISTINCT type(r) as rel_type, head(labels(m)) AS target_label, keys(r) AS properties, head(collect(r)) AS sampleRel
ORDER BY size(properties) DESC
RETURN rel_type, target_label, apoc.meta.cypher.types(properties(sampleRel)) AS properties LIMIT 1
"""
else:
query = f"""
MATCH (n:`{start_label}`)-[r]->(m)
WITH DISTINCT type(r) as rel_type, head(labels(m)) AS target_label
RETURN rel_type, target_label, {{}} AS properties LIMIT 1
"""
result, _ = db.cypher_query(query)
return [(record[0], record[1], record[2]) for record in result]

Expand Down Expand Up @@ -222,7 +231,9 @@ def parse_imports():
return imports


def build_rel_type_definition(label, outgoing_relationships, defined_rel_types):
def build_rel_type_definition(
label, outgoing_relationships, defined_rel_types, infer_cardinality: bool = True
):
class_definition_append = ""
rel_type_definitions = ""

Expand All @@ -241,9 +252,12 @@ def build_rel_type_definition(label, outgoing_relationships, defined_rel_types):
rel_type
)

cardinality = RelationshipInspector.infer_cardinality(rel_type, label)
cardinality_string = ""
if infer_cardinality:
cardinality = RelationshipInspector.infer_cardinality(rel_type, label)
cardinality_string += f", cardinality={cardinality}"

class_definition_append += f' {clean_class_member_key(rel_name)} = RelationshipTo("{target_label}", "{rel_type}", cardinality={cardinality}'
class_definition_append += f' {clean_class_member_key(rel_name)} = RelationshipTo("{target_label}", "{rel_type}"{cardinality_string}'

if rel_props and rel_type not in defined_rel_types:
rel_model_name = generate_rel_class_name(rel_type)
Expand All @@ -265,7 +279,11 @@ def build_rel_type_definition(label, outgoing_relationships, defined_rel_types):
return class_definition_append


def inspect_database(bolt_url):
def inspect_database(
bolt_url,
get_relationship_properties: bool = True,
infer_relationship_cardinality: bool = True,
):
# Connect to the database
print(f"Connecting to {bolt_url}")
db.set_connection(bolt_url)
Expand All @@ -284,23 +302,32 @@ def inspect_database(bolt_url):
indexed_properties = NodeInspector.get_indexed_properties_for_label(label)

class_definition = f"class {class_name}(StructuredNode):\n"
class_definition += "".join(
[
build_prop_string(
unique_properties, indexed_properties, prop, prop_type
)
for prop, prop_type in properties.items()
]
)
if properties:
class_definition += "".join(
[
build_prop_string(
unique_properties, indexed_properties, prop, prop_type
)
for prop, prop_type in properties.items()
]
)

outgoing_relationships = RelationshipInspector.outgoing_relationships(label)
outgoing_relationships = RelationshipInspector.outgoing_relationships(
label, get_relationship_properties
)

if outgoing_relationships and "StructuredRel" not in IMPORTS:
IMPORTS.append("RelationshipTo")
IMPORTS.append("StructuredRel")
# No rel properties = no rel classes
# Then StructuredRel import is not needed
if get_relationship_properties:
IMPORTS.append("StructuredRel")

class_definition += build_rel_type_definition(
label, outgoing_relationships, defined_rel_types
label,
outgoing_relationships,
defined_rel_types,
infer_relationship_cardinality,
)

if not properties and not outgoing_relationships:
Expand Down Expand Up @@ -353,6 +380,20 @@ def main():
help="File where to write output.",
)

parser.add_argument(
"--no-rel-props",
dest="get_relationship_properties",
action="store_false",
help="Do not inspect relationship properties",
)

parser.add_argument(
"--no-rel-cardinality",
dest="infer_relationship_cardinality",
action="store_false",
help="Do not infer relationship cardinality",
)

args = parser.parse_args()

bolt_url = args.neo4j_bolt_url
Expand All @@ -364,12 +405,22 @@ def main():
# Before connecting to the database
if args.write_to:
with open(args.write_to, "w") as file:
output = inspect_database(bolt_url=bolt_url)
output = inspect_database(
bolt_url=bolt_url,
get_relationship_properties=args.get_relationship_properties,
infer_relationship_cardinality=args.infer_relationship_cardinality,
)
print(f"Writing to {args.write_to}")
file.write(output)
# If no file is specified, print to stdout
else:
print(inspect_database(bolt_url=bolt_url))
print(
inspect_database(
bolt_url=bolt_url,
get_relationship_properties=args.get_relationship_properties,
infer_relationship_cardinality=args.infer_relationship_cardinality,
)
)


if __name__ == "__main__":
Expand Down
26 changes: 26 additions & 0 deletions test/data/neomodel_inspect_database_output_light.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from neomodel import StructuredNode, StringProperty, RelationshipTo, ArrayProperty, FloatProperty, BooleanProperty, DateTimeProperty, IntegerProperty
from neomodel.contrib.spatial_properties import PointProperty

class ScriptsTestNode(StructuredNode):
personal_id = StringProperty(unique_index=True)
name = StringProperty(index=True)
rel = RelationshipTo("ScriptsTestNode", "REL")


class EveryPropertyTypeNode(StructuredNode):
array_property = ArrayProperty(StringProperty())
float_property = FloatProperty()
boolean_property = BooleanProperty()
point_property = PointProperty(crs='wgs-84')
string_property = StringProperty()
datetime_property = DateTimeProperty()
integer_property = IntegerProperty()


class NoPropertyNode(StructuredNode):
pass


class NoPropertyRelNode(StructuredNode):
no_prop_rel = RelationshipTo("NoPropertyRelNode", "NO_PROP_REL")

26 changes: 26 additions & 0 deletions test/data/neomodel_inspect_database_output_pre_5_7_light.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from neomodel import StructuredNode, StringProperty, RelationshipTo, ArrayProperty, FloatProperty, BooleanProperty, DateTimeProperty, IntegerProperty
from neomodel.contrib.spatial_properties import PointProperty

class ScriptsTestNode(StructuredNode):
personal_id = StringProperty(unique_index=True)
name = StringProperty(index=True)
rel = RelationshipTo("ScriptsTestNode", "REL")


class EveryPropertyTypeNode(StructuredNode):
array_property = ArrayProperty(StringProperty())
float_property = FloatProperty()
boolean_property = BooleanProperty()
point_property = PointProperty(crs='wgs-84')
string_property = StringProperty()
datetime_property = DateTimeProperty()
integer_property = IntegerProperty()


class NoPropertyNode(StructuredNode):
pass


class NoPropertyRelNode(StructuredNode):
no_prop_rel = RelationshipTo("NoPropertyRelNode", "NO_PROP_REL")

34 changes: 21 additions & 13 deletions test/test_scripts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import subprocess

import pytest

from neomodel import (
RelationshipTo,
StringProperty,
Expand Down Expand Up @@ -87,7 +89,15 @@ def test_neomodel_remove_labels():
assert len(indexes) == 0


def test_neomodel_inspect_database():
@pytest.mark.parametrize(
"script_flavour",
[
"",
"_light",
],
)
def test_neomodel_inspect_database(script_flavour):
output_file = "test/data/neomodel_inspect_database_test_output.py"
# Check that the help option works
result = subprocess.run(
["neomodel_inspect_database", "--help"],
Expand Down Expand Up @@ -128,8 +138,11 @@ def test_neomodel_inspect_database():
)

# Test the console output version of the script
args_list = ["neomodel_inspect_database", "--db", config.DATABASE_URL]
if script_flavour == "_light":
args_list += ["--no-rel-props", "--no-rel-cardinality"]
result = subprocess.run(
["neomodel_inspect_database", "--db", config.DATABASE_URL],
args_list,
capture_output=True,
text=True,
check=True,
Expand All @@ -141,9 +154,9 @@ def test_neomodel_inspect_database():
assert wrapped_console_output[0].startswith("Connecting to")
# Check that all the expected lines are here
file_path = (
"test/data/neomodel_inspect_database_output.txt"
f"test/data/neomodel_inspect_database_output{script_flavour}.txt"
if db.version_is_higher_than("5.7")
else "test/data/neomodel_inspect_database_output_pre_5_7.txt"
else f"test/data/neomodel_inspect_database_output_pre_5_7{script_flavour}.txt"
)
with open(file_path, "r") as f:
wrapped_test_file = [line for line in f.read().split("\n") if line.strip()]
Expand All @@ -165,14 +178,9 @@ def test_neomodel_inspect_database():
assert set(wrapped_test_file) == set(wrapped_console_output[2:])

# Test the file output version of the script
args_list += ["--write-to", output_file]
result = subprocess.run(
[
"neomodel_inspect_database",
"--db",
config.DATABASE_URL,
"--write-to",
"test/data/neomodel_inspect_database_test_output.py",
],
args_list,
capture_output=True,
text=True,
check=True,
Expand All @@ -186,11 +194,11 @@ def test_neomodel_inspect_database():
]
assert wrapped_file_console_output[0].startswith("Connecting to")
assert wrapped_file_console_output[1].startswith("Writing to")
with open("test/data/neomodel_inspect_database_test_output.py", "r") as f:
with open(output_file, "r") as f:
wrapped_output_file = [line for line in f.read().split("\n") if line.strip()]
assert set(wrapped_output_file) == set(wrapped_console_output[1:])

# Finally, delete the file created by the script
subprocess.run(
["rm", "test/data/neomodel_inspect_database_test_output.py"],
["rm", output_file],
)

0 comments on commit 70beab2

Please sign in to comment.