BTW, my main code looks like this. Can anyone give me some advice on how to speed up the dependency graph construction?
Sincere thanks!
from neo4j import GraphDatabase
# Breadth-first crawl of the npm dependency graph, starting at ``root_name``.
# Each package is fetched once from the registry and written to neo4j as a
# ``Pkg`` node plus one ``DependOn`` edge per dependency.
#
# NOTE: ``create_basic``, ``_create_dependency`` and ``check_neo4j`` must be
# defined before this loop runs.
driver = GraphDatabase.driver(
    uri,
    auth=(user, password),
    max_connection_lifetime=3600 * 24 * 30,
    keep_alive=True,
)
url = 'https://registry.npmjs.org/'
pro_queue = queue.Queue()  # pkg names whose information still has to be fetched and stored in neo4j
seen = set()  # every pkg name that has been put on pro_queue
root_name = 'express'
pro_queue.put(root_name)
seen.add(root_name)


def _store_package(tx, pkg_json, child_names):
    """Write one package node and all of its dependency edges in a single transaction."""
    create_basic(tx, pkg_json)
    parent_name = pkg_json['name']
    for child_name in child_names:
        _create_dependency(tx, parent_name, child_name)


# Speed-ups over opening a session per package and committing per edge:
#   * one HTTP connection pool and one neo4j session for the whole crawl;
#   * one committed transaction per package instead of one per dependency.
# NOTE(review): MERGE/MATCH on Pkg.name scans all Pkg nodes unless that
# property is indexed -- consider an index or uniqueness constraint on it.
with requests.Session() as http, driver.session() as session:
    while not pro_queue.empty():
        pkg_name = pro_queue.get()
        # NOTE(review): '<name>/latest' is used because that document carries
        # top-level 'name' and 'dependencies' and is far smaller than the full
        # package document -- confirm this is the intended endpoint.
        response = http.get(url + pkg_name + '/latest')
        response.raise_for_status()
        pkg_json = response.json()
        # Packages without dependencies omit the key entirely.
        child_names = list(pkg_json.get('dependencies') or {})
        session.write_transaction(_store_package, pkg_json, child_names)
        for child_name in child_names:
            # check_neo4j is defined elsewhere; presumably it reports whether
            # the package is already stored -- confirm against its definition.
            if child_name not in seen and not check_neo4j(child_name):
                pro_queue.put(child_name)
                seen.add(child_name)
The two transaction functions:
def create_basic(tx, pkg_json):
    """Create or update the ``Pkg`` node for one package.

    Args:
        tx: Transaction object exposing ``run(query, **params)``.
        pkg_json: Mapping providing at least the keys ``'id'`` and ``'name'``.

    Returns:
        A list with one ``{"pkg": <id property>}`` dict per returned record.

    Raises:
        KeyError: If ``pkg_json`` lacks ``'id'`` or ``'name'``.
        ServiceUnavailable: Re-raised after being logged.
    """
    pkg_id = pkg_json['id']
    pkg_name = pkg_json['name']
    # Merge on ``name``, not ``id``: dependency edges in this file create
    # their target nodes from the name alone, so merging on ``id`` would miss
    # such a node and add a second ``Pkg`` node for the same package.
    query = (
        "MERGE (p1:Pkg{ name: $pkg_name }) SET p1.id = $pkg_id "
        "RETURN p1"
    )
    try:
        # tx.run is inside the try so a failure while sending the query is
        # logged too, not only a failure while reading the records.
        result = tx.run(query, pkg_id=pkg_id, pkg_name=pkg_name)
        return [{"pkg": record["p1"]["id"]} for record in result]
    except ServiceUnavailable as exception:
        logging.error("CREATE BASIC- %s raised an error: \n %s", query, exception)
        raise
def _create_dependency(tx, parent_name, child_name):
    """Record that package ``parent_name`` depends on package ``child_name``.

    The parent node must already exist (it is only matched, never created).
    The child node is created from its name alone if it is missing, and the
    ``DependOn`` relationship is merged so repeated calls add no duplicates.

    Args:
        tx: Transaction object exposing ``run(query, **params)``.
        parent_name: ``name`` property of the depending package.
        child_name: ``name`` property of the package depended upon.

    Returns:
        A list with one ``{"pkg": ..., "dependcy": ...}`` dict per returned
        record, holding the ``id`` property of each node, or ``None`` when
        the node has no ``id`` yet. The ``'dependcy'`` spelling is kept so
        existing consumers of the result keep working.

    Raises:
        ServiceUnavailable: Re-raised after being logged.
    """
    query = (
        "MATCH (p1:Pkg{name:$parent_name}) "
        "MERGE(p2:Pkg{name:$child_name}) "
        "MERGE (p1)-[depend:DependOn]->(p2) "
        "RETURN p1,p2"
    )
    try:
        result = tx.run(query, parent_name=parent_name, child_name=child_name)
        # A node created by the MERGE above has only ``name``; indexing its
        # missing ``id`` would raise KeyError, so read the property with .get.
        return [
            {"pkg": record["p1"].get("id"), 'dependcy': record["p2"].get("id")}
            for record in result
        ]
    except ServiceUnavailable as exception:
        logging.error("CREATE DEPENDENCY- %s raised an error: \n %s", query, exception)
        raise