cancel
Showing results for 
Search instead for 
Did you mean: 

Head's Up! Site migration is underway. Expect disruption to service on Thursday, Feb. 9!

Nodes are not being created using the neo4j python driver

moopsish
Node

Hi everyone,

I'm trying to load in some files using the Neo4j python driver. the problem is that it skips over creating some nodes without throwing an error. It creates all the patient nodes but then doesnt create encounter nodes.

Does it overwrite my commit? or where am I going wrong?

I'm using the open healthcare Synthea csv dataset

My code:

from neo4j import GraphDatabase
import csv
from datetime import datetime, date
from math import floor
import pandas as pd
import os

csvprefix = "synthea_sample_data_csv_apr2020/csv/"
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Neo4ja"))

def create_patient(tx, patient):
    patient = list(patient)
    id = patient[0]
    birthday = str(patient[1])
    agegroup = calc_agegroup(patient[1])
    gender = patient[14]
    #print(f"create(p =(Patient {{ id:'{id}', birthday: {birthday} }})-[:is_gender]->({gender})) create(p)-[:in_agegroup]-({agegroup})")
    print(f"CREATE(p:Patient) SET p.id = {id} SET p.DOB = date({birthday})")
    
    try:
        result = tx.run(f"CREATE(p:Patient) SET p.id = '{id}' SET p.DOB = date('{birthday}') \
            MERGE (a:Ages_{agegroup}) \
            MERGE (g:Gender_{gender}) \
            CREATE (p)-[:in_agegroup]->(a) \
            CREATE (p)-[:has_gender]->(g) \
            ")
    except Exception as e:
        print("failed "+ patient[0])
        print(e)
        return 0
    return 1

def create_encounter(tx, row):
    #print(row)
    Id,START,STOP,PATIENT,ORGANIZATION,PROVIDER,PAYER,ENCOUNTERCLASS,CODE,DESCRIPTION,BASE_ENCOUNTER_COST,TOTAL_CLAIM_COST,PAYER_COVERAGE,REASONCODE,REASONDESCRIPTION = row
    try:
        #MATCH (e2:Encounter) ORDER BY 'stop' RETURN e2 LIMIT 1) \ MATCH (p:Patient {{id: "{PATIENT}"}})-[:had_encounter]-(e2:Encounter) return e2 ORDER BY e2.stop DESC limit 1 \
        #with e2, e CREATE (e2)-[:next]->(e) \ WITH e MATCH (p:Patient {{id: "{PATIENT}"}}) \
        result = tx.run(f'\
            MATCH (p:Patient {{id: "{Id}"}})-[:had_encounter]->(:Encounter)-[r:to_latest]->(e2:Encounter) \
            WITH p, r, e2 MERGE (e:Encounter {{name: "{Id}", start: "{START}", stop: "{STOP}", encounterclass: "{ENCOUNTERCLASS}", code: "{CODE}", description: "{DESCRIPTION}", reasoncode: "{REASONCODE}", reason: "{REASONDESCRIPTION}"}}) \
            CREATE (p)-[:had_encounter]->(e) \
            CREATE (e2)-[:to_latest]->(e) \
            CREATE (e2)-[:next]->(e) \
            DELETE r \
            ')
        print(f'create encounter: {Id}')
    except Exception as e:
        print("failed "+ Id)
        print(e)
        return 0
    return 1


def create_file(tx, cfile, relation=None):
    path =  os.path.abspath(cfile)
    name = os.path.basename(path)
    storeDate = date.today()
    data = pd.read_csv(open(cfile), sep=None)
    dimensions = f"{data.shape[0]} Rows, {data.shape[1]} Columns"
    try:
        result = tx.run(f"MERGE(f:File) SET f.name = '{name}' SET f.storeDate = date('{storeDate}') SET f.location = '{path}' SET f.dimensions = '{dimensions}' \
            WITH f MATCH {relation} \
            CREATE (p)-[:from_file]->(f) \
            ")
    except Exception as e:
        print("failed "+ cfile)
        print(e)
        return 0
    return 1


def calc_agegroup(birthday):
    today = date.today()
    bday = datetime.strptime(birthday, '%Y-%m-%d')
    age = today.year - bday.year - ((today.month, today.day) < (bday.month, bday.day))
    lowerbound = (floor(age / 10) * 10)
    upperbound = (lowerbound + 10)
    return f"{lowerbound}_{upperbound}"


def main():
    failed = []
    with driver.session() as session:
        with open(csvprefix + 'patients.csv', 'r') as f:
            reader = csv.reader(f) 
            for num, row in enumerate(reader):
                if num == 0:
                    pass
                else:
                    #pass
                    #print(row)
                    create_file(session, csvprefix + 'patients.csv', "(p:Patient {id: '"+list(row)[0]+"'})")
                    result = create_patient(session, row)
                    if result == 0:
                        failed.append(row)
        print(f"failed: {len(failed)}")

        with open(csvprefix + 'encounters.csv', 'r') as f:
            create_file(session, csvprefix + 'encounters.csv', "(e:Encounter {id: '"+list(row)[0]+"'})")
            reader = csv.reader(f)
            reader.__next__
            for row in reader:
                create_encounter(session, row)
                #print(Id)

main()
driver.close()

Hope someone can teach me what I'm doing wrong.

Regards,
Julian

3 REPLIES 3

clem
Graph Steward

One possible issue (I'm not 100% sure), is if the Id is actually an integer in this python statement:
id = patient[0]

and some cypher, later you have this statement which looks like you're assigning an integer:
SET p.id = {id}

Then this MATCH will fail because it's explicitly a string:
MATCH (p:Patient {{id: "{Id}"}})

and a string isn't going to match an integer.

Hi,

Thank you for the reply, I don't think this is the issue though as id is a string of characters in all cases. I did try it but it just complained about an invalid literal in the match instead.

I'm really stumped on what is going wrong here, I don't get any errors but it just does not create the nodes.

Then, it could be the second MATCH has a quote within the quote.

I'd recommend doing a print statement of Id each time before you use it (or go into the debugger)

That is, do something like:

cypher = f"CREATE(p:Patient) SET p.id = '{id}' SET p.DOB = date('{birthday}') \
            MERGE (a:Ages_{agegroup}) \
            MERGE (g:Gender_{gender}) \
            CREATE (p)-[:in_agegroup]->(a) \
            CREATE (p)-[:has_gender]->(g) \
            "
print(cypher)
try:
result = tx.run(cyphyer)

At least, then you can copy paste the Cypher statements and see if they work within the Neo4J browser. That way, you can separate out whether Cypher vs. neo4J driver is the cause of your problem.