Hi everyone,
I'm trying to load in some files using the Neo4j python driver. the problem is that it skips over creating some nodes without throwing an error. It creates all the patient nodes but then doesnt create encounter nodes.
Does it overwrite my commit? or where am I going wrong?
I'm using the open healthcare Synthea csv dataset
My code:
from neo4j import GraphDatabase
import csv
from datetime import datetime, date
from math import floor
import pandas as pd
import os
csvprefix = "synthea_sample_data_csv_apr2020/csv/"
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Neo4ja"))
def create_patient(tx, patient):
patient = list(patient)
id = patient[0]
birthday = str(patient[1])
agegroup = calc_agegroup(patient[1])
gender = patient[14]
#print(f"create(p =(Patient {{ id:'{id}', birthday: {birthday} }})-[:is_gender]->({gender})) create(p)-[:in_agegroup]-({agegroup})")
print(f"CREATE(p:Patient) SET p.id = {id} SET p.DOB = date({birthday})")
try:
result = tx.run(f"CREATE(p:Patient) SET p.id = '{id}' SET p.DOB = date('{birthday}') \
MERGE (a:Ages_{agegroup}) \
MERGE (g:Gender_{gender}) \
CREATE (p)-[:in_agegroup]->(a) \
CREATE (p)-[:has_gender]->(g) \
")
except Exception as e:
print("failed "+ patient[0])
print(e)
return 0
return 1
def create_encounter(tx, row):
#print(row)
Id,START,STOP,PATIENT,ORGANIZATION,PROVIDER,PAYER,ENCOUNTERCLASS,CODE,DESCRIPTION,BASE_ENCOUNTER_COST,TOTAL_CLAIM_COST,PAYER_COVERAGE,REASONCODE,REASONDESCRIPTION = row
try:
#MATCH (e2:Encounter) ORDER BY 'stop' RETURN e2 LIMIT 1) \ MATCH (p:Patient {{id: "{PATIENT}"}})-[:had_encounter]-(e2:Encounter) return e2 ORDER BY e2.stop DESC limit 1 \
#with e2, e CREATE (e2)-[:next]->(e) \ WITH e MATCH (p:Patient {{id: "{PATIENT}"}}) \
result = tx.run(f'\
MATCH (p:Patient {{id: "{Id}"}})-[:had_encounter]->(:Encounter)-[r:to_latest]->(e2:Encounter) \
WITH p, r, e2 MERGE (e:Encounter {{name: "{Id}", start: "{START}", stop: "{STOP}", encounterclass: "{ENCOUNTERCLASS}", code: "{CODE}", description: "{DESCRIPTION}", reasoncode: "{REASONCODE}", reason: "{REASONDESCRIPTION}"}}) \
CREATE (p)-[:had_encounter]->(e) \
CREATE (e2)-[:to_latest]->(e) \
CREATE (e2)-[:next]->(e) \
DELETE r \
')
print(f'create encounter: {Id}')
except Exception as e:
print("failed "+ Id)
print(e)
return 0
return 1
def create_file(tx, cfile, relation=None):
path = os.path.abspath(cfile)
name = os.path.basename(path)
storeDate = date.today()
data = pd.read_csv(open(cfile), sep=None)
dimensions = f"{data.shape[0]} Rows, {data.shape[1]} Columns"
try:
result = tx.run(f"MERGE(f:File) SET f.name = '{name}' SET f.storeDate = date('{storeDate}') SET f.location = '{path}' SET f.dimensions = '{dimensions}' \
WITH f MATCH {relation} \
CREATE (p)-[:from_file]->(f) \
")
except Exception as e:
print("failed "+ cfile)
print(e)
return 0
return 1
def calc_agegroup(birthday):
today = date.today()
bday = datetime.strptime(birthday, '%Y-%m-%d')
age = today.year - bday.year - ((today.month, today.day) < (bday.month, bday.day))
lowerbound = (floor(age / 10) * 10)
upperbound = (lowerbound + 10)
return f"{lowerbound}_{upperbound}"
def main():
failed = []
with driver.session() as session:
with open(csvprefix + 'patients.csv', 'r') as f:
reader = csv.reader(f)
for num, row in enumerate(reader):
if num == 0:
pass
else:
#pass
#print(row)
create_file(session, csvprefix + 'patients.csv', "(p:Patient {id: '"+list(row)[0]+"'})")
result = create_patient(session, row)
if result == 0:
failed.append(row)
print(f"failed: {len(failed)}")
with open(csvprefix + 'encounters.csv', 'r') as f:
create_file(session, csvprefix + 'encounters.csv', "(e:Encounter {id: '"+list(row)[0]+"'})")
reader = csv.reader(f)
reader.__next__
for row in reader:
create_encounter(session, row)
#print(Id)
main()
driver.close()
Hope someone can teach me what I'm doing wrong.
Regards,
Julian