Sure.
Here is one line of record in my json:
{
"node": {
"label": "MEMBER",
"id": [
{
"key": "globalcustid",
"value": "0000578817c244ad6d25f623d660a1d4c72c0dcd0f35c1f223d43db8ddeefeea"
}
],
"properties": [
{
"key": "globalcustid",
"value": "0000578817c244ad6d25f623d660a1d4c72c0dcd0f35c1f223d43db8ddeefeea"
},
{
"key": "hashmobileno",
"value": "3b7d3e14562eb421e07e20b15fcc0f954f63362d5ed607a9e9c5d804f4fd2416"
}
]
},
"relationships": [
{
"label": "JOIN",
"from_node": {
"label": "MEMBER",
"id": [
{
"key": "globalcustid",
"value": "0000578817c244ad6d25f623d660a1d4c72c0dcd0f35c1f223d43db8ddeefeea"
}
]
},
"to_node": {
"label": "MEMBERSHIP",
"id": [
{
"key": "bu",
"value": "YATA"
},
{
"key": "member_id",
"value": "M9122805"
}
]
},
"properties": [
{
"key": "registration_datetime",
"value": "2023-12-06 00:00:00"
}
]
},
{
"label": "AGE_IN",
"from_node": {
"label": "MEMBER",
"id": [
{
"key": "globalcustid",
"value": "0000578817c244ad6d25f623d660a1d4c72c0dcd0f35c1f223d43db8ddeefeea"
}
]
},
"to_node": {
"label": "AGE_GROUP",
"id": [
{
"key": "tag_id",
"value": "G1100001A03"
}
]
},
"properties": [
{
"key": "createdate",
"value": "2023-11-29"
},
{
"key": "enddate",
"value": "9999-12-31"
},
{
"key": "value",
"value": "1.0"
}
]
},
{
"label": "OWN",
"from_node": {
"label": "MEMBER",
"id": [
{
"key": "globalcustid",
"value": "0000578817c244ad6d25f623d660a1d4c72c0dcd0f35c1f223d43db8ddeefeea"
}
]
},
"to_node": {
"label": "DEVICE",
"id": [
{
"key": "tag_id",
"value": "G1130002D03"
}
]
},
"properties": [
{
"key": "createdate",
"value": "2023-12-08"
},
{
"key": "enddate",
"value": "2025-01-06"
},
{
"key": "value",
"value": "1.0"
}
]
},
{
"label": "LIVE_IN",
"from_node": {
"label": "MEMBER",
"id": [
{
"key": "globalcustid",
"value": "0000578817c244ad6d25f623d660a1d4c72c0dcd0f35c1f223d43db8ddeefeea"
}
]
},
"to_node": {
"label": "DISTRICT",
"id": [
{
"key": "tag_id",
"value": "G1100006H06"
}
]
},
"properties": [
{
"key": "createdate",
"value": "2023-11-29"
},
{
"key": "enddate",
"value": "9999-12-31"
},
{
"key": "value",
"value": "1.0"
}
]
},
{
"label": "IS_GENDER",
"from_node": {
"label": "MEMBER",
"id": [
{
"key": "globalcustid",
"value": "0000578817c244ad6d25f623d660a1d4c72c0dcd0f35c1f223d43db8ddeefeea"
}
]
},
"to_node": {
"label": "GENDER",
"id": [
{
"key": "tag_id",
"value": "G1100004G01"
}
]
},
"properties": [
{
"key": "createdate",
"value": "2024-01-01"
},
{
"key": "enddate",
"value": "9999-12-31"
},
{
"key": "value",
"value": "1.0"
}
]
}
]
}
Here is the cypher i used to process and load the record to merge/create different nodes/relationships:
CALL apoc.load.json('file:///test.json.gz') YIELD value
CALL {
WITH value
WITH value.node as node, value.relationships as relationships
WITH toUpper(node.label) as node_label,
apoc.map.fromLists(
[entry IN node.id | toLower(entry.key)],
[entry IN node.id | toLower(entry.value)]
) AS node_id,
apoc.map.setKey(
apoc.map.fromLists(
[entry IN node.properties | toLower(entry.key)],
[entry IN node.properties | toLower(entry.value)]
),
'last_sys_upd_dt',
datetime({epochMillis: apoc.date.currentTimestamp()}) + duration({hours: 8})
) AS node_properties, relationships
// Create member node
CALL apoc.merge.node(
[node_label],
node_id,
node_properties,
node_properties
) YIELD node as member_node
// Unnest relationships and process each of them
WITH member_node, relationships
UNWIND relationships as relationship
WITH member_node, relationship
WHERE relationship IS NOT NULL
WITH member_node,
toUpper(relationship.label) as rel_label,
relationship.from_node as from_node,
relationship.to_node as to_node,
relationship.properties as rel_properties
WITH member_node,
rel_label,
apoc.map.fromLists(
[entry IN from_node.id | toLower(entry.key)],
[entry IN from_node.id | toLower(entry.value)]
) AS from_node_id,
toUpper(from_node.label) as from_node_label,
apoc.map.fromLists(
[entry IN to_node.id | toLower(entry.key)],
[entry IN to_node.id | toLower(entry.value)]
) AS to_node_id,
toUpper(to_node.label) as to_node_label,
CASE
WHEN rel_properties IS NOT NULL THEN
apoc.map.setKey(
apoc.map.fromLists(
[entry IN rel_properties | toLower(entry.key)],
[entry IN rel_properties | toLower(entry.value)]
),
'last_sys_upd_dt',
datetime({epochMillis: apoc.date.currentTimestamp()}) + duration({hours: 8})
)
ELSE {
last_sys_upd_dt: datetime({epochMillis: apoc.date.currentTimestamp()}) + duration({hours: 8})
}
END AS rel_properties
// Special handling of relationship properties data types
WITH member_node, rel_label,
from_node_id, from_node_label,
to_node_id, to_node_label,
apoc.map.setValues(
rel_properties,
[
"createdate", date(rel_properties.createdate),
"enddate", date(rel_properties.enddate),
"registration_datetime", CASE
WHEN rel_properties.registration_datetime IS NOT NULL
THEN datetime({epochmillis: apoc.date.parse(rel_properties.registration_datetime, "ms", "yyyy-MM-dd hh:mm:ss")})
ELSE null
END,
"value", toFloat(rel_properties.value)
]
) as rel_properties
// Skip those relationship if no from_node_id or to_node_id
WHERE from_node_id IS NOT NULL and to_node_id IS NOT NULL
// Match from node
CALL apoc.merge.node(
[from_node_label],
from_node_id,
{last_sys_upd_dt: datetime({epochMillis: apoc.date.currentTimestamp()}) + duration({hours: 8})},
{}
) YIELD node as from_node
// Match to node
CALL apoc.merge.node(
[to_node_label],
to_node_id,
{last_sys_upd_dt: datetime({epochMillis: apoc.date.currentTimestamp()}) + duration({hours: 8})},
{}
) YIELD node as to_node
// Create relationship
CALL apoc.merge.relationship(
from_node,
rel_label,
{},
rel_properties,
to_node,
{}
) YIELD rel as member_node_rel
RETURN *
} IN TRANSACTIONS OF 10000 ROWS
return member_node, member_node_rel;
Appreciate it if you would give me some direction to optimization it.