I am trying to collect all of the Person nodes that have one or more matching Personas. Think of Personas as identifiers like a driver license, passport number, ssn, etc.
For each collection of matching Person nodes, I want to create a node (SamePerson) with a [:SAME] relationship from all of the Person nodes in a collection to (SamePerson) like in the image below.
//Person
//UNWIND $rows AS row
UNWIND
[
{ dsrecordid: 1, personid: 1, personname: 'Lucas', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 2, personid: 2, personname: 'Lucas', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 3, personid: 3, personname: 'Franco', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 5, personid: 5, personname: 'Franco', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 4, personid: 4, personname: 'Lucas', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 6, personid: 6, personname: 'Lucas', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 7, personid: 7, personname: 'Franco', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 8, personid: 8, personname: 'Lucas', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 9, personid: 9, personname: 'Franco', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 10, personid: 10, personname: 'Ajesh', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 11, personid: 11, personname: 'Ajesh', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 12, personid: 12, personname: 'Ajesh', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 13, personid: 13, personname: 'Ajesh', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 14, personid: 14, personname: 'Bruce', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 15, personid: 15, personname: 'Bruce', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 16, personid: 16, personname: 'Bruce', dscreateddts: '2022-10-31T19:53:37.655337Z' }
] as row
WITH row
CREATE (n:Person {
dsRecordId: row.dsrecordid,
personId: row.personid,
personName: row.personname,
dsCreatedDts: row.dscreateddts})
//Persona
//UNWIND $rows AS row
UNWIND
[
{ dsrecordid: 1, personaid: 1, personid: 1, personatypeid: 7, personasourcevalue: '222', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 2, personaid: 2, personid: 2, personatypeid: 6, personasourcevalue: '321', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 3, personaid: 3, personid: 3, personatypeid: 6, personasourcevalue: '123', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 4, personaid: 4, personid: 4, personatypeid: 7, personasourcevalue: '222', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 5, personaid: 5, personid: 4, personatypeid: 6, personasourcevalue: '321', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 6, personaid: 6, personid: 5, personatypeid: 6, personasourcevalue: '123', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 7, personaid: 7, personid: 6, personatypeid: 6, personasourcevalue: '321', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 8, personaid: 8, personid: 7, personatypeid: 6, personasourcevalue: '123', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 9, personaid: 9, personid: 8, personatypeid: 6, personasourcevalue: '321', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 10, personaid: 10, personid: 9, personatypeid: 6, personasourcevalue: '123', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 11, personaid: 11, personid: 10, personatypeid: 6, personasourcevalue: '345', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 12, personaid: 12, personid: 10, personatypeid: 7, personasourcevalue: '333', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 13, personaid: 13, personid: 11, personatypeid: 7, personasourcevalue: '333', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 14, personaid: 14, personid: 12, personatypeid: 7, personasourcevalue: '333', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 15, personaid: 15, personid: 13, personatypeid: 6, personasourcevalue: '345', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 16, personaid: 16, personid: 14, personatypeid: 6, personasourcevalue: '678', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 17, personaid: 17, personid: 15, personatypeid: 6, personasourcevalue: '678', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 18, personaid: 18, personid: 16, personatypeid: 6, personasourcevalue: '678', dscreateddts: '2022-10-31T19:53:37.655337Z' },
{ dsrecordid: 19, personaid: 19, personid: 16, personatypeid: 7, personasourcevalue: '444', dscreateddts: '2022-10-31T19:53:37.655337Z' }
] as row
CREATE (n:Persona {
dsRecordId: row.dsrecordid,
personaId: row.personaid,
personId: row.personid,
personaTypeId: row.personatypeid,
personaSourceValue: row.personasourcevalue,
dsCreatedDts: row.dscreateddts})
MERGE (p:Person {personId: row.personid})
MERGE (p)-[:HAS_PERSONA]->(n)
I tried this query but it creates a SamePerson node for each combination of Person and Persona.
MATCH (p:Person)-[:HAS_PERSONA]->(pa:Persona)
WITH pa.personaTypeId AS typeId, pa.personaSourceValue AS sourceValue, COLLECT(DISTINCT p) AS people
MERGE (m:SamePerson { personaTypeId: typeId, personaSourceValue: sourceValue })
WITH m, people
UNWIND people AS person
MERGE (person)-[:SAME]->(m)
RETURN DISTINCT m;