Please keep the following things in mind:
Please format code + Cypher statements with the code </>
icon, it's much easier to read.
Please provide the following information if you ran into a more serious issue:
- version 2025.02, browser version , GDS -12.14 version ,
- Implementing the gds page-rank algorithm
- dataset - yellowtrip dataset 2022.03 parquet file
I have encountered a problem when executing the PageRank algorithm. Previously I uploaded and created the graph incorrectly: I treated every pickup/dropoff pair as a single relationship, which resulted in 42 nodes and 702 relationships for the dataset.
Now, after an update, I was able to add multiple relationships between nodes, which results in the correct relationship count of 1530. However, in both cases PageRank returns the same scores even though the graphs are different. I noticed the weight property is missing.
I am unable to add a weighted property to the pagerank.
My graph creation
#code
# Load every trip row into Neo4j -- one Location node per distinct zone id and
# one TRIP relationship per row -- then query the resulting graph size.
# NOTE(review): the PageRank snippet in this post uses `self._driver`; confirm
# which attribute name is the right one.
with self.driver.session() as session:
    # Both endpoints are MERGEd and the relationship is CREATEd in a single
    # statement, so each row costs one round trip instead of three.
    # CREATE (not MERGE) is deliberate: parallel TRIP relationships between
    # the same two locations must be kept, one per trip.
    load_trip_query = (
        "MERGE (p:Location {id: $pickup_location}) "
        "SET p.name = $pickup_location "
        "MERGE (d:Location {id: $dropoff_location}) "
        "SET d.name = $dropoff_location "
        "CREATE (p)-[:TRIP {pickup_dt: $pickup_time, dropoff_dt: $dropoff_time, "
        "distance: $distance, fare: $fare}]->(d)"
    )
    for _, row in trips.iterrows():
        session.run(
            load_trip_query,
            pickup_location=row['PULocationID'],
            dropoff_location=row['DOLocationID'],
            pickup_time=row['tpep_pickup_datetime'],
            dropoff_time=row['tpep_dropoff_datetime'],
            distance=row['trip_distance'],
            fare=row['fare_amount'],
        )

    # Count nodes
    result = session.run("MATCH (n) RETURN count(n) AS num_nodes")
    num_nodes = result.single()["num_nodes"]
    print(f"Total Nodes: {num_nodes}")

    # Count relationships (all types, then TRIP only). These values are not
    # used within the posted excerpt; they are kept for whatever follows it.
    result = session.run("MATCH ()-[r]->() RETURN count(r) AS num_relationships")
    num_relationships = result.single()["num_relationships"]
    result = session.run("MATCH (a)-[:TRIP]->(b) RETURN COUNT(*) AS num_edges")
    num_edges = result.single()["num_edges"]
My pagerank query
with self._driver.session() as session:
# Create the in-memory graph for GDS
session.run("CALL gds.graph.project('myGraph', 'Location', { TRIP:{ properties:'distance' } })")
print("running qeuries now")
result = session.run("CALL gds.graph.list() YIELD graphName, nodeCount, relationshipCount")
print("called the query")
for record in result:
print(f"Graph: {record['graphName']}, Nodes: {record['nodeCount']}, Relationships: {record['relationshipCount']}")
print("completed now moving on to the graph")
# Run the PageRank algorithm
query = f"""
CALL gds.pageRank.stream('myGraph', {{
maxIterations: {max_iterations},
dampingFactor: 0.85,
relationshipWeightProperty: 'distance'
}})
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS location, score
ORDER BY score DESC
"""
result = session.run(query)
rankings = result.data()
if not rankings:
return None, None # Handle empty results
max_rank = max(rankings, key=lambda x: x['score'])
min_rank = min(rankings, key=lambda x: x['score'])
print("lowest rank nodes",min_rank)
print("higest rank nodes",max_rank)
print("Rankings",rankings)
return max_rank, min_rank
q1. Do I have to have a weighted graph to run PageRank with weights? I ask because the scores given for reference do not match what I get.
q2. I used distance as the weight property, but this fails. I could not find a reference on how to add the weight property when creating the graph. I believe I have specified the weight property correctly in the GDS PageRank call.
q3. Could data types affect the graph? Does the property need to be a specific data type, and do all values have to be the same type?
No matter what I do, even though the relationships have changed, I can't seem to get the correct PageRank values. I know that I am creating a directed graph here.