First of all, thanks for your help, Joel.
As you suggested, I checked the Spark version I'm using against what the connector requires:
- since I'm running PySpark 3.0.1, which is built on Scala 2.12, I use neo4j-connector-apache-spark_2.12-4.0.0.jar, as indicated on GitHub
- I've also tried installing PySpark 2.4.0, which runs on Scala 2.11, in order to try the other connector (neo4j-connector-apache-spark_2.11-4.0.0.jar)
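For context, this is roughly how I'm attaching the jar and issuing the read; it's a minimal sketch of my script, and the Bolt URL, credentials and label below are placeholders rather than my actual values:

from pyspark.sql import SparkSession

# Sketch of the setup: the jar path, Bolt URL, credentials and label
# are placeholders, not my real values.
spark = (
    SparkSession.builder
    .appName("neo4j-connector-test")
    .config("spark.jars", "neo4j-connector-apache-spark_2.12-4.0.0.jar")
    .getOrCreate()
)

# The read that fails: same format string as in the traceback below.
df = (
    spark.read.format("org.neo4j.spark.DataSource")
    .option("url", "bolt://localhost:7687")
    .option("authentication.basic.username", "neo4j")
    .option("authentication.basic.password", "password")
    .option("labels", "Person")
    .load()
)
df.show()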
Be that as it may, in both cases I'm still getting the same error, which I report in full below:
Traceback (most recent call last):
File "c:/Users/arman/Desktop/prova/sparkneo4jconn.py", line 14, in <module>
spark.read.format("org.neo4j.spark.DataSource") \
File "C:\Users\arman\Desktop\prova\venv\lib\site-packages\pyspark\sql\readwriter.py", line 184, in load
return self._df(self._jreader.load())
File "C:\Users\arman\Desktop\prova\venv\lib\site-packages\py4j\java_gateway.py", line 1304, in __call__
return_value = get_return_value(
File "C:\Users\arman\Desktop\prova\venv\lib\site-packages\pyspark\sql\utils.py", line 128, in deco
return f(*a, **kw)
File "C:\Users\arman\Desktop\prova\venv\lib\site-packages\py4j\protocol.py", line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o38.load.
: java.lang.NoClassDefFoundError: org/apache/spark/sql/sources/v2/ReadSupport
at java.base/java.lang.ClassLoader.defineClass1(Native Method)
at java.base/java.lang.ClassLoader.defineClass(ClassLoader.java:1016)
at java.base/java.security.SecureClassLoader.defineClass(SecureClassLoader.java:151)
at java.base/jdk.internal.loader.BuiltinClassLoader.defineClass(BuiltinClassLoader.java:825)
at java.base/jdk.internal.loader.BuiltinClassLoader.findClassOnClassPathOrNull(BuiltinClassLoader.java:723)
at java.base/jdk.internal.loader.BuiltinClassLoader.loadClassOrNull(BuiltinClassLoader.java:646)
at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:604)
at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:168)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:576)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:522)
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$3(DataSource.scala:653)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:653)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSourceV2(DataSource.scala:733)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:248)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:221)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:64)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:564)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:832)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.sources.v2.ReadSupport
at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:606)
at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:168)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:522)
... 27 more