-
Notifications
You must be signed in to change notification settings - Fork 215
Open
Description
I am using a Microsoft Fabric Notebook using PySpark to consume a Delta Sharing table.
The environment is using:
Apache Spark: 3.5
Java: 11
Scala: 2.12.17
Python: 3.11
Delta Lake: 3.2
R: 4.4.1
I have installed the following packages:
PySpark: delta-sharing-spark_2.12-3.3.2 # I have tried 2.12-3.1.0 through 2.12-3.3.2
Python: delta-sharing
When I run this code:
client = delta_sharing.SharingClient(profile.name)
tables = client.list_all_tables()
for table in tables:
table_url = f'{profile.name}#{table.share}.{table.schema}.{table.name}'
print(f'Loading share={table.share};schema={table.schema};table={table.name}')
df = spark.read.format("deltaSharing").load(table_url)

I am getting this error on versions 2.12-3.3.0 through 2.12-3.3.2:
Py4JJavaError: An error occurred while calling o7571.load.
: java.util.ServiceConfigurationError: org.apache.spark.sql.sources.DataSourceRegister: Provider io.delta.sharing.spark.DeltaSharingDataSource could not be instantiated
at java.base/java.util.ServiceLoader.fail(ServiceLoader.java:582)
at java.base/java.util.ServiceLoader$ProviderImpl.newInstance(ServiceLoader.java:804)
at java.base/java.util.ServiceLoader$ProviderImpl.get(ServiceLoader.java:722)
at java.base/java.util.ServiceLoader$3.next(ServiceLoader.java:1395)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:303)
at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:297)
at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108)
at scala.collection.TraversableLike.filter(TraversableLike.scala:395)
at scala.collection.TraversableLike.filter$(TraversableLike.scala:395)
at scala.collection.AbstractTraversable.filter(Traversable.scala:108)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:632)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSourceV2(DataSource.scala:700)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:216)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.NoClassDefFoundError: org/apache/spark/internal/LoggingShims
at io.delta.sharing.spark.DeltaSharingDataSource.<init>(DeltaSharingDataSource.scala:53)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
at java.base/java.util.ServiceLoader$ProviderImpl.newInstance(ServiceLoader.java:780)
... 30 more
I get this error on versions 2.12-3.1.0 through 2.12-3.2.1:
Py4JJavaError: An error occurred while calling o7537.load.
: java.lang.NoClassDefFoundError: org/apache/spark/delta/sharing/PreSignedUrlCache$
at io.delta.sharing.spark.DeltaSharingDataSource$.setupFileSystem(DeltaSharingDataSource.scala:446)
at io.delta.sharing.spark.DeltaSharingDataSource.createRelation(DeltaSharingDataSource.scala:232)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:346)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:236)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:219)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:219)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.delta.sharing.PreSignedUrlCache$
at java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:476)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:594)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:527)
... 19 more
Based on what I'm reading online, the first error message is caused by the version of the library being ahead of the version of Delta Lake I have installed. However, I am unable to find anything on the second error. Are you able to shed any light on this for me?
Metadata
Metadata
Assignees
Labels
No labels