PySpark: spark.SparkException: Job aborted due to stage failure: Task 0 in stage 15.0 failed 1 times, java.net.SocketException: Connection reset


Post by Anonymous »

I am new to pyspark, and I am trying to run many time series through Prophet with pyspark (as a distributed computation, because I need to forecast hundreds of time series), but I get the error shown below.
import time
start_time = time.time()
sdf = spark.createDataFrame(data)
print('%0.2f min: Lags' % ((time.time() - start_time) / 60))
sdf.createOrReplaceTempView('Quantity')
spark.sql("select Reseller_City, Business_Unit, count(*) from Quantity group by Reseller_City, Business_Unit order by Reseller_City, Business_Unit").show()
query = 'SELECT Reseller_City, Business_Unit, conditions, black_week, promos, Sales_Date as ds, sum(Rslr_Sales_Quantity) as y FROM Quantity GROUP BY Reseller_City, Business_Unit, conditions, black_week, promos, ds ORDER BY Reseller_City, Business_Unit, ds'
spark.sql(query).show()
sdf.rdd.getNumPartitions()
store_part = (spark.sql(query).repartition(spark.sparkContext.defaultParallelism, ['Reseller_City', 'Business_Unit'])).cache()

store_part.explain()

from pyspark.sql.types import *

result_schema = StructType([
    StructField('ds', TimestampType()),
    StructField('Reseller_City', StringType()),
    StructField('Business_Unit', StringType()),
    StructField('y', DoubleType()),
    StructField('yhat', DoubleType()),
    StructField('yhat_upper', DoubleType()),
    StructField('yhat_lower', DoubleType())
])
from pyspark.sql.functions import pandas_udf, PandasUDFType

@pandas_udf(result_schema, PandasUDFType.GROUPED_MAP)
def forecast_sales(store_pd):

    model = Prophet(interval_width=0.95, holidays=lock_down)
    model.add_country_holidays(country_name='DE')
    model.add_regressor('conditions')
    model.add_regressor('black_week')
    model.add_regressor('promos')

    train = store_pd[store_pd['ds'] <= '2021-10-01 00:00:00']
    model.fit(train[['ds', 'y', 'conditions', 'black_week', 'promos']])

    forecast_pd = model.predict(future_pd[['ds', 'conditions', 'black_week', 'promos']])

    f_pd = forecast_pd[['ds', 'yhat', 'yhat_upper', 'yhat_lower']].set_index('ds')

    # store_pd = store_pd.filter(store_pd['ds'] ...  (the rest of the UDF body, including
    # its return statement, was swallowed by the forum's HTML formatting)

results = store_part.groupby('Reseller_City', 'Business_Unit').apply(forecast_sales)  # reconstructed; this line was also lost in formatting

Py4JJavaError                             Traceback (most recent call last)
----> 1 results.show()

C:\spark-3.0.3-bin-hadoop2.7\python\pyspark\sql\dataframe.py in show(self, n, truncate, vertical)
438 """
439 if isinstance(truncate, bool) and truncate:
--> 440 print(self._jdf.showString(n, 20, vertical))
441 else:
442 print(self._jdf.showString(n, int(truncate), vertical))

C:\spark-3.0.3-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:

C:\spark-3.0.3-bin-hadoop2.7\python\pyspark\sql\utils.py in deco(*a, **kw)
126 def deco(*a, **kw):
127 try:
--> 128 return f(*a, **kw)
129 except py4j.protocol.Py4JJavaError as e:
130 converted = convert_exception(e.java_exception)

C:\spark-3.0.3-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(

Py4JJavaError: An error occurred while calling o128.showString.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 15.0 failed 1 times, most recent failure: Lost task 0.0 in stage 15.0 (TID 1243, Grogu.profiflitzer.local, executor driver): java.net.SocketException: Connection reset
at java.net.SocketInputStream.read(Unknown Source)
at java.net.SocketInputStream.read(Unknown Source)
at java.io.BufferedInputStream.fill(Unknown Source)
at java.io.BufferedInputStream.read(Unknown Source)
at java.io.DataInputStream.readInt(Unknown Source)
at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:86)
at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:49)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:456)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:729)
at org.apache.spark.sql.execution.columnar.CachedRDDBuilder$$anon$1.hasNext(InMemoryRelation.scala:132)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1371)
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1298)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1362)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1186)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:360)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:311)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:463)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)

Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2059)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2008)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2007)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2007)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:973)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:973)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:973)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2239)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2188)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2177)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:775)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2135)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2154)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:472)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:425)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47)
at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3627)
at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2697)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3618)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:767)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3616)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2697)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2904)
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:300)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:337)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Unknown Source)
Caused by: java.net.SocketException: Connection reset
at java.net.SocketInputStream.read(Unknown Source)
at java.net.SocketInputStream.read(Unknown Source)
at java.io.BufferedInputStream.fill(Unknown Source)
at java.io.BufferedInputStream.read(Unknown Source)
at java.io.DataInputStream.readInt(Unknown Source)
at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:86)
at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:49)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:456)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:729)
at org.apache.spark.sql.execution.columnar.CachedRDDBuilder$$anon$1.hasNext(InMemoryRelation.scala:132)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1371)
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1298)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1362)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1186)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:360)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:311)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:463)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
... 1 more
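
For context: a java.net.SocketException: Connection reset thrown from PythonArrowOutput means the JVM lost the Arrow stream coming back from the Python worker, which usually happens when the worker process dies mid-batch (out of memory, or a hard crash in native code such as Prophet's Stan backend) rather than raising a clean Python exception. Below is a minimal, self-contained sketch of the grouped-map pattern the post is attempting, using the column names from the post; the synthetic sample data, the 30-day horizon, and building future_pd with make_future_dataframe() are assumptions, since those parts of the original code were lost in formatting.

import pandas as pd
from prophet import Prophet  # on older installs: from fbprophet import Prophet
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import (StructType, StructField, TimestampType,
                               StringType, DoubleType)

spark = SparkSession.builder.getOrCreate()

# Synthetic stand-in for the post's store_part DataFrame:
# one row per day per (Reseller_City, Business_Unit) group.
pdf = pd.DataFrame({
    'Reseller_City': ['Berlin'] * 120 + ['Hamburg'] * 120,
    'Business_Unit': ['BU1'] * 240,
    'ds': list(pd.date_range('2021-01-01', periods=120, freq='D')) * 2,
    'y': [float(i % 30) for i in range(240)],
})
store_part = spark.createDataFrame(pdf)

result_schema = StructType([
    StructField('ds', TimestampType()),
    StructField('Reseller_City', StringType()),
    StructField('Business_Unit', StringType()),
    StructField('y', DoubleType()),
    StructField('yhat', DoubleType()),
    StructField('yhat_upper', DoubleType()),
    StructField('yhat_lower', DoubleType()),
])

@pandas_udf(result_schema, PandasUDFType.GROUPED_MAP)
def forecast_sales(store_pd):
    # Each invocation receives the full history of exactly one group as a
    # plain pandas DataFrame; Prophet runs locally inside the Python worker.
    model = Prophet(interval_width=0.95)
    model.fit(store_pd[['ds', 'y']])
    # Assumed horizon: 30 days past the end of the training data.
    future_pd = model.make_future_dataframe(periods=30)
    forecast_pd = model.predict(future_pd)
    out = forecast_pd[['ds', 'yhat', 'yhat_upper', 'yhat_lower']].merge(
        store_pd[['ds', 'y']], on='ds', how='left')
    out['Reseller_City'] = store_pd['Reseller_City'].iloc[0]
    out['Business_Unit'] = store_pd['Business_Unit'].iloc[0]
    # The returned frame must carry exactly the columns declared in
    # result_schema; missing or extra columns make the worker fail.
    return out[[f.name for f in result_schema.fields]]

results = store_part.groupby('Reseller_City', 'Business_Unit').apply(forecast_sales)
results.show()

If each group forecasts fine on its own, the next things to check are that prophet and a compatible pyarrow are installed in the Python environment the workers actually use (PYSPARK_PYTHON), and that no single group is large enough to exhaust worker memory.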



More details here: https://stackoverflow.com/questions/699 ... 0-in-stage