Код: Выделить всё
# Common GCS prefix under which this job's dependency artifacts are stored.
deps_root = os.path.join("gs://", DEPS_BUCKET, DEPENDENCY_PATH)

# PySpark batch task: runs `get_data_file` with the zipped sources on the
# Python path and `settings.toml` shipped alongside as a plain file.
get_stores_and_discount_data = (
    DataprocPySparkBatchOp(
        project=PROJECT_ID,
        location=REGION,
        # `file_date` is interpolated so each run gets its own batch id.
        batch_id=f"dataproc-job-{file_date}",
        main_python_file_uri=get_data_file,
        python_file_uris=[os.path.join(deps_root, "src.zip")],
        file_uris=[os.path.join(deps_root, "settings.toml")],
        subnetwork_uri=SUBNETWORK_URI,
        container_image=PROMO_SPARK_DATAPROC_IMAGE,
        runtime_config_version=RUNTIME_CONFIG_VERSION,
        service_account=SERVICE_ACCOUNT,
        spark_history_dataproc_cluster=HISTORY_SERVER_CLUSTER,
        runtime_config_properties=SPARK_PROPERTIES_MEDIUM,
        labels=SPARK_LABELS,
    )
    .set_display_name("get-data")
    # Explicit ordering: this task is scheduled after `date_task`.
    .after(date_task)
)
Ошибка
ValueError: Значение должно быть одного из следующих типов: str, int, float, bool, dict и list. Получено: "{{channel:task=generate-date;name=curr_timestamp;type=String;}}" типа "".
< /blockquote>
Код: Выделить всё
# Handle to the `curr_timestamp` output of `date_task`.
# NOTE(review): at pipeline-definition time this is presumably a channel
# placeholder rather than a concrete `str` -- confirm.
CURR_TIMESTAMP = date_task.outputs["curr_timestamp"]
# Store that output in the shared Spark properties mapping under the
# `spark.dataproc.driverEnv.REPORTING_TIMESTAMP` key.
# NOTE(review): nesting a task output inside this mapping looks like the
# trigger for the "Value must be one of the following types" ValueError
# raised when the mapping is passed as `runtime_config_properties` -- verify.
SPARK_PROPERTIES_MEDIUM["spark.dataproc.driverEnv.REPORTING_TIMESTAMP"] = CURR_TIMESTAMP
Код: Выделить всё
@component(base_image="python:3.9-slim")
def generate_date() -> NamedTuple('Output', [("file_date", str), ('curr_timestamp', str)]):
    """Return the current local date and time in two string formats.

    Both values are rendered from the same ``datetime`` instance, so they
    always describe the same moment.

    Returns:
        A ``(file_date, curr_timestamp)`` pair matching the ``Output``
        named tuple declared in the signature:

        * ``file_date`` -- compact form ``YYYYMMDDHHMMSS``.
        * ``curr_timestamp`` -- ``YYYYMMDD-HH:MM:SS``.
    """
    # Imported inside the body so the function stays self-contained when it
    # is executed as a component.
    from datetime import datetime

    # Naive local time, exactly as the original `datetime.today()` call.
    now = datetime.today()
    file_date = now.strftime("%Y%m%d%H%M%S")
    # `%M` is minutes; the previous `%m` repeated the month in the
    # minutes position (e.g. "20240517-13:05:42" at any minute in May).
    curr_timestamp = now.strftime("%Y%m%d-%H:%M:%S")
    return (file_date, curr_timestamp)
Подробнее здесь: https://stackoverflow.com/questions/791 ... ies-dictio