Goal: write an integration test for main_code.py.

Simplified version of main_code.py
import sys

import My_app.settings.config as stg
from pyspark.sql import SparkSession
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.types import StructType

def main(args):
    spark = SparkSession.builder.getOrCreate()  # TODO replace this init with common method (waiting for S3 part)
    spark.conf.set('spark.sql.sources.partitionOverwriteMode', 'dynamic')
    for table in list(stg.data_schema.keys()):
        raw_data = stg.data_schema[table].columns.to_dict()
        ...

if __name__ == '__main__':
    main(sys.argv)

Simplified version of data_quality.py

import My_app.settings.config as stg
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from awsgluedq.transforms import EvaluateDataQuality
from pyspark.sql import SparkSession, Row
from pyspark.sql import functions as F
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.types import StructType, IntegerType, StringType, FloatType, DateType

def csvReader(spark: SparkSession, bucket: str, path: str, table: str, schema: StructType, sep: str) -> DataFrame:
    """Reads a CSV file as a DataFrame from S3 using user parameters for format."""
    return (
        spark.read.format('csv')
        .option('header', 'true')
        .option('sep', sep)
        .schema(schema)
        .load(f's3a://{bucket}/{path}/{table}.csv')
    )

def dataframeWriter(
    df: DataFrame, bucket: str, path: str, table: str, partition_key: str, mode: str = 'overwrite'
) -> None:
    """Writes a DataFrame to S3 in Parquet format using user parameters to define path and partition key."""
    df.write.partitionBy(partition_key).mode(mode).parquet(f's3a://{bucket}/{path}/{table}/')
What I want to do

Write an integration test for main_code.py that:
- mocks the csvReader function and replaces it with local_csvReader;
- mocks the dataframeWriter function and replaces it with local_dataframeWriter;
- mocks the import from awsgluedq so that it does not have to be installed locally.

What I did: test_main_code.py
"""Module that contains unit tests for My_app pre silver job."""
import os
from unittest import TestCase
from unittest.mock import patch, Mock
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType
def local_csvReader(spark: SparkSession, bu: str, pa: str, table: str, schema: StructType, sep: str):
    """Mocked function that replaces the real csvReader. This one reads from local disk rather than S3."""
    return (
        spark.read.format('csv')
        .option('header', 'true')
        .option('sep', ';')
        .schema(schema)
        .load(f'./tests/integration_tests/input_mock/{table}.csv')
    )

def local_dataframeWriter(df, bu: str, pa: str, table: str, partition_key: str):
    """Mocked function that replaces the real dataframeWriter. This one writes to local disk rather than S3."""
    output_dir = f'./tests/integration_tests/output_mock/{table}/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    df.write.partitionBy(partition_key).mode('overwrite').parquet(output_dir)
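
# Untested alternative (not part of the original helper): a temporary directory,
# e.g.
#     import tempfile
#     output_dir = tempfile.mkdtemp(prefix=f'{table}-')
# would avoid the makedirs guard above and the `rm -rf` cleanup at the end of the
# test, at the cost of passing the generated path through to the assertions.
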
class IntegrationTest(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.spark = SparkSession.builder.master('local').appName('TestPerfmarketSilver').getOrCreate()
        cls.spark.conf.set('spark.sql.sources.partitionOverwriteMode', 'dynamic')
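
    # Possible addition (not in the original): stop the shared session so repeated
    # local runs do not leak a SparkContext between test classes.
    @classmethod
    def tearDownClass(cls):
        cls.spark.stop()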

    @patch('My_app.application.main_code.getResolvedOptions')
    @patch('My_app.application.main_code.csvReader', side_effect=local_csvReader)
    @patch('My_app.application.main_code.dataframeWriter', side_effect=local_dataframeWriter)
    # Note: patch decorators inject mocks bottom-up, so dataframeWriter's mock arrives first.
    def test_main(self, mock_datawriter, mock_csvreader, mocked_get_resolved_options: Mock):
        """Test the main function with local CSV and Parquet output."""
        import My_app.application.main_code as main_code
        import My_app.settings.config as stg
        import tests.integration_tests.settings.config as stg_new

        stg.data_schema = stg_new.data_schema_test
        expected_results = {'chemins': {'nbRows': 8}}

        # Mock the resolved options
        mocked_get_resolved_options.return_value = {
            'JOB_NAME': 'test_job',
            'S3_BRONZE_BUCKET_NAME': 'test_bronze',
            'S3_PRE_SILVER_BUCKET_NAME': 'test_pre_silver',
            'S3_BRONZE_PATH': './tests/integration_tests/input_mock',
            'S3_PRE_SILVER_PATH': './tests/integration_tests/output_mock',
            'S3_DATA_QUALITY_LOGS_BUCKET_NAME': 'test_dq',
        }

        main_code.main([])

        for table in stg.data_schema.keys():
            # Verify that the output Parquet file is created
            output_path = f'./tests/integration_tests/output_mock/{table}/'
            self.assertTrue(os.path.exists(output_path))

            # Read the written Parquet file and check the data
            written_df = self.spark.read.parquet(output_path)
            self.assertEqual(written_df.count(), expected_results[table]['nbRows'])  # Check row count
            self.assertTrue(
                set(
                    column_data['bronze_name'] for column_data in stg.data_schema[table]['columns'].to_dict().values()
                )
                == set(written_df.columns)
            )

            # Clean up
            os.system(f'rm -rf ./tests/integration_tests/output_mock/{table}/')
======================================================================
ERROR: test_main (tests.integration_tests.application.test_main_code.IntegrationTest)
Test the main function with local CSV and Parquet output.
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/unittest/mock.py", line 1248, in _dot_lookup
return getattr(thing, comp)
AttributeError: module 'My_app.application' has no attribute 'main_code'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/unittest/mock.py", line 1376, in patched
with self.decoration_helper(patched,
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/contextlib.py", line 135, in __enter__
return next(self.gen)
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/unittest/mock.py", line 1358, in decoration_helper
arg = exit_stack.enter_context(patching)
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/contextlib.py", line 492, in enter_context
result = _cm_type.__enter__(cm)
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/unittest/mock.py", line 1431, in __enter__
self.target = self.getter()
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/unittest/mock.py", line 1618, in
getter = lambda: _importer(target)
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/unittest/mock.py", line 1261, in _importer
thing = _dot_lookup(thing, comp, import_path)
File "/Users/me/.asdf/installs/python/3.10.14/lib/python3.10/unittest/mock.py", line 1250, in _dot_lookup
__import__(import_path)
File "/Users/me/IdeaProjects/project_root/apps/project/src/My_app/application/main_code.py", line 10, in
from My_app.infrastructure.data_quality import evaluateDataQuality, generateSchema
File "/Users/me/IdeaProjects/project_root/apps/project/src/My_app/infrastructure/data_quality.py", line 4, in
from awsgluedq.transforms import EvaluateDataQuality
ModuleNotFoundError: No module named 'awsgluedq'
----------------------------------------------------------------------
Ran 1 test in 2.114s
FAILED (errors=1)
Questions:
- Is my test class well structured? Am I importing main_code correctly? I don't think so, given: AttributeError: module 'My_app.application' has no attribute 'main_code'.
- How can I use a mocking technique to replace the awsgluedq module with other code?
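
For the second question, the chained traceback points at the root cause: @patch has to import My_app.application.main_code to resolve its target string, and that import dies on the line "from awsgluedq.transforms import EvaluateDataQuality", which first surfaces as the AttributeError above. One common technique is to register a stub in sys.modules at the top of the test module, before anything imports the job code. A minimal sketch, assuming the module paths from the traceback (the stubbed EvaluateDataQuality does not reproduce the real class's interface):

import sys
from unittest.mock import MagicMock

# Fake 'awsgluedq' and 'awsgluedq.transforms' so that
# `from awsgluedq.transforms import EvaluateDataQuality` succeeds without the
# package being installed. This must run before the first import of
# My_app.application.main_code -- including the imports that @patch performs
# while resolving its target strings.
mock_transforms = MagicMock()
mock_transforms.EvaluateDataQuality = MagicMock()
sys.modules['awsgluedq'] = MagicMock(transforms=mock_transforms)
sys.modules['awsgluedq.transforms'] = mock_transforms

If leaking the stub into other test modules is a concern, patch.dict(sys.modules, ...) with the same two entries scopes the trick to a single test or class.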