Pytest-recording/VCR для S3: IncompleteReadError (но только иногда?)

Pytest-recording/VCR для S3: IncompleteReadError (но только иногда?) ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Pytest-recording/VCR для S3: IncompleteReadError (но только иногда?)

Цитата

Сообщение Anonymous » 22 ноя 2024, 21:28

Хочу использовать pytest-recording в своих тестах, которые включают подключение и загрузку данных из S3.
Я импортирую все функции из тестируемого скрипта. Здесь используются переменные среды prod, но только для проверки загрузки и чтения данных из S3 (не выгрузки). В REPL тот же код работает нормально — я могу подключиться к своему экземпляру S3, прочитать и загрузить данные, находящиеся на нем.
Теперь в своем наборе тестов я постоянно получаю ошибку IncompleteReadError при запуске pytest --record-mode=once. Это происходит независимо от того, удаляю ли я существующие кассеты или нет.
Вот оригинальные функции, которые я тестирую:

Код: Выделить всё

def s3_connect_get_files(
    validated_target_params: dict,
) -> Tuple[s3fs.S3FileSystem, List[str]]:
    """Open an S3 filesystem handle and list the target's input files.

    The filesystem is built from the module-level AWS credentials and the
    ``endpoint_url`` entry of ``validated_target_params``. The listing is the
    result of globbing ``<in_path>/<glob_pattern>``, both read from the same
    mapping.

    Returns:
        A ``(the_bucket, files)`` tuple: the ``s3fs.S3FileSystem`` object and
        whatever its ``glob`` call returned.

    Raises:
        FileNotFoundError: logged and re-raised unchanged when raised while
            creating the filesystem or globbing.
    """
    try:
        the_bucket = s3fs.S3FileSystem(
            key=AWS_ACCESS_KEY_ID,
            secret=AWS_SECRET_ACCESS_KEY,
            client_kwargs={"endpoint_url": validated_target_params["endpoint_url"]},
        )
        search_pattern = (
            f"{validated_target_params['in_path']}/"
            f"{validated_target_params['glob_pattern']}"
        )
        files = the_bucket.glob(search_pattern)
    except FileNotFoundError as err:
        # Log the details, then let the caller see the original exception.
        logger.error("could not connect to s3. check credentials!")
        logger.error(f"original error type: {type(err).__name__}")
        logger.error(f"original error message: {err}")
        raise
    else:
        return the_bucket, files

def read_files(
    the_bucket: s3fs.S3FileSystem, files: List[str], validated_target_params: dict
) -> dict:
    """Read every file in ``files`` into memory, keyed by its path.

    Each path is opened through ``the_bucket``, parsed with
    ``pandas.read_csv``, and given a ``file_last_modified`` column holding the
    ``LastModified`` value reported by ``the_bucket.info``.

    Returns:
        A dict mapping each path to
        ``{"data": <DataFrame>, "last_modified_at": <LastModified value>}``.

    Raises:
        NotImplementedError: if the ``reader`` entry of
            ``validated_target_params`` has a value other than ``"pandas"``.
        Exception: any error raised while reading a file is logged and
            re-raised unchanged.
    """
    records = {}

    logger.info("Now reading data to be validated and de-duped.")
    for file in files:
        # "reader" is an Enum member, so compare against its .value.
        if validated_target_params["reader"].value != "pandas":
            raise NotImplementedError(
                f"{validated_target_params['reader']} is not yet implemented as a reader."
            )

        try:
            # Close the remote file handle as soon as pandas has parsed it;
            # read_csv does not close file objects it did not open itself.
            with the_bucket.open(file) as handle:
                df = pd.read_csv(handle)
            file_last_modified = the_bucket.info(file).get("LastModified")

            df["file_last_modified"] = file_last_modified

            records[file] = {
                "data": df,
                "last_modified_at": file_last_modified,
            }
            logger.info(f"Loaded {file} with {len(df)} rows using pandas")
        except Exception as e:
            logger.error(f"original error type: {type(e).__name__}")
            logger.error(f"original error message: {e}")
            raise

    return records

и вот соответствующие тесты:

Код: Выделить всё

@pytest.mark.vcr()
def test_s3_connect_get_files(validated_target_params) -> None:
    """Connecting yields an S3FileSystem handle and a list of files."""
    connection = s3_connect_get_files(validated_target_params)
    filesystem, listing = connection
    assert isinstance(filesystem, S3FileSystem)
    assert isinstance(listing, list)

@pytest.mark.vcr()
def test_read_files_pandas(validated_target_params) -> None:
    """Reading with the pandas reader yields one DataFrame entry per file."""
    filesystem, listing = s3_connect_get_files(validated_target_params)
    records = read_files(filesystem, listing, validated_target_params)
    assert isinstance(records, dict)
    assert len(records) == len(listing)
    # Each value is an entry dict; its "data" member must be a DataFrame.
    for entry in records.values():
        assert isinstance(entry["data"], pd.DataFrame)

Если я закомментирую test_read_files_pandas, все работает нормально и все тесты проходят успешно. Если я оставлю его, он неизбежно потерпит неудачу, вот так:

Код: Выделить всё

E           botocore.exceptions.IncompleteReadError: 0 read, but total bytes expected is 6163243.

.venv/lib/python3.11/site-packages/aiobotocore/response.py:125: IncompleteReadError

Я новичок в pytest-recording и, честно говоря, не лучший автор тестов. Поэтому прошу прощения, если допустил глупую ошибку, и буду очень признателен за любые подсказки о том, как добиться прохождения этих тестов или как изменить мои функции.

Подробнее здесь: https://stackoverflow.com/questions/79216188/pytest-recording-vcr-for-s3-incompletereaderror-but-only-sometimes

1732300102

Anonymous

Хочу использовать pytest-recording в своих тестах, которые включают подключение и загрузку данных из S3.
Я импортирую все функции из тестируемого скрипта. Здесь используются переменные среды prod, но только для проверки загрузки и чтения данных из S3 (не выгрузки). В REPL тот же код работает нормально — я могу подключиться к своему экземпляру S3, прочитать и загрузить данные, находящиеся на нем.
Теперь в своем наборе тестов я постоянно получаю ошибку IncompleteReadError при запуске pytest --record-mode=once. Это происходит независимо от того, удаляю ли я существующие кассеты или нет.
Вот оригинальные функции, которые я тестирую:
[code]def s3_connect_get_files(
validated_target_params: dict,
) -> Tuple[s3fs.S3FileSystem, List[str]]:
"""
connects to our s3 instance, returning
our bucket s3fs object and a list of the files
in the target infile directory specified in
our data model for the target.

returns:
- Tuple(the_bucket (s3fs obj), files (list) )

raises:
- FileNotFoundError if we can't find the dir
on our s3
"""
try:
the_bucket = s3fs.S3FileSystem(
key=AWS_ACCESS_KEY_ID,
secret=AWS_SECRET_ACCESS_KEY,
client_kwargs={"endpoint_url": validated_target_params["endpoint_url"]},
)
files = the_bucket.glob(
f"{validated_target_params['in_path']}/"
f"{validated_target_params['glob_pattern']}"
)
return the_bucket, files
except FileNotFoundError as e:
logger.error("could not connect to s3. check credentials!")
logger.error(f"original error type: {type(e).__name__}")
logger.error(f"original error message: {e}")
raise

def read_files(
    the_bucket: s3fs.S3FileSystem, files: List[str], validated_target_params: dict
) -> dict:
    """Read every file in ``files`` into memory, keyed by its path.

    Each path is opened through ``the_bucket``, parsed with
    ``pandas.read_csv``, and given a ``file_last_modified`` column holding the
    ``LastModified`` value reported by ``the_bucket.info``.

    Returns:
        A dict mapping each path to
        ``{"data": <DataFrame>, "last_modified_at": <LastModified value>}``.

    Raises:
        NotImplementedError: if the ``reader`` entry of
            ``validated_target_params`` has a value other than ``"pandas"``.
        Exception: any error raised while reading a file is logged and
            re-raised unchanged.
    """
    records = {}

    logger.info("Now reading data to be validated and de-duped.")
    for file in files:
        # "reader" is an Enum member, so compare against its .value.
        if validated_target_params["reader"].value != "pandas":
            raise NotImplementedError(
                f"{validated_target_params['reader']} is not yet implemented as a reader."
            )

        try:
            # Close the remote file handle as soon as pandas has parsed it;
            # read_csv does not close file objects it did not open itself.
            with the_bucket.open(file) as handle:
                df = pd.read_csv(handle)
            file_last_modified = the_bucket.info(file).get("LastModified")

            df["file_last_modified"] = file_last_modified

            records[file] = {
                "data": df,
                "last_modified_at": file_last_modified,
            }
            logger.info(f"Loaded {file} with {len(df)} rows using pandas")
        except Exception as e:
            logger.error(f"original error type: {type(e).__name__}")
            logger.error(f"original error message: {e}")
            raise

    return records
[/code]
и вот соответствующие тесты:
[code]@pytest.mark.vcr()
def test_s3_connect_get_files(validated_target_params) -> None:
    """Connecting yields an S3FileSystem handle and a list of files."""
    connection = s3_connect_get_files(validated_target_params)
    filesystem, listing = connection
    assert isinstance(filesystem, S3FileSystem)
    assert isinstance(listing, list)

@pytest.mark.vcr()
def test_read_files_pandas(validated_target_params) -> None:
    """Reading with the pandas reader yields one DataFrame entry per file."""
    filesystem, listing = s3_connect_get_files(validated_target_params)
    records = read_files(filesystem, listing, validated_target_params)
    assert isinstance(records, dict)
    assert len(records) == len(listing)
    # Each value is an entry dict; its "data" member must be a DataFrame.
    for entry in records.values():
        assert isinstance(entry["data"], pd.DataFrame)
[/code]
Если я закомментирую test_read_files_pandas, все работает нормально и все тесты проходят успешно.  Если я оставлю его, он неизбежно потерпит неудачу, вот так:
[code]E           botocore.exceptions.IncompleteReadError: 0 read, but total bytes expected is 6163243.

.venv/lib/python3.11/site-packages/aiobotocore/response.py:125: IncompleteReadError
[/code]
Я новичок в pytest-recording и, честно говоря, не лучший автор тестов. Поэтому прошу прощения, если допустил глупую ошибку, и буду очень признателен за любые подсказки о том, как добиться прохождения этих тестов или как изменить мои функции.

Подробнее здесь: [url]https://stackoverflow.com/questions/79216188/pytest-recording-vcr-for-s3-incompletereaderror-but-only-sometimes[/url]