У меня есть фрагмент файла JSON, который я нормализую и использую Record_path с мета-параметрами в других столбцах, и он работает нормально, но когда я загружаю весь файл, я получаю ключевую ошибку. Ниже приведен мой код для загрузки фрагмента и полной загрузки файла JSON.
Фрагмент работает
import pandas as pd
data = {
"meta": {
"disclaimer": "Do not rely on openFDA to make decisions regarding medical care. While we make every effort to ensure that data is accurate, you should assume all results are unvalidated. We may limit or otherwise restrict your access to the API in line with our Terms of Service.",
"terms": "https://open.fda.gov/terms/",
"license": "https://open.fda.gov/license/",
"last_updated": "2024-11-15",
"results": {
"skip": 0,
"limit": 2,
"total": 118943
}
},
"results": [
{
"product_ndc": "73647-062",
"generic_name": "MENTHOL, CAMPHOR",
"labeler_name": "Just Brands LLC",
"brand_name": "JUST CBD - CBD AND THC ULTRA RELIEF",
"active_ingredients": [
{
"name": "CAMPHOR (SYNTHETIC)",
"strength": "2 g/100g"
},
{
"name": "MENTHOL",
"strength": "6 g/100g"
}
],
"finished": True,
"packaging": [
{
"package_ndc": "73647-062-04",
"description": "113 g in 1 BOTTLE, PUMP (73647-062-04)",
"marketing_start_date": "20230314",
"sample": False
}
],
"listing_expiration_date": "20251231",
"openfda": {
"manufacturer_name": ["Just Brands LLC"],
"spl_set_id": ["f664eb79-8897-3a49-e053-2995a90a37b4"],
"is_original_packager": [True],
"unii": ["5TJD82A1ET", "L7T10EIP3A"]
},
"marketing_category": "OTC MONOGRAPH DRUG",
"dosage_form": "GEL",
"spl_id": "16c906dd-6989-9a79-e063-6394a90afa71",
"product_type": "HUMAN OTC DRUG",
"route": ["TOPICAL"],
"marketing_start_date": "20230314",
"product_id": "73647-062_16c906dd-6989-9a79-e063-6394a90afa71",
"application_number": "M017",
"brand_name_base": "JUST CBD - CBD AND THC ULTRA RELIEF"
},
{
"product_ndc": "0591-4039",
"marketing_end_date": "20250930",
"generic_name": "CLOBETASOL PROPIONATE",
"labeler_name": "Actavis Pharma, Inc.",
"brand_name": "CLOBETASOL PROPIONATE",
"active_ingredients": [
{
"name": "CLOBETASOL PROPIONATE",
"strength": ".05 g/mL"
}
],
"finished": True,
"packaging": [
{
"package_ndc": "0591-4039-46",
"description": "1 BOTTLE in 1 CARTON (0591-4039-46) / 59 mL in 1 BOTTLE",
"marketing_start_date": "20150828",
"marketing_end_date": "20250930",
"sample": False
},
{
"package_ndc": "0591-4039-74",
"description": "1 BOTTLE in 1 CARTON (0591-4039-74) / 125 mL in 1 BOTTLE",
"marketing_start_date": "20150828",
"marketing_end_date": "20250930",
"sample": False
}
],
"openfda": {
"manufacturer_name": ["Actavis Pharma, Inc."],
"rxcui": ["861512"],
"spl_set_id": ["907e425a-720a-4180-b97c-9e25008a3658"],
"is_original_packager": [True],
"unii": ["779619577M"]
},
"marketing_category": "NDA AUTHORIZED GENERIC",
"dosage_form": "SPRAY",
"spl_id": "33a56b8b-a9a6-4287-bbf4-d68ad0c59e07",
"product_type": "HUMAN PRESCRIPTION DRUG",
"route": ["TOPICAL"],
"marketing_start_date": "20150828",
"product_id": "0591-4039_33a56b8b-a9a6-4287-bbf4-d68ad0c59e07",
"application_number": "NDA021835",
"brand_name_base": "CLOBETASOL PROPIONATE",
"pharm_class": [
"Corticosteroid Hormone Receptor Agonists [MoA]",
"Corticosteroid [EPC]"
]
}
]
}
packaging_data = pd.json_normalize(
data['results'],
record_path=["packaging"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
active_ingredients_data = pd.json_normalize(
data['results'],
record_path=["active_ingredients"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
combined_data = pd.merge(
packaging_data,
active_ingredients_data,
on=['product_ndc', 'brand_name', 'generic_name'],
how='outer'
)
полная загрузка и загрузка файла JSON выдает ошибку ключа при использовании мета
import pandas as pd
import json
import requests, zipfile, io, os
cwd = os.getcwd()
zip_url = 'https://download.open.fda.gov/drug/ndc/ ... 1.json.zip'
r = requests.get(zip_url)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall(cwd)
with open('drug-ndc-0001-of-0001.json', 'r') as file:
data = json.load(file)
packaging_data = pd.json_normalize(
data['results'],
record_path=["packaging"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
active_ingredients_data = pd.json_normalize(
data['results'],
record_path=["active_ingredients"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
combined_data = pd.merge(
packaging_data,
active_ingredients_data,
on=['product_ndc', 'brand_name', 'generic_name'],
how='outer'
)
Подробнее здесь: https://stackoverflow.com/questions/792 ... d-does-not