Вот код:
Код: Выделить всё
from Bio import Entrez
from Bio import Medline
import datetime
import pandas as pd
# Contact address sent to NCBI with every E-utilities request.
Entrez.email = 'example@example.com'

# Number of records requested per EFetch call. A single EFetch call returns
# only a limited page of the result set, so the full set has to be
# downloaded in slices via retstart/retmax.
BATCH_SIZE = 5000

# Run the search once and keep the result set on the NCBI history server
# (usehistory="y"), so later fetches can refer to it by WebEnv/QueryKey.
search_handle = Entrez.esearch(
    db="pubmed",
    term='Autism AND has abstract[FILT]',
    mindate='2012/12/31',
    maxdate='3000/12/12',
    usehistory="y",
)
try:
    search_results = Entrez.read(search_handle)
finally:
    search_handle.close()

count = int(search_results["Count"])
print("Found %i results" % count)
webEnv = search_results["WebEnv"]
queryKey = search_results["QueryKey"]

# Download the stored result set batch by batch and append every batch to one
# MEDLINE text file, instead of issuing a single fetch that silently stops
# after the first page of results.
# NOTE(review): NCBI documents a 10,000-record ceiling for PubMed ESearch; if
# EFetch rejects retstart values at or beyond that ceiling, split the query
# into narrower mindate/maxdate windows and download each window separately.
with open('./abstracts_medline.txt', 'w', encoding="utf-8") as f:
    for start in range(0, count, BATCH_SIZE):
        handle = Entrez.efetch(
            db="pubmed",
            retmode="text",
            rettype='medline',
            retstart=start,
            retmax=BATCH_SIZE,
            webenv=webEnv,
            query_key=queryKey,
        )
        try:
            data = handle.read()
        finally:
            # Release the HTTP connection even if reading fails.
            handle.close()
        f.write(data)
        # Keep consecutive batches separated so that the last record of one
        # batch does not run into the first record of the next.
        if not data.endswith("\n"):
            f.write("\n")
# Parse the downloaded MEDLINE file into parallel column lists, one entry per
# record. Records without an abstract ('AB') are skipped entirely.
pmids = []
years = []
months = []
days = []
titles = []
abstracts = []

with open('./abstracts_medline.txt', 'r', encoding="utf-8") as f:
    for record in Medline.parse(f):
        if 'AB' not in record:
            continue

        pmids.append(record['PMID'])

        if 'PHST' in record:
            # The date comes from the last 'PHST' entry: year/month/day are
            # the '/'-separated fields, and the day is the text before the
            # first space.
            # assumes entries look like 'YYYY/MM/DD HH:MM [status]' - TODO confirm
            date_fields = record['PHST'][-1].split('/')
            years.append(int(date_fields[0]))
            months.append(int(date_fields[1]))
            days.append(int(date_fields[2].split(' ')[0]))
        else:
            # No publication history: keep the lists aligned with placeholders.
            years.append(None)
            months.append(None)
            days.append(None)

        titles.append(record['TI'] if 'TI' in record else None)
        abstracts.append(record['AB'])
# Assemble the parsed columns into a table and drop every row that has a
# missing value in any column (e.g. no date or no title).
df = pd.DataFrame(
    {
        'PMID': pmids,
        'Year': years,
        'Month': months,
        'Day': days,
        'Title': titles,
        'Abstract': abstracts,
    }
).dropna()

# Cast the date parts to integers so they can be passed to datetime.date.
df = df.astype({'Year': int, 'Month': int, 'Day': int})

count = df.shape[0]
print("Fetched %i available abstracts" % count)

# Abstract texts and their dates, in the same row order.
docs = df['Abstract'].tolist()
timestamps = list(map(datetime.date, df['Year'], df['Month'], df['Day']))
print('Read %i abstracts' % len(timestamps))
Вывод программы:
Found 55574 results
Fetched 9946 available abstracts
Read 9946 abstracts
Подробнее здесь: https://stackoverflow.com/questions/793 ... rom-pubmed