1 回答
TA贡献1799条经验 获得超8个赞
由于某种原因,无论是 R api 还是 Python API,每个人似乎都有相同的问题。我找到了一种解决方法来获得相同的结果。它很慢,但它可以完成工作。如果你的结果小于 10k,你可以使用 Selenium 来获取 pubmedid。否则,我们可以使用下面的代码来抓取数据。我希望这对将来的人有帮助。
import requests
# # Custom Date Range
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=dates.2009/01/01-2020/03/01&format=pmid&sort=pubdate&size=200&page={}".format(i))
# # Custom Year Range
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=years.2010-2019&format=pmid&sort=pubdate&size=200&page={}".format(i))
# #Relative Date
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=datesearch.y_1&format=pmid&sort=pubdate&size=200&page={}".format(i))
# # filter language
# # &filter=lang.english
# # filter human
# #&filter=hum_ani.humans
# Systematic Review
#&filter=pubt.systematicreview
# Case Reports
# &filter=pubt.casereports
# Age
# &filter=age.newborn
search = "covid lungs"
# search_list = "+".join(search.split(' '))
def id_retriever(search_string):
string = "+".join(search_string.split(' '))
result = []
old_result = len(result)
for page in range(1,10000000):
req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term={string}&format=pmid&sort=pubdate&size=200&page={page}".format(page=page,string=string))
for j in req.iter_lines():
decoded = j.decode("utf-8").strip(" ")
length = len(decoded)
if "log_displayeduids" in decoded and length > 46:
data = (str(j).split('"')[-2].split(","))
result = result + data
data = []
new_result = len(result)
if new_result != old_result:
old_result = new_result
else:
break
return result
ids=id_retriever(search)
len(ids)
添加回答
举报