# Spaces:
# Build error
# Build error
# Download open-access article PDFs listed by the PMC OA Web Service and
# append their metadata to metadata.txt.
#
# Usage:    bash <this-script> [limit]
#   limit   maximum number of articles to attempt to download (default: 10)
# Outputs:  progress messages on stdout, diagnostics on stderr; PDFs saved in
#           the current directory under their remote file names; one entry per
#           successfully downloaded article appended to metadata.txt.
# Requires: bash ([[ =~ ]], BASH_REMATCH, process substitution), curl, grep, tr.

set -uo pipefail

# Specify the number of articles to download
limit="${1:-10}"
if [[ ! "$limit" =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: limit must be a positive integer, got: %s\n' "$limit" >&2
  exit 2
fi

oa_url="https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi"
metadata_file="metadata.txt"

# Attribute patterns are kept in variables so the regexes need no inline
# escaping. [^"]* stops at the closing quote of the attribute itself, so a
# capture never swallows the attributes that follow it on the same tag.
id_re='[[:space:]]id="(PMC[0-9]+)"'
citation_re='[[:space:]]citation="([^"]*)"'
pdf_format_re='[[:space:]]format="pdf"'
href_re='[[:space:]]href="([^"]*)"'
url_scheme_re='^(https?|ftp)://'

# Fetch the list of articles with metadata in XML format.
# -f turns HTTP errors into a non-zero exit instead of handing back an error
# page as if it were data; -sS stays quiet but still reports failures.
# NOTE(review): `limit` does not appear to be a documented parameter of the OA
# service -- confirm. The cap is therefore also enforced locally below.
if ! response=$(curl -fsS "${oa_url}?format=pdf&limit=${limit}"); then
  printf 'error: failed to fetch the article list from %s\n' "$oa_url" >&2
  exit 1
fi

seen=0        # records carrying a PMC ID
attempted=0   # records for which a download was attempted
pmc_id=""
title=""
pdf_link=""

# Parse each record in the response.
# The XML is flattened and reduced to a stream of <record ...>, <link ...> and
# </record> tags, one per line, so parsing works the same whether the service
# returns everything on a single line or pretty-printed. Each record is handled
# in a single pass instead of re-scanning the whole response per article.
while IFS= read -r tag; do
  case "$tag" in
    '<record '*)
      pmc_id=""
      title=""
      pdf_link=""
      # Extract the PMC ID
      if [[ "$tag" =~ $id_re ]]; then
        pmc_id="${BASH_REMATCH[1]}"
        seen=$((seen + 1))
        printf 'Processing article ID: %s\n' "$pmc_id"
        # Extract the title for metadata (the record's citation attribute)
        if [[ "$tag" =~ $citation_re ]]; then
          title="${BASH_REMATCH[1]}"
        fi
      fi
      ;;
    '<link '*)
      # Extract the PDF link for download; keep the first one of the record.
      # href_re is matched last so BASH_REMATCH[1] holds the URL.
      if [[ -n "$pmc_id" && -z "$pdf_link" \
            && "$tag" =~ $pdf_format_re && "$tag" =~ $href_re ]]; then
        pdf_link="${BASH_REMATCH[1]}"
      fi
      ;;
    '</record>')
      # Records without a recognizable PMC ID are skipped silently.
      [[ -n "$pmc_id" ]] || continue

      # Check if we found a PDF link
      if [[ -z "$pdf_link" ]]; then
        printf 'No PDF link found for article ID: %s\n' "$pmc_id"
      elif [[ ! "$pdf_link" =~ $url_scheme_re ]]; then
        # The link comes from remote data: refuse anything that is not a plain
        # URL so it can never be interpreted by curl as an option.
        printf 'warning: skipping unexpected link for %s: %s\n' \
          "$pmc_id" "$pdf_link" >&2
      else
        # Print metadata
        printf 'Title: %s\n' "$title"
        printf 'Downloading PDF from: %s\n' "$pdf_link"
        attempted=$((attempted + 1))

        # Download the PDF. stdin is detached so curl can never consume the
        # tag stream this loop is reading from.
        if curl -f -O "$pdf_link" </dev/null; then
          # Save metadata only for files that were actually downloaded.
          {
            printf 'Title: %s\n' "$title"
            printf 'PDF Link: %s\n' "$pdf_link"
            printf '%s\n' '---------------------'
          } >>"$metadata_file"
        else
          printf 'warning: download failed for %s (%s)\n' \
            "$pmc_id" "$pdf_link" >&2
        fi
      fi
      pmc_id=""

      # Enforce the limit on attempts (not successes) so a persistently
      # failing mirror does not make the script walk the entire listing.
      if (( attempted >= limit )); then
        break
      fi
      ;;
  esac
done < <(
  printf '%s' "$response" \
    | tr '\r\n' '  ' \
    | grep -oE '<record [^>]*>|<link [^>]*>|</record>'
)

if (( seen == 0 )); then
  printf 'warning: no article records found in the response\n' >&2
fi