Spaces:
Build error
Build error
File size: 1,399 Bytes
965ac15 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
#!/bin/bash
#
# Download open-access article PDFs listed by the PMC OA web service.
#
# The script fetches the OA record list (XML), and for at most $limit records
# prints the citation, downloads the PDF into the current directory with
# `curl -O`, and appends an entry to metadata.txt.
#
# Exit status: 0 on success; non-zero if the list could not be fetched, no
# records were found, or at least one download failed.

# Specify the number of articles to download
limit=10

# Request URL for the record list. The limit is still sent to the service, but
# it is also enforced locally in main() so the cap holds even if the service
# ignores the parameter.
oa_url="https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?format=pdf&limit=$limit"

# Separator for the fields emitted by parse_records. A non-whitespace IFS
# character is used so that `read` keeps empty fields (e.g. a missing link).
field_sep=$'\x1f'

#######################################
# Parse the OA XML list into one line per <record> element.
# Works whether the records are laid out one per line, spread over several
# lines, or all packed on a single line.
# Globals:   field_sep (read)
# Arguments: none (XML is read from stdin)
# Outputs:   "<PMC id><sep><citation><sep><pdf href>" per record on stdout;
#            citation and href are empty when the record lacks them
# Returns:   0
#######################################
parse_records() {
  local xml flat chunk id citation href
  local nl=$'\n' tag='<record '
  # Regexes are kept in variables so bash treats them as patterns when they
  # are expanded unquoted on the right-hand side of =~.
  local id_re='[[:space:]]id="(PMC[0-9]+)"'
  # [^"]* stops at the closing quote of the attribute; a greedy .* would run
  # on into the following attributes of the same tag.
  local cite_re='[[:space:]]citation="([^"]*)"'
  local link_re='<link[^>]*[[:space:]]format="pdf"[^>]*[[:space:]]href="([^"]*)"'

  xml=$(cat)
  # Flatten the document, then start a new line in front of every record so
  # each line read below holds exactly one record.
  flat=${xml//$nl/ }
  flat=${flat//$'\r'/ }
  flat=${flat//"$tag"/$nl$tag}

  while IFS= read -r chunk; do
    # Skip the preamble that precedes the first record.
    [[ $chunk == "$tag"* ]] || continue
    # Drop anything after the record (closing wrappers, trailing elements).
    chunk=${chunk%%</record>*}

    [[ $chunk =~ $id_re ]] || continue
    id=${BASH_REMATCH[1]}

    citation=""
    if [[ $chunk =~ $cite_re ]]; then
      citation=${BASH_REMATCH[1]}
    fi

    href=""
    if [[ $chunk =~ $link_re ]]; then
      href=${BASH_REMATCH[1]}
    fi

    printf '%s%s%s%s%s\n' "$id" "$field_sep" "$citation" "$field_sep" "$href"
  done <<<"$flat"
}

#######################################
# Fetch the record list and download up to $limit PDFs.
# Globals:   limit, oa_url, field_sep (read)
# Arguments: none
# Outputs:   progress on stdout, diagnostics on stderr; writes the PDFs and
#            appends to metadata.txt in the current directory
# Returns:   0 on success, 1 on fetch failure, empty list or failed download
#######################################
main() {
  local response records pmc_id title pdf_link
  local processed=0 failures=0

  # Fetch the list of articles with metadata in XML format
  if ! response=$(curl -fsS --connect-timeout 15 "$oa_url"); then
    printf 'Error: could not fetch the article list from %s\n' "$oa_url" >&2
    return 1
  fi

  records=$(parse_records <<<"$response")
  if [[ -z $records ]]; then
    printf 'Error: no article records found in the response\n' >&2
    return 1
  fi

  while IFS=$field_sep read -r pmc_id title pdf_link; do
    # Enforce the cap locally.
    if (( processed >= limit )); then
      break
    fi
    processed=$(( processed + 1 ))

    echo "Processing article ID: $pmc_id"

    # Check if we found a PDF link
    if [[ -z $pdf_link ]]; then
      echo "No PDF link found for article ID: $pmc_id"
      continue
    fi

    # Print metadata
    echo "Title: $title"
    echo "Downloading PDF from: $pdf_link"

    # Download the PDF. stdin is detached so the download can never consume
    # the record list feeding this loop.
    if ! curl -O "$pdf_link" </dev/null; then
      printf 'Error: download failed for article ID: %s\n' "$pmc_id" >&2
      failures=$(( failures + 1 ))
      continue
    fi

    # Save metadata to a file (only for PDFs that were actually downloaded)
    {
      echo "Title: $title"
      echo "PDF Link: $pdf_link"
      echo "---------------------"
    } >> metadata.txt
  done <<<"$records"

  if (( failures > 0 )); then
    return 1
  fi
  return 0
}

main "$@"
|