|
import sys |
|
import pandas as pd |
|
|
|
from SPARQLWrapper import SPARQLWrapper, JSON |
|
|
|
|
|
# SPARQL endpoint that get_results() sends the query to.
endpoint_url = "https://query.wikidata.org/sparql"

# Query text passed to get_results(). It selects every ?item that is an
# instance of wd:Q5 and has a P4478 identifier (?accused), builds ?accusedurl
# by substituting that identifier into the property's P1630 formatter URL, and
# requires a residence (P551) that has coordinates (P625). Gender, occupation,
# class, treatment, detention, cause of death and family relations are
# OPTIONAL, so they may be unbound for a given row.
# The residence triple is matched once, as a required pattern; repeating it as
# an OPTIONAL on already-bound variables cannot add or change any solution.
query = """
# Places of residence of accused witches in Scotland 1563-1736
SELECT ?accusedurl ?item ?itemLabel ?residenceLabel ?genderLabel ?occupationLabel ?classLabel ?manner_of_inhumane_treatmentLabel ?place_of_detentionLabel ?cause_of_deathLabel ?fatherLabel ?motherLabel ?siblingLabel ?childLabel ?spouseLabel ?coords WHERE {
  ?item wdt:P31 wd:Q5;
        wdt:P4478 ?accused.
  wd:P4478 wdt:P1630 ?formatterurl.
  BIND(IRI(REPLACE(?accused, "^(.+)$", ?formatterurl)) AS ?accusedurl)
  ?item wdt:P551 ?residence.
  ?residence wdt:P625 ?coords.

  OPTIONAL { ?item wdt:P21 ?gender. }
  OPTIONAL { ?item wdt:P106 ?occupation. }
  OPTIONAL { ?item wdt:P3716 ?class. }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  OPTIONAL { ?item wdt:P7160 ?manner_of_inhumane_treatment. }
  OPTIONAL { ?item wdt:P2632 ?place_of_detention. }
  OPTIONAL { ?item wdt:P509 ?cause_of_death. }
  OPTIONAL { ?item wdt:P22 ?father. }
  OPTIONAL { ?item wdt:P25 ?mother. }
  OPTIONAL { ?item wdt:P3373 ?sibling. }
  OPTIONAL { ?item wdt:P40 ?child. }
  OPTIONAL { ?item wdt:P26 ?spouse. }
}
"""
|
|
|
|
|
def get_results(endpoint_url, query):
    """
    Obtain SPARQL query results.

    Sends ``query`` to the SPARQL endpoint at ``endpoint_url`` with the JSON
    return format requested and returns the converted response.

    Parameters
    ----------
    endpoint_url : str
        URL of the SPARQL endpoint to query.
    query : str
        SPARQL query text.

    Returns
    -------
    The object produced by ``SPARQLWrapper.query().convert()``; callers in
    this file index it as ``results["results"]["bindings"]``.
    """
    # Identify the client by the running interpreter's major.minor version.
    user_agent = "WDQS-example Python/%s.%s" % sys.version_info[:2]

    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()
|
|
|
|
|
def load_data():
    """
    Obtain data for accused witches charged with witchcraft.

    Runs the module-level ``query`` against ``endpoint_url`` via
    ``get_results`` and flattens each result binding to
    ``{variable: value}``. Rows with none of the sibling / spouse / child /
    father / mother labels are dropped, ``coords`` is split into float
    ``longitude`` and ``latitude`` columns, and the text ``Label`` is removed
    from every column name except ``itemLabel``.

    Returns
    -------
    pandas.DataFrame
        One row per remaining binding. When the query returns no bindings at
        all, an empty frame with no columns is returned.
    """
    family_columns = ['siblingLabel', 'spouseLabel', 'childLabel', 'fatherLabel', 'motherLabel']

    results = get_results(endpoint_url, query)
    rows = [
        {name: binding['value'] for name, binding in result.items()}
        for result in results["results"]["bindings"]
    ]
    data = pd.DataFrame(rows)
    if data.empty:
        # No bindings means no columns, so there is nothing to post-process.
        return data

    # A variable that is missing from every binding produces no column, and
    # dropna() raises KeyError for unknown subset labels, so only pass the
    # family columns that actually exist.
    present = [col for col in family_columns if col in data.columns]
    if present:
        data.dropna(subset=present, how="all", inplace=True)
    else:
        # No family information for any row: every row fails the filter.
        data = data.iloc[0:0].copy()

    # coords looks like "Point(<first> <last>)": strip the tag once, then take
    # the first token as longitude and the last token as latitude.
    point_text = data['coords'].str.replace("Point", "")
    data['longitude'] = point_text.apply(lambda x: x.split()[0].lstrip("(")).astype(float)
    data['latitude'] = point_text.apply(lambda x: x.split()[-1].rstrip(")")).astype(float)
    data.drop(['coords'], axis=1, inplace=True)

    data.columns = [col.replace("Label", "") if col != "itemLabel" else col for col in data.columns.tolist()]
    return data
|
|
|
|
|
# Build the dataset once when the module is loaded; load_data() issues a live
# request to the SPARQL endpoint, so importing this module requires network access.
df = load_data()
|
|