Title: | Retrieve Data on European Union Law |
---|---|
Description: | Access to data on European Union laws and court decisions made easy with pre-defined 'SPARQL' queries and 'GET' requests. See Ovadek (2021) <doi:10.1080/2474736X.2020.1870150>. |
Authors: | Michal Ovadek [aut, cre, cph] |
Maintainer: | Michal Ovadek <[email protected]> |
License: | GPL-3 |
Version: | 0.4.8 |
Built: | 2025-01-24 04:43:07 UTC |
Source: | https://github.com/michalovadek/eurlex |
Harvests data from lists of EU court cases from curia.europa.eu. CELEX identifiers are extracted from hyperlinks where available.
elx_curia_list( data = c("all", "ecj_old", "ecj_new", "gc_all", "cst_all"), parse = TRUE )
elx_curia_list( data = c("all", "ecj_old", "ecj_new", "gc_all", "cst_all"), parse = TRUE )
data |
Data to be scraped from four separate lists of cases maintained by Curia; defaults to "all", which contains cases from the Court of Justice, General Court and Civil Service Tribunal. |
parse |
If |
A data frame containing case identifiers and information as character columns. Where the case id contains a hyperlink to Eur-Lex, the CELEX identifier is retrieved as well. Hyperlinks to Eur-Lex disappeared from more recent cases.
elx_curia_list(data = "cst_all", parse = FALSE)
elx_curia_list(data = "cst_all", parse = FALSE)
Downloads an XML notice of a given type associated with a Cellar resource.
elx_download_xml( url, file = paste(basename(url), ".xml", sep = ""), notice = c("tree", "branch", "object"), language_1 = "en", language_2 = "fr", language_3 = "de", mode = "wb" )
elx_download_xml( url, file = paste(basename(url), ".xml", sep = ""), notice = c("tree", "branch", "object"), language_1 = "en", language_2 = "fr", language_3 = "de", mode = "wb" )
url |
A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI. |
file |
A character string with the name where the downloaded file is saved. |
notice |
The type of notice requested controls what kind of metadata are returned. |
language_1 |
The priority language in which retrieval of the data is attempted first, given as a two-letter ISO 639 code |
language_2 |
If data not available in `language_1`, try `language_2` |
language_3 |
If data not available in `language_2`, try `language_3` |
mode |
A character string specifying the mode with which to write the file. Useful values are "w", "wb" (binary), "a" (append) and "ab". |
To retrieve all identifiers associated with a url, use elx_fetch_data(type = "ids").
Path of downloaded file (invisibly) if server validates request (http status code has to be 200). For more information about notices, see Cellar documentation.
temploc <- paste(tempdir(), "elxnotice.xml", sep = "\\") elx_download_xml(url = "http://publications.europa.eu/resource/celex/32022D0154", file = temploc, notice = "object") unlink(temploc)
temploc <- paste(tempdir(), "elxnotice.xml", sep = "\\") elx_download_xml(url = "http://publications.europa.eu/resource/celex/32022D0154", file = temploc, notice = "object") unlink(temploc)
Get titles, texts, identifiers and XML notices for EU resources.
elx_fetch_data( url, type = c("title", "text", "ids", "notice"), notice = c("tree", "branch", "object"), language_1 = "en", language_2 = "fr", language_3 = "de", include_breaks = TRUE, html_text = c("text2", "text") )
elx_fetch_data( url, type = c("title", "text", "ids", "notice"), notice = c("tree", "branch", "object"), language_1 = "en", language_2 = "fr", language_3 = "de", include_breaks = TRUE, html_text = c("text2", "text") )
url |
A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI. |
type |
The type of data to be retrieved. When type = "text", the returned list contains named elements reflecting the source of each text. When type = "notice", the result is an XML notice associated with the url. |
notice |
If type = "notice", controls what kind of metadata are returned by the notice. |
language_1 |
The priority language in which retrieval of the data is attempted first, given as a two-letter ISO 639 code |
language_2 |
If data not available in `language_1`, try `language_2` |
language_3 |
If data not available in `language_2`, try `language_3` |
include_breaks |
If TRUE, text includes tags showing where pages ("—pagebreak—", for pdfs) and documents ("—documentbreak—") were concatenated |
html_text |
Choose whether to read text from html using the "text2" (default) or the "text" method |
A character vector of length one containing the result. When type = "text", a named character vector where the name contains the source of the text.
elx_fetch_data(url = "http://publications.europa.eu/resource/celex/32014R0001", type = "title")
elx_fetch_data(url = "http://publications.europa.eu/resource/celex/32014R0001", type = "title")
Create a look-up table with labels for EuroVoc concept URIs. Only unique identifiers are returned.
elx_label_eurovoc(uri_eurovoc = "", alt_labels = FALSE, language = "en")
elx_label_eurovoc(uri_eurovoc = "", alt_labels = FALSE, language = "en")
uri_eurovoc |
Character vector with valid EuroVoc URIs |
alt_labels |
If |
language |
Language in which to return the labels, in ISO 639 2-char code |
A tibble containing EuroVoc unique concept identifiers and labels.
elx_label_eurovoc(uri_eurovoc = "http://eurovoc.europa.eu/5760", language = "fr")
elx_label_eurovoc(uri_eurovoc = "http://eurovoc.europa.eu/5760", language = "fr")
Generates pre-defined or manual SPARQL queries to retrieve document ids from Cellar. List of available resource types: http://publications.europa.eu/resource/authority/resource-type . Note that not all resource types are compatible with default parameter values.
elx_make_query( resource_type = c("any", "directive", "regulation", "decision", "recommendation", "intagr", "caselaw", "manual", "proposal", "national_impl"), manual_type = "", directory = NULL, sector = NULL, include_corrigenda = FALSE, include_celex = TRUE, include_lbs = FALSE, include_date = FALSE, include_date_force = FALSE, include_date_endvalid = FALSE, include_date_transpos = FALSE, include_date_lodged = FALSE, include_force = FALSE, include_eurovoc = FALSE, include_citations = FALSE, include_citations_detailed = FALSE, include_author = FALSE, include_directory = FALSE, include_directory_code = FALSE, include_sector = FALSE, include_ecli = FALSE, include_court_procedure = FALSE, include_judge_rapporteur = FALSE, include_advocate_general = FALSE, include_court_formation = FALSE, include_court_scholarship = FALSE, include_court_origin = FALSE, include_original_language = FALSE, include_proposal = FALSE, order = FALSE, limit = NULL )
elx_make_query( resource_type = c("any", "directive", "regulation", "decision", "recommendation", "intagr", "caselaw", "manual", "proposal", "national_impl"), manual_type = "", directory = NULL, sector = NULL, include_corrigenda = FALSE, include_celex = TRUE, include_lbs = FALSE, include_date = FALSE, include_date_force = FALSE, include_date_endvalid = FALSE, include_date_transpos = FALSE, include_date_lodged = FALSE, include_force = FALSE, include_eurovoc = FALSE, include_citations = FALSE, include_citations_detailed = FALSE, include_author = FALSE, include_directory = FALSE, include_directory_code = FALSE, include_sector = FALSE, include_ecli = FALSE, include_court_procedure = FALSE, include_judge_rapporteur = FALSE, include_advocate_general = FALSE, include_court_formation = FALSE, include_court_scholarship = FALSE, include_court_origin = FALSE, include_original_language = FALSE, include_proposal = FALSE, order = FALSE, limit = NULL )
resource_type |
Type of resource to be retrieved via SPARQL query |
manual_type |
Define manually the type of resource to be retrieved |
directory |
Restrict the results to a given directory code |
sector |
Restrict the results to a given sector code |
include_corrigenda |
If |
include_celex |
If |
include_lbs |
If |
include_date |
If |
include_date_force |
If |
include_date_endvalid |
If |
include_date_transpos |
If |
include_date_lodged |
If |
include_force |
If |
include_eurovoc |
If |
include_citations |
If |
include_citations_detailed |
If |
include_author |
If |
include_directory |
If |
include_directory_code |
If |
include_sector |
If |
include_ecli |
If |
include_court_procedure |
If |
include_judge_rapporteur |
If |
include_advocate_general |
If |
include_court_formation |
If |
include_court_scholarship |
If |
include_court_origin |
If |
include_original_language |
If |
include_proposal |
If |
order |
Order results by ids |
limit |
Limit the number of results, for testing purposes mainly |
A character string containing the SPARQL query
elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE) elx_make_query(resource_type = "regulation", include_corrigenda = TRUE, order = TRUE) elx_make_query(resource_type = "any", sector = 2) elx_make_query(resource_type = "manual", manual_type = "SWD")
elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE) elx_make_query(resource_type = "regulation", include_corrigenda = TRUE, order = TRUE) elx_make_query(resource_type = "any", sector = 2) elx_make_query(resource_type = "manual", manual_type = "SWD")
Executes cURL request to a pre-defined endpoint of the EU Publications Office. Relies on elx_make_query to generate valid SPARQL queries. Results are capped at 1 million rows.
elx_run_query( query = "", endpoint = "http://publications.europa.eu/webapi/rdf/sparql" )
elx_run_query( query = "", endpoint = "http://publications.europa.eu/webapi/rdf/sparql" )
query |
A valid SPARQL query specified by `elx_make_query()` |
endpoint |
SPARQL endpoint |
A data frame containing the results of the SPARQL query.
Column `work` contains the Cellar URI of the resource.
elx_run_query(elx_make_query("directive", include_force = TRUE, limit = 10))
elx_run_query(elx_make_query("directive", include_force = TRUE, limit = 10))