Commit beec9948 authored by Sébastien Galais's avatar Sébastien Galais
Browse files

Many changes

Version 0.6.0.\nUpdate NEWS.\nAdd clearer error message to the function to_xts() (@Martin Feldkircher, 20200326 email).\nTwo new functions rdb_datasets() and rdb_dimensions().\nUpdate vignette.
parent 7c4a5a24
Pipeline #134998 passed with stage
in 10 minutes
Package: rdbnomics
Type: Package
Title: Download DBnomics Data
Version: 0.5.2
Version: 0.6.0
Authors@R: c(person("Sebastien", "Galais", role = c("cre", "ctb"),
email = "s915.stem@gmail.com"),
person("Thomas", "Brand", role = c("aut"),
......@@ -11,15 +11,15 @@ Description: R access to hundreds of millions data series from DBnomics API
Depends:
R (>= 3.1.0)
License: AGPL-3
URL: https://github.com/dbnomics/rdbnomics
BugReports: https://github.com/dbnomics/rdbnomics/issues
URL: https://git.nomics.world/dbnomics/rdbnomics
BugReports: https://git.nomics.world/dbnomics/rdbnomics/issues
Encoding: UTF-8
LazyData: true
Imports:
curl,
jsonlite,
data.table
RoxygenNote: 7.0.2
RoxygenNote: 7.1.0
Suggests:
knitr,
rmarkdown,
......
......@@ -3,6 +3,8 @@
export(dbnomics)
export(rdb)
export(rdb_by_api_link)
export(rdb_datasets)
export(rdb_dimensions)
export(rdb_last_updates)
export(rdb_providers)
import(curl)
......
# rdbnomics 0.6.0
* New function `rdb_datasets()` to request the available datasets of the
providers (@fmgithub2017, #3 github).
* New function `rdb_dimensions()` to request the list of the dimensions of
the available datasets of the providers (@fmgithub2017, #3 github).
* Add a clearer error message to the function to_xts() in README (Martin
Feldkircher, 20200326 email).
# rdbnomics 0.5.2
* Correction of a bug in the internal function `deploy`. The cases
......
#' Download list of datasets for DBnomics providers.
#'
#' \code{rdb_datasets} downloads the list of available datasets for a selection
#' of providers (or all of them) from \href{https://db.nomics.world/}{DBnomics}.
#'
#' By default, the function returns a named list of \code{data.table}s
#' containing the datasets of the providers from
#' \href{https://db.nomics.world/}{DBnomics}.
#'
#' @param provider_code Character string (default \code{NULL}). DBnomics code
#' of one or multiple providers. If \code{NULL}, the providers are firstly
#' dowloaded with the function \code{\link{rdb_providers}} and then the
#' available datasets are requested.
#' @param use_readLines Logical (default \code{FALSE}). If \code{TRUE}, then
#' the data are requested and read with the base function \code{readLines} i.e.
#' through the default R internet connection. This can be used to get round the
#' error \code{Could not resolve host: api.db.nomics.world}.
#' @param curl_config Named list (default \code{NULL}). If not
#' \code{NULL}, it is used to configure a proxy connection. This
#' configuration is passed to the function \code{curl_fetch_memory} of the package
#' \pkg{curl}. A temporary \code{curl_handle} object is created internally
#' with arguments equal to the provided list in \code{curl_config}.\cr
#' For \code{curl_fetch_memory} arguments see \code{\link[curl]{curl_fetch}}.
#' For available curl options see \code{\link[curl]{curl_options}},
#' \code{names(curl_options())} and
#' \href{https://curl.haxx.se/libcurl/c/curl_easy_setopt.html}{libcurl}.
#' @param simplify Logical (default \code{FALSE}). If \code{TRUE}, when the
#' datasets are requested for only one provider then a \code{data.table}
#' is returned, not a list of \code{data.table}s.
#' @return A named list of \code{data.table}s or a \code{data.table}.
#' @examples
#' \dontrun{
#' rdb_datasets(provider_code = "IMF")
#'
#' rdb_datasets(provider_code = "IMF", simplify = TRUE)
#'
#' rdb_datasets(provider_code = c("IMF", "BDF"))
#'
#' options(rdbnomics.progress_bar_datasets = TRUE)
#' rdb_datasets()
#' options(rdbnomics.progress_bar_datasets = FALSE)
#'
#'
#' rdb_datasets(provider_code = "IMF", use_readLines = TRUE)
#'
#' rdb_datasets(
#' provider_code = "IMF",
#' curl_config = list(proxy = "<proxy>", proxyport = <port>)
#' )
#' }
#' @seealso \code{\link{rdb_providers}}, \code{\link{rdb_last_updates}},
#' \code{\link{rdb_dimensions}}
#' @author Sebastien Galais
#' @export
rdb_datasets <- function(
provider_code = NULL,
use_readLines = getOption("rdbnomics.use_readLines"),
curl_config = getOption("rdbnomics.curl_config"),
simplify = FALSE
) {
# TO CHECK THE DATASETS
# parse_web <- sapply(provider_code, function(x) {
# y <- readLines(paste0("https://db.nomics.world/", x))
# y <- y[grepl(paste0("<a rel=\"prefetch\" href=\"/", x), y)]
# y <- gsub(paste0(".*href=\"/", x, "/"), "", y)
# gsub("\".*", "", y)
# }, simplify = FALSE)
# All providers
if (is.null(provider_code)) {
provider_code <- rdb_providers(
code = TRUE,
use_readLines = use_readLines, curl_config = curl_config
)
}
# Checking arguments
check_argument(provider_code, "character", len = FALSE)
check_argument(use_readLines, "logical")
check_argument(simplify, "logical")
# Setting API url
api_base_url <- getOption("rdbnomics.api_base_url")
check_argument(api_base_url, "character")
# Setting API version
api_version <- getOption("rdbnomics.api_version")
check_argument(api_version, c("numeric", "integer"))
authorized_version(api_version)
# Fetching all datasets
if (getOption("rdbnomics.progress_bar_datasets")) {
pb <- utils::txtProgressBar(min = 0, max = length(provider_code), style = 3)
}
datasets <- sapply(seq_along(provider_code), function(i) {
tryCatch({
pc <- provider_code[i]
tmp <- paste0(api_base_url, "/v", api_version, "/providers/", pc)
tmp <- get_data(tmp, use_readLines, curl_config)
tmp <- tmp$category_tree
tmp <- unpack(tmp)
tmp <- rbindlist_recursive(tmp)
tmp <- tmp[, .(code, name)]
tmp <- unique(tmp)
if (getOption("rdbnomics.progress_bar_datasets")) {
utils::setTxtProgressBar(pb, i)
}
tmp[order(code)]
}, error = function(e) {
NULL
})
}, simplify = FALSE)
if (getOption("rdbnomics.progress_bar_datasets")) { close(pb) }
datasets <- stats::setNames(datasets, provider_code)
datasets <- Filter(Negate(is.null), datasets)
datasets <- check_datasets(datasets)
if (length(datasets) <= 0) {
warning(
"Error when fetching the datasets codes.",
call. = FALSE
)
return(NULL)
}
if (simplify) {
if (length(datasets) == 1) {
return(datasets[[1]])
}
}
datasets
}
#' Download list of dimensions for datasets of DBnomics providers.
#'
#' \code{rdb_dimensions} downloads the list of dimensions (if they exist) for
#' available datasets of a selection of providers from
#' \href{https://db.nomics.world/}{DBnomics}.
#'
#' By default, the function returns a nested named list of \code{data.table}s
#' containing the dimensions of datasets for providers from
#' \href{https://db.nomics.world/}{DBnomics}.
#'
#' @param provider_code Character string (default \code{NULL}). DBnomics code
#' of one or multiple providers. If \code{NULL}, the providers are firstly
#' dowloaded with the function \code{\link{rdb_providers}} and then the
#' datasets are requested.
#' @param dataset_code Character string (default \code{NULL}). DBnomics code
#' of one or multiple datasets of a provider. If \code{NULL}, the datasets
#' codes are dowloaded with the function \code{\link{rdb_datasets}} and then
#' the dimensions are requested.
#' @param use_readLines Logical (default \code{FALSE}). If \code{TRUE}, then
#' the data are requested and read with the base function \code{readLines} i.e.
#' through the default R internet connection. This can be used to get round the
#' error \code{Could not resolve host: api.db.nomics.world}.
#' @param curl_config Named list (default \code{NULL}). If not
#' \code{NULL}, it is used to configure a proxy connection. This
#' configuration is passed to the function \code{curl_fetch_memory} of the package
#' \pkg{curl}. A temporary \code{curl_handle} object is created internally
#' with arguments equal to the provided list in \code{curl_config}.\cr
#' For \code{curl_fetch_memory} arguments see \code{\link[curl]{curl_fetch}}.
#' For available curl options see \code{\link[curl]{curl_options}},
#' \code{names(curl_options())} and
#' \href{https://curl.haxx.se/libcurl/c/curl_easy_setopt.html}{libcurl}.
#' @param simplify Logical (default \code{FALSE}). If \code{TRUE}, when the
#' dimensions are requested for only one provider and one dataset then a
#' named list of \code{data.table}s is returned, not a nested named list of
#' \code{data.table}s.
#' @return A nested named list of \code{data.table}s or a named list of
#' \code{data.table}s.
#' @examples
#' \dontrun{
#' rdb_dimensions(provider_code = "IMF", dataset_code = "WEO")
#'
#' rdb_dimensions(provider_code = "IMF", dataset_code = "WEO", simplify = TRUE)
#'
#' rdb_dimensions(provider_code = "IMF")
#'
#' # /!\ It is very long !
#' options(rdbnomics.progress_bar_dimensions = TRUE)
#' rdb_dimensions()
#' options(rdbnomics.progress_bar_dimensions = FALSE)
#'
#' rdb_dimensions(
#' provider_code = "IMF", dataset_code = "WEO",
#' use_readLines = TRUE
#' )
#'
#' rdb_dimensions(
#' provider_code = "IMF", dataset_code = "WEO",
#' curl_config = list(proxy = "<proxy>", proxyport = <port>)
#' )
#' }
#' @seealso \code{\link{rdb_providers}}, \code{\link{rdb_last_updates}},
#' \code{\link{rdb_datasets}}
#' @author Sebastien Galais
#' @export
rdb_dimensions <- function(
provider_code = NULL, dataset_code = NULL,
use_readLines = getOption("rdbnomics.use_readLines"),
curl_config = getOption("rdbnomics.curl_config"),
simplify = FALSE
) {
# All providers
if (is.null(provider_code) & !is.null(dataset_code)) {
stop(
"If you give datasets codes, please give also a provider code.",
call. = FALSE
)
}
if (is.null(provider_code)) {
provider_code <- rdb_providers(
code = TRUE,
use_readLines = use_readLines, curl_config = curl_config
)
}
check_argument(provider_code, "character", len = FALSE)
if (is.null(dataset_code)) {
dataset_code <- rdb_datasets(
provider_code = provider_code,
use_readLines = use_readLines, curl_config = curl_config,
simplify = FALSE
)
dataset_code <- sapply(dataset_code, `[[`, "code", simplify = FALSE)
} else {
check_argument(dataset_code, "character", len = FALSE)
dataset_code <- list(dataset_code)
dataset_code <- stats::setNames(dataset_code, provider_code)
}
# Checking arguments
check_argument(use_readLines, "logical")
check_argument(simplify, "logical")
# Setting API url
api_base_url <- getOption("rdbnomics.api_base_url")
check_argument(api_base_url, "character")
# Setting API version
api_version <- getOption("rdbnomics.api_version")
check_argument(api_version, c("numeric", "integer"))
authorized_version(api_version)
# Fetching all datasets
dimensions <- sapply(provider_code, function(pc) {
if (getOption("rdbnomics.progress_bar_dimensions")) {
pb <- utils::txtProgressBar(
min = 0, max = length(dataset_code[[pc]]), style = 3
)
}
tmp_dim <- sapply(seq_along(dataset_code[[pc]]), function(i) {
tryCatch({
dc <- dataset_code[[pc]][i]
tmp <- paste0(api_base_url, "/v", api_version, "/datasets/", pc, "/", dc)
tmp <- get_data(tmp, use_readLines, curl_config)
tmp1 <- tmp$datasets$docs$dimensions_labels
if (is.null(tmp1)) {
tmp1 <- try(
tmp$datasets[[paste0(pc, "/", dc)]]$dimensions_labels,
silent = TRUE
)
if (inherits(tmp1, "try-error")) {
tmp1 <- NULL
}
}
if (is.null(tmp1)) {
# Sometimes "dimensions_labels" is missing
tmp1 <- data.table::data.table(A = character(), B = character())
} else {
tmp1 <- as.list(tmp1)
tmp1 <- data.table::data.table(A = unlist(tmp1), B = names(tmp1))
tmp1 <- unique(tmp1)
}
tmp2 <- tmp$datasets$docs$dimensions_values_labels
if (is.null(tmp2)) {
tmp2 <- try(
tmp$datasets[[paste0(pc, "/", dc)]]$dimensions_values_labels,
silent = TRUE
)
if (inherits(tmp2, "try-error")) {
tmp2 <- NULL
}
}
tmp3 <- sapply(names(tmp2), function(nm) {
z <- tmp2[[nm]]
if (
is.list(z) & !is.data.frame(z) &
!data.table::is.data.table(z)
) {
# "z" is actually a list with a matrix
z <- z[[1]]
z <- as.data.table(z)
} else {
if (is.matrix(z)) {
# "z" is actually a matrix
z <- as.data.table(z)
} else {
z <- as.data.table(z)
z <- as.list(z)
z <- data.table::data.table(V1 = names(z), V2 = unlist(z))
}
}
data.table::setnames(z, "V1", nm)
new_name <- tmp1[B == nm]$A
data.table::setnames(
z, "V2", ifelse(length(new_name) <= 0, new_title(nm), new_name)
)
z
}, simplify = FALSE)
if (getOption("rdbnomics.progress_bar_dimensions")) {
utils::setTxtProgressBar(pb, i)
}
tmp3
}, error = function(e) {
NULL
})
}, simplify = FALSE)
if (getOption("rdbnomics.progress_bar_dimensions")) { close(pb) }
tmp_dim <- stats::setNames(tmp_dim, dataset_code[[pc]])
Filter(Negate(is.null), tmp_dim)
}, simplify = FALSE)
dimensions <- Filter(Negate(is.null), dimensions)
# We remove the empty lists, the empty data.tables, etc.
dimensions <- check_dimensions(dimensions, 2)
if (length(dimensions) <= 0) {
warning(
"Error when fetching the dimensions.",
call. = FALSE
)
return(NULL)
}
if (simplify) {
len <- sapply(dimensions, length)
if (length(dimensions) == 1 & len[1] == 1) {
return(dimensions[[1]][[1]])
}
}
dimensions
}
......@@ -33,7 +33,8 @@
#'
#' rdb_last_updates(curl_config = list(proxy = "<proxy>", proxyport = <port>))
#' }
#' @seealso \code{\link{rdb_providers}}
#' @seealso \code{\link{rdb_providers}}, \code{\link{rdb_datasets}},
#' \code{\link{rdb_dimensions}}
#' @author Sebastien Galais
#' @export
rdb_last_updates <- function(
......
......@@ -34,7 +34,8 @@
#'
#' rdb_providers(curl_config = list(proxy = "<proxy>", proxyport = <port>))
#' }
#' @seealso \code{\link{rdb_last_updates}}
#' @seealso \code{\link{rdb_last_updates}}, \code{\link{rdb_datasets}},
#' \code{\link{rdb_dimensions}}
#' @author Sebastien Galais
#' @export
rdb_providers <- function(
......
......@@ -13,7 +13,7 @@ if (getRversion() >= "2.15.1") {
vars <- c(
".", ":=", "value", "dotI", "period", "period_start_day", "series_code",
"filtered", "original_period", "series_name", "original_value",
"period_middle_day"
"period_middle_day", "code", "name", "A", "B", "C", "k", "V1", "V2"
)
utils::globalVariables(unique(vars))
}
\ No newline at end of file
No preview for this file type
......@@ -651,3 +651,145 @@ list_has_curl_handle <- function(x) {
FALSE
}
}
#-------------------------------------------------------------------------------
# unpack
unpack <- function(DT) {
if (is.null(DT)) {
return(NULL)
}
if (is.list(DT) & !data.table::is.data.table(DT)) {
if (length(DT) <= 0) {
return(NULL)
}
}
if (nrow(DT) <= 0) {
return(NULL)
}
data.table::setDT(DT)
DT[, k := .I]
DT <- split(DT, by = "k")
lapply(DT, function(DT_) {
if (!("children" %in% colnames(DT_))) {
DT_[, k := NULL]
DT_[]
} else {
if (is.null(DT_$children[[1]])) {
DT_[, k := NULL]
DT_[, .SD, .SDcols = setdiff(colnames(DT_), "children")]
} else {
lapply(DT_$children, unpack)
}
}
})
}
#-------------------------------------------------------------------------------
# rbindlist_recursive
rbindlist_recursive <- function(l) {
if (is.null(l)) {
return(NULL)
}
if (data.table::is.data.table(l)) {
return(l)
}
if (
sum(sapply(l, is.data.table), na.rm = TRUE) != length(l)
) {
l <- lapply(l, rbindlist_recursive)
}
data.table::rbindlist(l, use.names = TRUE, fill = TRUE)
}
#-------------------------------------------------------------------------------
# check_datasets
check_datasets <- function(l, run = 1) {
if (is.null(l)) {
return(l)
}
if (run == 0) {
return(l)
}
for (i1 in names(l)) {
if (is.null(l[[i1]])) {
l[[i1]] <- NULL
} else {
if (length(l[[i1]]) <= 0) {
l[[i1]] <- NULL
} else {
if (nrow(l[[i1]]) <= 0) {
l[[i1]] <- NULL
}
}
}
}
check_datasets(l, run = run - 1)
}
#-------------------------------------------------------------------------------
# check_dimensions
check_dimensions <- function(l, run = 1) {
if (is.null(l)) {
return(l)
}
if (run == 0) {
return(l)
}
for (i1 in names(l)) {
if (length(l[[i1]]) <= 0) {
l[[i1]] <- NULL
} else {
for (i2 in names(l[[i1]])) {
if (is.null(l[[i1]][[i2]])) {
l[[i1]][[i2]] <- NULL
} else {
if (length(l[[i1]][[i2]]) <= 0) {
l[[i1]][[i2]] <- NULL
}
}
}
}
}
check_dimensions(l, run = run - 1)
}
#-------------------------------------------------------------------------------
# capital_first
capital_first <- function(x) {
if (is.null(x)) {
return(x)
}
if (length(x) <= 0) {
return(x)
}
paste0(
toupper(substr(x, 1, 1)),
tolower(substr(x, 2, nchar(x)))
)
}
#-------------------------------------------------------------------------------
# new_title
new_title <- function(x) {
if (is.null(x)) {
return("unknown")
}
if (length(x) <= 0) {
return("unknown")
}
if (x != capital_first(x)) {
return(capital_first(x))
}
toupper(x)
}
......@@ -38,7 +38,9 @@
rdbnomics.metadata = TRUE,
rdbnomics.filters = NULL,
rdbnomics.progress_bar = TRUE,
rdbnomics.translate_codes = TRUE
rdbnomics.translate_codes = TRUE,
rdbnomics.progress_bar_datasets = FALSE,
rdbnomics.progress_bar_dimensions = FALSE
)
opts <- append(
opts,
......
......@@ -277,6 +277,23 @@ to_xts <- function(
no_code = numeric()
)
}
if (!inherits(x[[1]], "Date")) {
    stop(
      paste0(
        "The first needed column '", needed_columns[1], "' is not of class ",
        "'Date' which is a problem for data.table::as.xts.data.table()."