From 2102b8cb11e73adbfd8724560a8d628d4c9e25e2 Mon Sep 17 00:00:00 2001
From: s915 <s915.stem@gmail.com>
Date: Tue, 14 Apr 2020 00:17:05 +0200
Subject: [PATCH] New function 'rdb_series()'.

---
 NAMESPACE             |   1 +
 R/dot_rdb.R           |   4 +-
 R/rdb_datasets.R      |  24 ++++-
 R/rdb_dimensions.R    |  10 +-
 R/rdb_providers.R     |   2 +-
 R/rdb_series.R        | 244 ++++++++++++++++++++++++++++++++++++++++++
 R/zzz.R               |   3 +-
 man/rdb_datasets.Rd   |   7 +-
 man/rdb_dimensions.Rd |   2 +-
 man/rdb_providers.Rd  |   2 +-
 man/rdb_series.Rd     |  99 +++++++++++++++++
 11 files changed, 383 insertions(+), 15 deletions(-)
 create mode 100644 R/rdb_series.R
 create mode 100644 man/rdb_series.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 62f5dda..64bf2c0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,7 @@ export(rdb_datasets)
 export(rdb_dimensions)
 export(rdb_last_updates)
 export(rdb_providers)
+export(rdb_series)
 import(curl)
 import(data.table)
 import(jsonlite)
diff --git a/R/dot_rdb.R b/R/dot_rdb.R
index bb162a9..1358993 100644
--- a/R/dot_rdb.R
+++ b/R/dot_rdb.R
@@ -142,7 +142,9 @@
 
     DBdata <- lapply(sequence, function(i) {
       # Modifying link
-      tmp_api_link <- paste0(api_link, sep, "offset=", i * limit)
+      tmp_api_link <- paste0(
+        api_link, sep, "offset=", format(i * limit, scientific = FALSE)
+      )
       # Fetching data
       DBlist <- get_data(tmp_api_link, use_readLines, curl_config)
 
diff --git a/R/rdb_datasets.R b/R/rdb_datasets.R
index e83632f..bd34ccf 100644
--- a/R/rdb_datasets.R
+++ b/R/rdb_datasets.R
@@ -27,6 +27,7 @@
 #' @param simplify Logical (default \code{FALSE}). If \code{TRUE}, when the
 #' datasets are requested for only one provider then a \code{data.table}
 #' is returned, not a list of \code{data.table}s.
+#' @param ... Additionals arguments.
 #' @return A named list of \code{data.table}s or a \code{data.table}.
 #' @examples
 #' \dontrun{
@@ -49,14 +50,15 @@
 #' )
 #' }
 #' @seealso \code{\link{rdb_providers}}, \code{\link{rdb_last_updates}},
-#' \code{\link{rdb_dimensions}}
+#' \code{\link{rdb_dimensions}}, \code{\link{rdb_series}}
 #' @author Sebastien Galais
 #' @export
 rdb_datasets <- function(
   provider_code = NULL,
   use_readLines = getOption("rdbnomics.use_readLines"),
   curl_config = getOption("rdbnomics.curl_config"),
-  simplify = FALSE
+  simplify = FALSE,
+  ...
 ) {
   # TO CHECK THE DATASETS
   # parse_web <- sapply(provider_code, function(x) {
@@ -66,6 +68,16 @@ rdb_datasets <- function(
   #   gsub("\".*", "", y)
   # }, simplify = FALSE)
 
+  # Additionals arguments
+  progress_bar = TRUE
+  if (length(list(...)) > 0) {
+    tmp_progress_bar = list(...)$progress_bar
+    if (!is.null(tmp_progress_bar)) {
+      progress_bar <- tmp_progress_bar
+    }
+  }
+  check_argument(progress_bar, "logical")
+
   # All providers
   if (is.null(provider_code)) {
     provider_code <- rdb_providers(
@@ -89,7 +101,7 @@ rdb_datasets <- function(
   authorized_version(api_version)
 
   # Fetching all datasets
-  if (getOption("rdbnomics.progress_bar_datasets")) {
+  if (getOption("rdbnomics.progress_bar_datasets") & progress_bar) {
     pb <- utils::txtProgressBar(min = 0, max = length(provider_code), style = 3)
   }
 
@@ -105,7 +117,7 @@ rdb_datasets <- function(
       tmp <- tmp[, .(code, name)]
       tmp <- unique(tmp)
 
-      if (getOption("rdbnomics.progress_bar_datasets")) {
+      if (getOption("rdbnomics.progress_bar_datasets") & progress_bar) {
         utils::setTxtProgressBar(pb, i)
       }
 
@@ -115,7 +127,9 @@ rdb_datasets <- function(
     })
   }, simplify = FALSE)
 
-  if (getOption("rdbnomics.progress_bar_datasets")) { close(pb) }
+  if (getOption("rdbnomics.progress_bar_datasets") & progress_bar) {
+    close(pb)
+  }
 
   datasets <- stats::setNames(datasets, provider_code)
   datasets <- Filter(Negate(is.null), datasets)
diff --git a/R/rdb_dimensions.R b/R/rdb_dimensions.R
index 3329743..385347d 100644
--- a/R/rdb_dimensions.R
+++ b/R/rdb_dimensions.R
@@ -60,7 +60,7 @@
 #' )
 #' }
 #' @seealso \code{\link{rdb_providers}}, \code{\link{rdb_last_updates}},
-#' \code{\link{rdb_datasets}}
+#' \code{\link{rdb_datasets}}, \code{\link{rdb_series}}
 #' @author Sebastien Galais
 #' @export
 rdb_dimensions <- function(
@@ -73,8 +73,12 @@ rdb_dimensions <- function(
   # Additionals arguments
   progress_bar = TRUE
   if (length(list(...)) > 0) {
-    progress_bar = list(...)$progress_bar
+    tmp_progress_bar = list(...)$progress_bar
+    if (!is.null(tmp_progress_bar)) {
+      progress_bar <- tmp_progress_bar
+    }
   }
+  check_argument(progress_bar, "logical")
 
   # All providers
   if (is.null(provider_code) & !is.null(dataset_code)) {
@@ -96,7 +100,7 @@ rdb_dimensions <- function(
     dataset_code <- rdb_datasets(
       provider_code = provider_code,
       use_readLines = use_readLines, curl_config = curl_config,
-      simplify = FALSE
+      simplify = FALSE, progress_bar = FALSE
     )
     dataset_code <- sapply(dataset_code, `[[`, "code", simplify = FALSE)
   } else {
diff --git a/R/rdb_providers.R b/R/rdb_providers.R
index c5d173a..fc4bff4 100644
--- a/R/rdb_providers.R
+++ b/R/rdb_providers.R
@@ -35,7 +35,7 @@
 #' rdb_providers(curl_config = list(proxy = "<proxy>", proxyport = <port>))
 #' }
 #' @seealso \code{\link{rdb_last_updates}}, \code{\link{rdb_datasets}},
-#' \code{\link{rdb_dimensions}}
+#' \code{\link{rdb_dimensions}}, \code{\link{rdb_series}}
 #' @author Sebastien Galais
 #' @export
 rdb_providers <- function(
diff --git a/R/rdb_series.R b/R/rdb_series.R
new file mode 100644
index 0000000..4732a3b
--- /dev/null
+++ b/R/rdb_series.R
@@ -0,0 +1,244 @@
+#' Download list of series for datasets of DBnomics providers.
+#'
+#' \code{rdb_series} downloads the list of series for
+#' available datasets of a selection of providers from
+#' \href{https://db.nomics.world/}{DBnomics}. \cr
+#' /!\ We warn the user that this function can be (very) long to execute. We remind
+#' that DBnomics requests data from 63 providers to retrieve 21675 datasets for
+#' a total of approximately 720 millions series.
+#'
+#' By default, the function returns a nested named list of \code{data.table}s
+#' containing the dimensions of datasets for providers from
+#' \href{https://db.nomics.world/}{DBnomics}.
+#' 
+#' @param provider_code Character string (default \code{NULL}). DBnomics code
+#' of one or multiple providers. If \code{NULL}, the providers are firstly
+#' dowloaded with the function \code{\link{rdb_providers}} and then the
+#' datasets are requested.
+#' @param dataset_code Character string (default \code{NULL}). DBnomics code
+#' of one or multiple datasets of a provider. If \code{NULL}, the datasets
+#' codes are dowloaded with the function \code{\link{rdb_datasets}} and then
+#' the dimensions are requested.
+#' @param use_readLines Logical (default \code{FALSE}). If \code{TRUE}, then
+#' the data are requested and read with the base function \code{readLines} i.e.
+#' through the default R internet connection. This can be used to get round the
+#' error \code{Could not resolve host: api.db.nomics.world}.
+#' @param curl_config Named list (default \code{NULL}). If not
+#' \code{NULL}, it is used to configure a proxy connection. This
+#' configuration is passed to the function \code{curl_fetch_memory} of the package
+#' \pkg{curl}. A temporary \code{curl_handle} object is created internally
+#' with arguments equal to the provided list in \code{curl_config}.\cr
+#' For \code{curl_fetch_memory} arguments see \code{\link[curl]{curl_fetch}}.
+#' For available curl options see \code{\link[curl]{curl_options}},
+#' \code{names(curl_options())} and
+#' \href{https://curl.haxx.se/libcurl/c/curl_easy_setopt.html}{libcurl}.
+#' @param simplify Logical (default \code{FALSE}). If \code{TRUE}, when the
+#' dimensions are requested for only one provider and one dataset then a
+#' named list of \code{data.table}s is returned, not a nested named list of
+#' \code{data.table}s.
+#' @param verbose Logical (default \code{FALSE}). Show number of series per
+#' datasets and providers.
+#' @param ... Additionals arguments.
+#' @return A nested named list of \code{data.table}s or a named list of
+#' \code{data.table}s.
+#' @examples
+#' \dontrun{
+#' rdb_series(provider_code = "IMF", dataset_code = "WEO")
+#' 
+#' rdb_series(provider_code = "IMF", dataset_code = "WEO", simplify = TRUE)
+#' 
+#' rdb_series(provider_code = "IMF", verbose = TRUE)
+#' 
+#' options(rdbnomics.progress_bar_series = TRUE)
+#' rdb_series(provider_code = "IMF", dataset_code = "WEO")
+#' options(rdbnomics.progress_bar_series = FALSE)
+#' 
+#' rdb_series(
+#'   provider_code = "IMF", dataset_code = "WEO",
+#'   use_readLines = TRUE
+#' )
+#' 
+#' rdb_series(
+#'   provider_code = "IMF", dataset_code = "WEO",
+#'   curl_config = list(proxy = "<proxy>", proxyport = <port>)
+#' )
+#' }
+#' @seealso \code{\link{rdb_providers}}, \code{\link{rdb_last_updates}},
+#' \code{\link{rdb_datasets}}, \code{\link{rdb_dimensions}}
+#' @author Sebastien Galais
+#' @export
+rdb_series <- function(
+  provider_code = NULL, dataset_code = NULL,
+  use_readLines = getOption("rdbnomics.use_readLines"),
+  curl_config = getOption("rdbnomics.curl_config"),
+  simplify = FALSE, verbose = FALSE,
+  ...
+) {
+  # Additionals arguments
+  progress_bar = TRUE
+  if (length(list(...)) > 0) {
+    tmp_progress_bar = list(...)$progress_bar
+    if (!is.null(tmp_progress_bar)) {
+      progress_bar <- tmp_progress_bar
+    }
+  }
+  check_argument(progress_bar, "logical")
+
+  only_first_two = FALSE
+  if (length(list(...)) > 0) {
+    tmp_only_first_two = list(...)$only_first_two
+    if (!is.null(tmp_only_first_two)) {
+      only_first_two <- tmp_only_first_two
+    }
+  }
+  check_argument(only_first_two, "logical")
+
+  # All providers
+  if (is.null(provider_code)) {
+    provider_code <- rdb_providers(
+      code = TRUE,
+      use_readLines = use_readLines, curl_config = curl_config
+    )
+  }
+  check_argument(provider_code, "character", len = FALSE)
+
+  if (is.null(dataset_code)) {
+    dataset_code <- rdb_datasets(
+      provider_code = provider_code,
+      use_readLines = use_readLines, curl_config = curl_config,
+      simplify = FALSE, progress_bar = FALSE
+    )
+    dataset_code <- sapply(dataset_code, `[[`, "code", simplify = FALSE)
+  } else {
+    check_argument(dataset_code, "character", len = FALSE)
+    dataset_code <- list(dataset_code)
+    dataset_code <- stats::setNames(dataset_code, provider_code)
+  }
+
+  # Checking arguments
+  check_argument(use_readLines, "logical")
+  check_argument(simplify, "logical")
+
+  # Setting API url
+  api_base_url <- getOption("rdbnomics.api_base_url")
+  check_argument(api_base_url, "character")
+
+  # Setting API version
+  api_version <- getOption("rdbnomics.api_version")
+  check_argument(api_version, c("numeric", "integer"))
+  authorized_version(api_version)
+
+  # Fetching all datasets
+  series <- sapply(provider_code, function(pc) {
+    if (getOption("rdbnomics.progress_bar_series") & progress_bar) {
+      pb <- utils::txtProgressBar(
+        min = 0, max = length(dataset_code[[pc]]), style = 3
+      )
+    }
+    
+    tmp_ser <- sapply(seq_along(dataset_code[[pc]]), function(i) {
+      tryCatch({
+        dc <- dataset_code[[pc]][i]
+
+        api_link <- paste0(api_base_url, "/v", api_version, "/series/", pc, "/", dc)
+        DBlist <- get_data(api_link, use_readLines, curl_config)
+
+        limit <- DBlist$series$limit
+        num_found <- DBlist$series$num_found
+
+        if (verbose) {
+          if (getOption("rdbnomics.progress_bar_series") & progress_bar) {
+            cat("\n")
+          }
+          cat(
+            paste0(
+              "The dataset '", dc, "' from provider '", pc, "' contains ",
+              num_found, " series."
+            ),
+            "\n"
+          )
+        }
+
+        DBdata <- list(
+          data.table::data.table(
+            series_code = DBlist$series$docs$series_code,
+            series_name = DBlist$series$docs$series_name
+          )
+        )
+
+        if (num_found > limit) {
+          DBdata0 <- DBdata
+          rm(DBdata)
+
+          sequence <- seq(1, floor(num_found / limit), 1)
+
+          # Modifying link
+          if (grepl("offset=", api_link)) {
+            api_link <- gsub("\\&offset=[0-9]+", "", api_link)
+            api_link <- gsub("\\?offset=[0-9]+", "", api_link)
+          }
+          sep <- ifelse(grepl("\\?", api_link), "&", "?")
+
+          if (only_first_two) {
+            sequence <- utils::head(sequence, 1)
+          }
+
+          DBdata <- lapply(sequence, function(j) {
+            # Modifying link
+            tmp_api_link <- paste0(
+              api_link, sep, "offset=", format(j * limit, scientific = FALSE)
+            )
+            # Fetching data
+            DBlist <- get_data(tmp_api_link, use_readLines, curl_config)
+
+            # Extracting data
+            data.table::data.table(
+              series_code = DBlist$series$docs$series_code,
+              series_name = DBlist$series$docs$series_name
+            )
+          })
+
+          DBdata <- append(DBdata, DBdata0, 0)
+          rm(DBdata0)
+        }
+
+        DBdata <- rbindlist(DBdata, use.names = TRUE, fill = TRUE)
+
+        if (getOption("rdbnomics.progress_bar_series") & progress_bar) {
+          utils::setTxtProgressBar(pb, i)
+        }
+
+        DBdata
+      }, error = function(e) {
+        NULL
+      })
+    }, simplify = FALSE)
+
+    if (getOption("rdbnomics.progress_bar_series") & progress_bar) {
+      close(pb)
+    }
+
+    tmp_ser <- stats::setNames(tmp_ser, dataset_code[[pc]])
+    Filter(Negate(is.null), tmp_ser)
+  }, simplify = FALSE)
+  series <- Filter(Negate(is.null), series)
+  # We remove the empty lists, the empty data.tables, etc.
+  series <- check_dimensions(series, 2)
+
+  if (length(series) <= 0) {
+    warning(
+      "Error when fetching the series.",
+      call. = FALSE
+    )
+    return(NULL)
+  }
+
+  if (simplify) {
+    len <- sapply(series, length)
+    if (length(series) == 1 & len[1] == 1) {
+      return(series[[1]][[1]])
+    }
+  }
+
+  series
+}
diff --git a/R/zzz.R b/R/zzz.R
index f8441a0..344c066 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -40,7 +40,8 @@
     rdbnomics.progress_bar = TRUE,
     rdbnomics.translate_codes = TRUE,
     rdbnomics.progress_bar_datasets = FALSE,
-    rdbnomics.progress_bar_dimensions = FALSE
+    rdbnomics.progress_bar_dimensions = FALSE,
+    rdbnomics.progress_bar_series = FALSE
   )
   opts <- append(
     opts,
diff --git a/man/rdb_datasets.Rd b/man/rdb_datasets.Rd
index dbba6a1..3a55a07 100644
--- a/man/rdb_datasets.Rd
+++ b/man/rdb_datasets.Rd
@@ -8,7 +8,8 @@ rdb_datasets(
   provider_code = NULL,
   use_readLines = getOption("rdbnomics.use_readLines"),
   curl_config = getOption("rdbnomics.curl_config"),
-  simplify = FALSE
+  simplify = FALSE,
+  ...
 )
 }
 \arguments{
@@ -35,6 +36,8 @@ For available curl options see \code{\link[curl]{curl_options}},
 \item{simplify}{Logical (default \code{FALSE}). If \code{TRUE}, when the
 datasets are requested for only one provider then a \code{data.table}
 is returned, not a list of \code{data.table}s.}
+
+\item{...}{Additionals arguments.}
 }
 \value{
 A named list of \code{data.table}s or a \code{data.table}.
@@ -71,7 +74,7 @@ rdb_datasets(
 }
 \seealso{
 \code{\link{rdb_providers}}, \code{\link{rdb_last_updates}},
-\code{\link{rdb_dimensions}}
+\code{\link{rdb_dimensions}}, \code{\link{rdb_series}}
 }
 \author{
 Sebastien Galais
diff --git a/man/rdb_dimensions.Rd b/man/rdb_dimensions.Rd
index babe7d8..538be4c 100644
--- a/man/rdb_dimensions.Rd
+++ b/man/rdb_dimensions.Rd
@@ -86,7 +86,7 @@ rdb_dimensions(
 }
 \seealso{
 \code{\link{rdb_providers}}, \code{\link{rdb_last_updates}},
-\code{\link{rdb_datasets}}
+\code{\link{rdb_datasets}}, \code{\link{rdb_series}}
 }
 \author{
 Sebastien Galais
diff --git a/man/rdb_providers.Rd b/man/rdb_providers.Rd
index dc4c198..35d2e8c 100644
--- a/man/rdb_providers.Rd
+++ b/man/rdb_providers.Rd
@@ -55,7 +55,7 @@ rdb_providers(curl_config = list(proxy = "<proxy>", proxyport = <port>))
 }
 \seealso{
 \code{\link{rdb_last_updates}}, \code{\link{rdb_datasets}},
-\code{\link{rdb_dimensions}}
+\code{\link{rdb_dimensions}}, \code{\link{rdb_series}}
 }
 \author{
 Sebastien Galais
diff --git a/man/rdb_series.Rd b/man/rdb_series.Rd
new file mode 100644
index 0000000..3a4d8fa
--- /dev/null
+++ b/man/rdb_series.Rd
@@ -0,0 +1,99 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rdb_series.R
+\name{rdb_series}
+\alias{rdb_series}
+\title{Download list of series for datasets of DBnomics providers.}
+\usage{
+rdb_series(
+  provider_code = NULL,
+  dataset_code = NULL,
+  use_readLines = getOption("rdbnomics.use_readLines"),
+  curl_config = getOption("rdbnomics.curl_config"),
+  simplify = FALSE,
+  verbose = FALSE,
+  ...
+)
+}
+\arguments{
+\item{provider_code}{Character string (default \code{NULL}). DBnomics code
+of one or multiple providers. If \code{NULL}, the providers are firstly
+dowloaded with the function \code{\link{rdb_providers}} and then the
+datasets are requested.}
+
+\item{dataset_code}{Character string (default \code{NULL}). DBnomics code
+of one or multiple datasets of a provider. If \code{NULL}, the datasets
+codes are dowloaded with the function \code{\link{rdb_datasets}} and then
+the dimensions are requested.}
+
+\item{use_readLines}{Logical (default \code{FALSE}). If \code{TRUE}, then
+the data are requested and read with the base function \code{readLines} i.e.
+through the default R internet connection. This can be used to get round the
+error \code{Could not resolve host: api.db.nomics.world}.}
+
+\item{curl_config}{Named list (default \code{NULL}). If not
+\code{NULL}, it is used to configure a proxy connection. This
+configuration is passed to the function \code{curl_fetch_memory} of the package
+\pkg{curl}. A temporary \code{curl_handle} object is created internally
+with arguments equal to the provided list in \code{curl_config}.\cr
+For \code{curl_fetch_memory} arguments see \code{\link[curl]{curl_fetch}}.
+For available curl options see \code{\link[curl]{curl_options}},
+\code{names(curl_options())} and
+\href{https://curl.haxx.se/libcurl/c/curl_easy_setopt.html}{libcurl}.}
+
+\item{simplify}{Logical (default \code{FALSE}). If \code{TRUE}, when the
+dimensions are requested for only one provider and one dataset then a
+named list of \code{data.table}s is returned, not a nested named list of
+\code{data.table}s.}
+
+\item{verbose}{Logical (default \code{FALSE}). Show number of series per
+datasets and providers.}
+
+\item{...}{Additionals arguments.}
+}
+\value{
+A nested named list of \code{data.table}s or a named list of
+\code{data.table}s.
+}
+\description{
+\code{rdb_series} downloads the list of series for
+available datasets of a selection of providers from
+\href{https://db.nomics.world/}{DBnomics}. \cr
+/!\ We warn the user that this function can be (very) long to execute. We remind
+that DBnomics requests data from 63 providers to retrieve 21675 datasets for
+a total of approximately 720 millions series.
+}
+\details{
+By default, the function returns a nested named list of \code{data.table}s
+containing the dimensions of datasets for providers from
+\href{https://db.nomics.world/}{DBnomics}.
+}
+\examples{
+\dontrun{
+rdb_series(provider_code = "IMF", dataset_code = "WEO")
+
+rdb_series(provider_code = "IMF", dataset_code = "WEO", simplify = TRUE)
+
+rdb_series(provider_code = "IMF", verbose = TRUE)
+
+options(rdbnomics.progress_bar_series = TRUE)
+rdb_series(provider_code = "IMF", dataset_code = "WEO")
+options(rdbnomics.progress_bar_series = FALSE)
+
+rdb_series(
+  provider_code = "IMF", dataset_code = "WEO",
+  use_readLines = TRUE
+)
+
+rdb_series(
+  provider_code = "IMF", dataset_code = "WEO",
+  curl_config = list(proxy = "<proxy>", proxyport = <port>)
+)
+}
+}
+\seealso{
+\code{\link{rdb_providers}}, \code{\link{rdb_last_updates}},
+\code{\link{rdb_datasets}}, \code{\link{rdb_dimensions}}
+}
+\author{
+Sebastien Galais
+}
-- 
GitLab