Commit ba376523 authored by Mateo CAICEDO

Code improvement and harmonization in the light of db.nomics changes in terms...

Code improvement and harmonization in the light of db.nomics changes in terms of variable types and names.
parent 71140004
......@@ -9,7 +9,6 @@ Summary: Description step by step to build automatic update of the Christiano, M
Download: https://git.nomics.world/macro/cmr14-EA-data
output: html_document
---
```{r, message=FALSE, warning=FALSE, echo=FALSE, results='hide'}
if (!"pacman" %in% installed.packages()[,"Package"]) install.packages("pacman")
pacman::p_load(tidyverse,magrittr,lubridate,knitr,rsdmx,zoo,devtools)
......@@ -57,7 +56,7 @@ We take data directly from <a href="https://db.nomics.world/" target="_blank">DB
# Loans to non-financial corporations and to households
We download loans series from the Bank of International Settlements.
We download loans series from the Bank for International Settlements.
```{r}
# List of available countries in BIS data
EAtot_code <- c("DE", "FI", "FR", "IT", "PT", "AT",
......@@ -75,16 +74,11 @@ df <- rdb("BIS","CNFS",mask = filter)
loans <- df %>%
select(period, series_code, value, BORROWERS_CTY, series_name) %>%
rename(time = period,
var = series_code,
values = value,
country = BORROWERS_CTY) %>%
mutate(time=as.Date(as.yearqtr(gsub("Q","",time))),
values=ifelse(values=="NA",NA,values),
values = as.numeric(values)) %>%
rename(var = series_code,
country = BORROWERS_CTY) %>%
na.omit() %>%
filter(year(time)>=1980) %>%
arrange(var, time)
filter(year(period)>=1980) %>%
arrange(var, period)
loans_nfc <- loans %>%
filter(substr(var,6,6)=="N") %>%
......@@ -105,20 +99,20 @@ We can check the first date available for loans to non-financial corporations an
```{r}
loans_nfc %>%
group_by(country) %>%
summarize(firstdate = min(time)) %>%
summarize(firstdate = min(period)) %>%
arrange(firstdate) %>%
ungroup()
loans_hh %>%
group_by(country) %>%
summarize(firstdate = min(time)) %>%
summarize(firstdate = min(period)) %>%
arrange(firstdate) %>%
ungroup()
```
We decide to retain only the countries whose series are already available in the fourth quarter of 1990 (the `1990-10-01` observation) to compute the aggregated series.
```{r, fig.align="center", fig.height=7.5, fig.width=8}
available_country <- filter(loans_nfc,time=="1990-10-01")$country
available_country <- filter(loans_nfc,period=="1990-10-01")$country
loans_nfc_countries <-
loans_nfc %>%
......@@ -126,10 +120,10 @@ loans_nfc_countries <-
loans_nfc_EA <-
loans_nfc %>%
filter(country=="XM",
time>="1999-01-01") %>%
period>="1999-01-01") %>%
mutate(country = "EA")
ggplot(bind_rows(loans_nfc_countries,loans_nfc_EA),aes(time,values))+
ggplot(bind_rows(loans_nfc_countries,loans_nfc_EA),aes(period,value))+
geom_line(colour=blueObsMacro)+
facet_wrap(~country,ncol=3,scales = "free_y")+
theme + xlab(NULL) + ylab(NULL)+
......@@ -142,35 +136,36 @@ loans_hh_EA <- loans_hh %>%
filter(country=="XM") %>%
mutate(country = "EA")
ggplot(bind_rows(loans_hh_countries,loans_hh_EA),aes(time,values))+
ggplot(bind_rows(loans_hh_countries,loans_hh_EA),aes(period,value))+
geom_line(colour=blueObsMacro)+
facet_wrap(~country,ncol=3,scales = "free_y")+
theme + xlab(NULL) + ylab(NULL)+
ggtitle("Loans to Households and NPISHs (billions of euro)")
```
```{r chain, echo = FALSE}
chain <- function(to_rebase, basis, date_chain) {
date_chain <- as.Date(date_chain, "%Y-%m-%d")
valref <- basis %>%
filter(time == date_chain) %>%
transmute(var, values_ref = values)
filter(period == date_chain) %>%
transmute(var, values_ref = value)
res <- to_rebase %>%
filter(time <= date_chain) %>%
arrange(desc(time)) %>%
filter(period <= date_chain) %>%
arrange(desc(period)) %>%
group_by(var) %>%
mutate(growth_rate = c(1, values[-1]/lag(values)[-1])) %>%
mutate(growth_rate = c(1, value[-1]/lag(value)[-1])) %>%
full_join(valref, by = "var") %>%
ungroup() %>%
transmute(var,time, values = cumprod(growth_rate)*values_ref)
transmute(var,period, value = cumprod(growth_rate)*values_ref)
res %<>%
bind_rows(filter(basis, time > date_chain)) %>%
arrange(time)
bind_rows(filter(basis, period > date_chain)) %>%
arrange(period)
return(res)
......@@ -184,20 +179,20 @@ loans_nfc_countries %<>%
mutate(var=country)
loans_nfc_sumAll <-
loans_nfc_countries %>%
group_by(time) %>%
summarize(values=sum(values)) %>%
group_by(period) %>%
summarize(value=sum(value)) %>%
mutate(var="sum")
loans_nfc_sumNoNL <-
loans_nfc_countries %>%
filter(! var == "NL") %>%
group_by(time) %>%
summarize(values=sum(values)) %>%
group_by(period) %>%
summarize(value=sum(value)) %>%
mutate(var="sum")
loans_nfc_sumNoNLESBE <-
loans_nfc_countries %>%
filter(! var %in% c("NL","ES","BE")) %>%
group_by(time) %>%
summarize(values=sum(values)) %>%
group_by(period) %>%
summarize(value=sum(value)) %>%
mutate(var="sum")
loans_nfc_chainedNL <-
......@@ -212,7 +207,7 @@ loans_nfc_chained <-
loans_nfc_EA %<>% select(-country) %>% mutate(var="EA")
ggplot(bind_rows(loans_nfc_sumAll, loans_nfc_EA, loans_nfc_chained), aes(time, values,colour=var))+
ggplot(bind_rows(loans_nfc_sumAll, loans_nfc_EA, loans_nfc_chained), aes(period, value,colour=var))+
geom_line()+
scale_x_date(expand = c(0.01,0.01)) +
theme + xlab(NULL) + ylab(NULL)+
......@@ -231,20 +226,20 @@ loans_hh_countries %<>%
mutate(var=country)
loans_hh_sumAll <-
loans_hh_countries %>%
group_by(time) %>%
summarize(values=sum(values)) %>%
group_by(period) %>%
summarize(value=sum(value)) %>%
mutate(var="sum")
loans_hh_sumNoNL <-
loans_hh_countries %>%
filter(! var == "NL") %>%
group_by(time) %>%
summarize(values=sum(values)) %>%
group_by(period) %>%
summarize(value=sum(value)) %>%
mutate(var="sum")
loans_hh_sumNoNLESBE <-
loans_hh_countries %>%
filter(! var %in% c("NL","ES","BE")) %>%
group_by(time) %>%
summarize(values=sum(values)) %>%
group_by(period) %>%
summarize(value=sum(value)) %>%
mutate(var="sum")
loans_hh_chainedNL <-
......@@ -259,7 +254,7 @@ loans_hh_chained <-
loans_hh_EA %<>% select(-country) %>% mutate(var="EA")
ggplot(bind_rows(loans_hh_sumAll, loans_hh_EA, loans_hh_chained), aes(time, values,colour=var))+
ggplot(bind_rows(loans_hh_sumAll, loans_hh_EA, loans_hh_chained), aes(period, value,colour=var))+
geom_line()+
scale_x_date(expand = c(0.01,0.01)) +
theme + xlab(NULL) + ylab(NULL)+
......@@ -301,12 +296,8 @@ df <- rdb("IMF","IFS",mask = filter)
lendingrate_bycountry <- df %>%
select(REF_AREA,period, value) %>%
rename(time = period,
values = value,
country = REF_AREA) %>%
mutate(values = as.numeric(values),
time=as.Date(as.yearqtr(gsub("Q","",time)))) %>%
filter(year(time)>=1985)
rename(country = REF_AREA) %>%
filter(year(period)>=1985)
# Download the 8 countries' PPP GDP from WEO
country_iso <- c("BEL","FRA", "DEU", "ITA", "NLD", "FIN", "IRL", "ESP")
......@@ -334,13 +325,13 @@ sum_pppgdp <- sum(pppgdp$values_pppgdp)
# Merge databases and build a weighted mean
lendingrate_old <-
left_join(lendingrate_bycountry, pppgdp, by = "country") %>%
transmute(time = time,
transmute(period = period,
country = country,
values = values * values_pppgdp) %>%
group_by(time) %>%
summarise(values = sum(values) / sum_pppgdp) %>%
value = value * values_pppgdp) %>%
group_by(period) %>%
summarise(value = sum(value) / sum_pppgdp) %>%
mutate(var="lendingrate") %>%
filter(year(time)<=2002)
filter(year(period)<=2002)
```
......@@ -355,18 +346,16 @@ varname <- unique(as.character(df$series_name))
lendingrate_recent <- df %>%
mutate(year = substr(period,1,4),
month = substr(period,6,7),
time= as.Date(as.yearqtr(paste0(year,"-",month,"-","01"),format="%Y-%m-%d"))) %>%
select(time, value) %>%
rename(values= value) %>%
mutate(values= as.numeric(values)) %>%
group_by(time) %>%
summarize(values=mean(values)) %>%
period= as.Date(as.yearqtr(paste0(year,"-",month,"-","01"),format="%Y-%m-%d"))) %>%
select(period, value) %>%
group_by(period) %>%
summarize(value=mean(value)) %>%
mutate(var= "lendingrate")
dataplot <- bind_rows(data.frame(lendingrate_recent,ind="recent"),
data.frame(lendingrate_old,ind="old"))
ggplot(dataplot,aes(time,values, colour=ind)) +
ggplot(dataplot,aes(period,value, colour=ind)) +
geom_line() +
scale_x_date(expand = c(0.01,0.01)) +
theme + xlab(NULL) + ylab(NULL)+
......@@ -378,6 +367,7 @@ lendingrate <- chain(basis = lendingrate_recent,
date_chain = "2000-01-01")
```
More precisely, the recent bank lending rates come from the ECB and are described as:
```{r}
varname
......@@ -401,9 +391,9 @@ awm <- read.csv("awm19up15.csv", sep=",")
longrate_old <-
awm %>%
transmute(longrate = LTN, # Long-Term Interest Rate (Nominal)
time = as.Date(as.yearqtr(X))) %>%
gather(var, values, -time, convert = TRUE) %>%
filter(year(time)>=1980)
period = as.Date(as.yearqtr(X))) %>%
gather(var, value, -period, convert = TRUE) %>%
filter(year(period)>=1980)
# Long term interest rate Euro area 19 (fixed composition), Long-term interest rate for convergence purposes - Unspecified rate type, Debt security issued, 10 years maturity, New business coverage, denominated in Euro - Unspecified counterpart sector
......@@ -414,19 +404,17 @@ varname <- unique(as.character(df$series_name))
longrate_recent <- df %>%
mutate(year = substr(period,1,4),
month = substr(period,6,7),
time= as.Date(as.yearqtr(paste0(year,"-",month,"-","01"),format="%Y-%m-%d"))) %>%
select(time, value) %>%
rename(values= value) %>%
mutate(values= as.numeric(values)) %>%
select(values, time) %>%
group_by(time) %>%
summarize(values=mean(values)) %>%
period= as.Date(as.yearqtr(paste0(year,"-",month,"-","01"),format="%Y-%m-%d"))) %>%
select(period, value) %>%
select(value, period) %>%
group_by(period) %>%
summarize(value=mean(value)) %>%
mutate(var= "longrate")
dataplot <- bind_rows(data.frame(longrate_recent,ind="recent"),
data.frame(longrate_old,ind="old"))
ggplot(dataplot,aes(time,values, colour=ind)) +
ggplot(dataplot,aes(period,value, colour=ind)) +
geom_line() +
scale_x_date(expand = c(0.01,0.01)) +
theme + xlab(NULL) + ylab(NULL)+
......@@ -456,13 +444,9 @@ varname <- unique(as.character(df$series_name))
networth <- df %>%
select(value, period) %>%
rename(time= period,
values= value) %>%
mutate(time=as.Date(as.yearqtr(gsub("Q","",time))),
values= as.numeric(values),
var = as.factor("networth"))
mutate(var = as.factor("networth"))
ggplot(networth,aes(time,values)) +
ggplot(networth,aes(period,value)) +
geom_line(colour=blueObsMacro) +
scale_x_date(expand = c(0.01,0.01)) +
theme + xlab(NULL) + ylab(NULL)+
......@@ -486,13 +470,9 @@ varname <- unique(as.character(df$series_name))
houseprice<- df %>%
select(value, period) %>%
rename(time= period,
values= value) %>%
mutate(time=as.Date(as.yearqtr(gsub("Q","",time))),
values= as.numeric(values),
var = as.factor("houseprice"))
mutate(var = as.factor("houseprice"))
ggplot(houseprice,aes(time,values)) +
ggplot(houseprice,aes(period,value)) +
geom_line(colour=blueObsMacro) +
scale_x_date(expand = c(0.01,0.01)) +
theme + xlab(NULL) + ylab(NULL)+
......@@ -518,14 +498,14 @@ EA_Finance_data <- bind_rows(loans_nfc,
networth,
houseprice)
EA_Finance_data %<>%
mutate(time=gsub(" ", "", as.yearqtr(time)))
mutate(period=gsub(" ", "", as.yearqtr(period)))
```
We can check the last date available for each variable.
```{r}
maxDate <- EA_Finance_data %>%
group_by(var) %>%
summarize(maxdate=max(time)) %>%
summarize(maxdate=max(period)) %>%
arrange(maxdate)
maxDate
```
......@@ -533,15 +513,16 @@ maxDate
```{r}
minmaxDateFinance <- min(as.yearqtr(maxDate$maxdate))
EA_Finance_data %<>%
filter(time <= minmaxDateFinance)
filter(period <= minmaxDateFinance)
```
So we truncate the database at `r as.yearqtr(minmaxDateFinance)`, the earliest of the last available dates across the series.
```{r, fig.align="center"}
plot_df <- EA_Finance_data %>%
mutate(time = as.Date(as.yearqtr(time)))
mutate(period = as.Date(as.yearqtr(period)))
listVar <- list("Loans to NFC" = "loans_nfc",
"Loans to households" = "loans_hh",
"Bank lending rate" = "lendingrate",
......@@ -552,7 +533,7 @@ listVar <- list("Loans to NFC" = "loans_nfc",
plot_df$var <- factor(plot_df$var)
levels(plot_df$var)<-listVar
ggplot(plot_df,aes(time,values))+
ggplot(plot_df,aes(period,value))+
geom_line(colour=blueObsMacro)+
facet_wrap(~var,scales = "free_y",ncol = 3)+
scale_x_date(expand = c(0.01,0.01)) +
......@@ -565,7 +546,7 @@ You can download the 6 financial series directly as csv <a href="http://shiny.no
```{r}
EA_Finance_rawdata <-
EA_Finance_data %>%
spread(key = var, value = values)
spread(key = var, value = value)
EA_Finance_rawdata %>%
write.csv("EA_Finance_rawdata.csv", row.names=FALSE)
......@@ -580,18 +561,18 @@ We eventually want to build a database similar to the [@Chri14a] database, but f
# Import EA_SW_rawdata.csv in wide format
EA_SW_rawdata <-
read.csv("http://shiny.nomics.world/data/EA_SW_rawdata.csv") %>%
mutate(time = gsub(" ","",as.yearqtr(time)))
minmaxDateRaw <- max(as.yearqtr(EA_SW_rawdata$time))
mutate(period = gsub(" ","",as.yearqtr(period)))
minmaxDateRaw <- max(as.yearqtr(EA_SW_rawdata$period))
# Transform EA_SW_rawdata in long format to bind with EA_Finance_data
EA_CMR_rawdata <-
EA_SW_rawdata %>%
gather(var, values, -time) %>%
gather(var, value, -period) %>%
bind_rows(EA_Finance_data) %>%
filter(#time <= min(minmaxDateRaw,minmaxDateFinance),
time >= "1980Q1") %>%
spread(key = var, value = values)
period >= "1980Q1") %>%
spread(key = var, value = value)
EA_CMR_rawdata %>%
write.csv("EA_CMR_rawdata.csv", row.names=FALSE)
......@@ -601,7 +582,7 @@ Then data are normalized by capita and price if needed. Eventually we have 14 se
```{r}
EA_CMR_data <-
EA_CMR_rawdata %>%
transmute(time=time,
transmute(period=period,
gdp_rpc=1e+6*gdp/(pop*1000),
conso_rpc=1e+6*conso/(pop*1000),
inves_rpc=1e+6*inves/(pop*1000),
......@@ -643,12 +624,12 @@ listVar <- list("Real GDP per capita" = "gdp_rpc",
```{r, fig.align="center", fig.height=8.5, fig.width=8}
plot_EA_CMR_data <- EA_CMR_data %>%
gather(var, values, - time)
plot_EA_CMR_data$time <- as.Date(as.yearqtr(plot_EA_CMR_data$time))
gather(var, value, - period)
plot_EA_CMR_data$period <- as.Date(as.yearqtr(plot_EA_CMR_data$period))
plot_EA_CMR_data$var <- as.factor(plot_EA_CMR_data$var)
levels(plot_EA_CMR_data$var)<-listVar
p <- ggplot(plot_EA_CMR_data,aes(time,values))+
p <- ggplot(plot_EA_CMR_data,aes(period,value))+
geom_line(colour=blueObsMacro)+
facet_wrap(~var,ncol=3,scales = "free_y")+
scale_x_date(expand = c(0.01,0.01)) +
......@@ -664,11 +645,11 @@ You can also download ready-to-use (normalized) data for the estimation on Dynar
# Appendix
## Chaining function
To chain two datasets, we build a chain function whose input must be two dataframes with three standard columns (`time`, `var`, `values`). It returns a dataframe composed of chained values, ie the dataframe "to rebase" will be chained on the "basis" dataframe.
To chain two datasets, we build a chain function whose inputs must be two dataframes with three standard columns (`period`, `var`, `value`). It returns a dataframe composed of chained values, i.e., the dataframe "to rebase" is chained on the "basis" dataframe.
More specifically, the function:
* compute the growth rates from `values` in the dataframe of the 1st argument
* multiply it with the value of reference chosen in `values` in the dataframe of the 2nd argument
* computes the growth rates from `value` in the dataframe of the 1st argument
* multiplies them by the reference value taken from `value` in the dataframe of the 2nd argument
* at the date (`date_chain`) specified in the 3rd argument.
```{r chain}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment