########################################################### ## Prosjekt: "Hvor presise er prognosene i Nasjonal- ## budsjettet?" ## ## av Sofian Gharsallah og Genaro Sucarrat ## ## Koden inneholdt i denne filen gjennomfører utregningene ## og de statistiske analysene til prosjektet. ## ## Opprettet første gang 26. februar 2019, Oslo. ## ## 1 INITIÉR, LAST DATA ## 2 ENKLE MODELLER ## 3 FIN PROGNOSER ## 4 NORGES BANK PROGNOSER ## 5 SSB PROGNOSER ## 6 SAMMENLIGN ## ## Alle analysene og utregningene er utført i R versjon ## 3.5.3 under Windows 10 Pro 64bit på en Lenovo ThinkPad ## X250 med en Intel Core i7-5600U prosessor (2.60 GHZ). ## ########################################################### ########################################################### ## 1 INITIÉR ########################################################### ##definer arbeidskatalog: setwd("C:/Users/sucarrat/Documents/projects/nasjonalbudsjettet/") #setwd(choose.dir()) #setwd("C:/Users/sofia/Dropbox/R") #setwd("~/Dropbox/R") ##slett alt i arbeidsrom: rm(list = ls()) ##last pakker: library(gets) #versjon 0.21 library(lgarch) #trenger gdiff og glag; versjon 0.6-2 library(xtable) #for LaTeX; versjon 1.8-4 ##========================================================= ## modifisert Diebold-Mariano test; koden er lånt fra ## forecast pakka: dm.test <- function (e1, e2, alternative = c("two.sided", "less", "greater"), h=1, power=2) { alternative <- match.arg(alternative) d <- c(abs(e1))^power - c(abs(e2))^power d.cov <- acf(d, na.action = na.omit, lag.max = h - 1, type = "covariance", plot = FALSE)$acf[, , 1] d.var <- sum(c(d.cov[1], 2 * d.cov[-1]))/length(d) dv <- d.var if (dv > 0) { STATISTIC <- mean(d, na.rm = TRUE)/sqrt(dv) } else if (h == 1) { stop("Variance of DM statistic is zero") } else { warning("Variance is negative, using horizon h=1") return(dm.test(e1, e2, alternative, h = 1, power)) } n <- length(d) k <- ((n + 1 - 2 * h + (h/n) * (h - 1))/n)^(1/2) STATISTIC <- STATISTIC * k names(STATISTIC) <- "DM" if (alternative == "two.sided") { PVAL <- 2 * pt(-abs(STATISTIC), df = n - 1) } else if (alternative == "less") { PVAL <- pt(STATISTIC, df = n - 1) } else if (alternative == "greater") { PVAL <- pt(STATISTIC, df = n - 1, lower.tail = FALSE) } PARAMETER <- c(h, power) names(PARAMETER) <- c("Forecast horizon", "Loss function power") structure(list(statistic = STATISTIC, parameter = PARAMETER, alternative = alternative, p.value = PVAL, method = "Diebold-Mariano Test", data.name = c(deparse(substitute(e1)), deparse(substitute(e2)))), class = "htest") } ########################################################### ## 2 ENKLE MODELLER ########################################################### ##last datasett m/faktiske verdier: actualData <- read.csv("./analysis/data/actual-data-2019m9vintage.csv") #faktiske verdier rownames(actualData) <- as.character(actualData$aar) ##hvilke serier? series <- c("bnpfast", "privatkons", "investbrutto", "sysselsetting", "ledighetaku", "kpi", "lonnsvekst", "nibor3m", "brentblend") ##lag liste med seriene i zoo-format: seriesList <- list() for(i in 1:length(series)){ x <- actualData[ ,series[i] ] x <- zoo(x, order.by=actualData[,"aar"]) ##hvis olje: if(series[i] == "brentblend"){ nokusd <- actualData[ ,"nokusd" ] nokusd <- zoo(nokusd, order.by=actualData[,"aar"]) x <- x*nokusd x <- 100*gdiff(x)/glag(x) #gdiff og glag er fra lgarch pakka } seriesList[[i]] <- x } names(seriesList) <- series ##bestem startår (delvis fra Prognoseprisen 2013): startaar <- c(1971, #bnpfast 1971, #privatkons 1971, #investbrutto 1971, #sysselsetting 1983, #ledighetaku 1990, #kpi 1988, #lonnsvekst 1992, #nibor3m ## før 1993 var renten høy (over 10%), og 1992 pga. lagging 1987) #brentblend ## basert på første data punkt i actualData ##ar(1) modeller: ##=============== ##matrise m/beregninger: ar1matrise <- matrix(NA, length(series), 7) colnames(ar1matrise) <- c("snitt","pval1","corr", "pval2", "ser", "utvalg", "T") rownames(ar1matrise) <- series ar1matrise <- as.data.frame(ar1matrise) ##AR(1), EY og RW prognoser: EYforecasts <- matrix(NA, length(1999:2018), length(series)) rownames(EYforecasts) <- 1999:2018 colnames(EYforecasts) <- series RWforecasts <- AR1forecasts <- EYforecasts ##faktiske verdier AR(1), EY og RW: RWactuals <- AR1actuals <- EYactuals <- EYforecasts ##løkke: for(i in 1:length(series)){ ##beregninger: y <- window(seriesList[[i]], start=startaar[i]) yar1 <- arx(y, mc=TRUE, ar=1) ar1matrise[i,"snitt"] <- mean(y, na.rm=TRUE) ar1matrise[i,"pval1"] <- yar1$mean.results[1,"p-value"] ar1matrise[i,"corr"] <- coef(yar1)["ar1"] ar1matrise[i,"pval2"] <- yar1$mean.results[2,"p-value"] ar1matrise[i,"ser"] <- sigma(yar1) ar1matrise[i,"utvalg"] <- paste0(yar1$aux$y.index[1], " -- ", yar1$aux$y.index[yar1$aux$y.n]) ar1matrise[i,"T"] <- yar1$aux$y.n printtex(yar1, fitted.name=series[i], xreg.names=c("","Y_{t-1}")) ##EY prognoser: EYforecasts[,i] <- ar1matrise[i,"snitt"] ##AR(1) prognoser: AR1forecasts[,i] <- coredata( window(fitted(yar1), start=1999) ) ##RW prognoser: RWforecasts[,i] <- window( glag(y), start=1999 ) ##actuals: EYactuals[,i] <- window(y, start=1999) AR1actuals[,i] <- window(y, start=1999) RWactuals[,i] <- window(y, start=1999) } ##matriser m/prognosefeil: EYerrors <- EYactuals - EYforecasts AR1errors <- AR1actuals - AR1forecasts RWerrors <- RWactuals - RWforecasts ##lag bedre radnavn for ar1matrise: rownames(ar1matrise) <- c("BNP vekst (fastland)", "Konsumvekst", "Investeringsvekst (brutto)", "Sysselsettingsvekst", "Ledighet (AKU)", "Inflasjon (KPI)", "Lonnsvekst", "Renteniva (NIBOR3M)", "Oljeprisvekst") #LaTeX kode: ar1matriseTex <- xtable(ar1matrise, align=c("l","c","c","c","c","c","c","c"), digits=2) print(ar1matriseTex) ##lag grafer: ##=========== ablineList <- list() ablineList[[1]] <- c(0,2,4,6) #bnptotal ablineList[[2]] <- c(-2,0,2,4,6,8) #privatkons ablineList[[3]] <- c(-10,-5,0,5,10,15) #investbrutto ablineList[[4]] <- c(-2,-1,0,1,2,3,4) #sysselsetting ablineList[[5]] <- c(2,3,4,5) #ledighetaku ablineList[[6]] <- c(1,2,3,4) #kpi ablineList[[7]] <- c(0,2,3,4,5,6) #lonnsvekst ablineList[[8]] <- c(0,2,4,6,8,10,12) #nibor3m ablineList[[9]] <- c(-20,0,20,40,60,80) #brentblend legendList <- list() legendList[[1]] <- "bottomleft" #bnptotal legendList[[2]] <- "topright" #privatkons legendList[[3]] <- "bottomright" #investbrutto legendList[[4]] <- "bottomleft" #sysselsetting legendList[[5]] <- "topleft" #ledighetaku legendList[[6]] <- "bottomleft" #kpi legendList[[7]] <- "bottomleft" #lonnsvekst legendList[[8]] <- "bottomleft" #nibor3m legendList[[9]] <- "bottomleft" #brentblend legendNames <- c("BNP-vekst i %", "Konsumvekst i %", "Investeringsvekst i %", "Sysselsettingsvekst i %", "Ledighet (AKU) i %", "Inflasjon (KPI) i %", "Lønnsvekst i %", "Rentenivå (NIBOR3M)", "Oljeprisvekst i %") ##løkke (lag grafer): for(i in 1:length(seriesList)){ y <- window(seriesList[[i]], start=startaar[i]) snitt <- ar1matrise[i,"snitt"] autokorr1 <- ar1matrise[i,"corr"] windows(15,9) ##get current par-values: def.par <- par(no.readonly=TRUE) ##margins (default=5.1,4.1,4.1,2.1): par(mar = c(2,4.1,0.5,2.1)) ##plot: plot(y, las=1, main="", xlab="", ylab="Vekst i %", col="blue", lty="solid", lwd=3) abline(h=ablineList[[i]], col="grey", lty="dashed") abline(h=snitt, lty="dotted",col="red",lwd=2) axis(side=4, at=snitt, labels = round(snitt,2)) legend(legendList[[i]], legend=c(legendNames[i], paste0("Snitt = ", round(snitt,2), "%"), paste0("AR(1) = ", round(autokorr1,2)) ), lty=c("solid","dotted","dotted"), lwd=c(3,2,2), col=c("blue","red","white"), bty="n") ##save plot: desttxt <- paste0("./article/figures/", series[i], ".pdf") savePlot(desttxt, type="pdf") ##return to old par-values: par(def.par) ##turn off graphics: graphics.off() } ########################################################### ## 3 FIN PROGNOSER ########################################################### ##last datasettet m/FIN prognoser: forecastData <- read.csv("./analysis/data/forecasts-fin.csv") rownames(forecastData) <- as.character(forecastData$aar) ##serier som skal anslås: series <- c("bnpfast", "privatkons", "investbrutto", "sysselsetting", "ledighetaku", "kpi", "lonnsvekst", "nibor3m", "brentblend") ##lag matrise m/prognoser: ##======================== FINforecasts <- matrix(NA,NROW(forecastData),length(series)) rownames(FINforecasts) <- forecastData$aar colnames(FINforecasts) <- series for(i in 1:length(series)){ ##hvis ikke olje: if( series[i] %in% setdiff(series,c("brentblend")) ){ yname <- paste0("FIN", series[i]) FINforecasts[,i] <- forecastData[,yname] } ##hvis olje: if( series[i] == "brentblend" ){ t1 <- which( actualData$aar == forecastData$aar[1] ) t2 <- which( actualData$aar == forecastData$aar[NROW(forecastData)] ) brentblendnok <- actualData[,"brentblend"]*actualData[,"nokusd"] xtmin1 <- brentblendnok[c(t1-1):c(t2-1)] FINforecasts[,i] <- 100*(forecastData[,"FINbrentblendnok"]/xtmin1 - 1) } } ##lag matrise m/faktiske verdier tilpasset FIN-prognoser: ##======================================================= FINactuals <- matrix(NA,NROW(forecastData),length(series)) rownames(FINactuals) <- forecastData$aar colnames(FINactuals) <- series for(i in 1:length(series)){ ##start og sluttår: t1 <- which( actualData$aar == forecastData$aar[1] ) t2 <- which( actualData$aar == forecastData$aar[NROW(forecastData)] ) ##hvis ikke olje: if( series[i] %in% setdiff(series,c("brentblend")) ){ FINactuals[,i] <- actualData[t1:t2,series[i]] } ##hvis olje: if( series[i] == "brentblend" ){ brentblendnok <- actualData[,"brentblend"]*actualData[,"nokusd"] xt <- brentblendnok[t1:t2] xtmin1 <- brentblendnok[c(t1-1):c(t2-1)] FINactuals[,i] <- 100*(xt/xtmin1 - 1) } } ##matrise med prognosefeil: ##========================= FINerrors <- FINactuals - FINforecasts ########################################################### ## 4 NORGES BANK PROGNOSER ########################################################### ##last datasett m/NB prognoser: forecastData <- read.csv("./analysis/data/forecasts-nb.csv") rownames(forecastData) <- as.character(forecastData$aar) ##serier som skal anslås: series <- c("bnpfast", "privatkons", "investfast", "sysselsetting", "ledighetaku", "kpi", "lonnsvekst", "folio", "brentblend") ##lag matrise m/prognoser: ##======================== NBforecasts <- matrix(NA,NROW(forecastData),length(series)) rownames(NBforecasts) <- forecastData$aar colnames(NBforecasts) <- series for(i in 1:length(series)){ ##hvis ikke olje: if( series[i] %in% setdiff(series,c("brentblend")) ){ yname <- paste0("NB", series[i]) NBforecasts[,i] <- forecastData[,yname] } ##hvis olje: if( series[i] == "brentblend" ){ ##merk: i nok i 1999-2001, i usd 2002-2018, NA for 2004-2005 t1 <- which( actualData$aar == forecastData$aar[1] ) t2 <- which( actualData$aar == forecastData$aar[NROW(forecastData)] ) estbrentblendusd <- #1999-2001 forecastData$NBbrentblendnok/actualData[(t1:t2),"nokusd"] brentblendusd <- forecastData$NBbrentblendusd whichNotNA <- which(estbrentblendusd != "NA") brentblendusd[whichNotNA] <- estbrentblendusd[whichNotNA] xtmin1 <- (actualData$brentblend)[c(t1-1):c(t2-1)] NBforecasts[,i] <- 100*(brentblendusd/xtmin1 - 1) } } ##lag matrise m/faktiske verdier tilpasset NB-prognoser: ##====================================================== NBactuals <- matrix(NA,NROW(forecastData),length(series)) rownames(NBactuals) <- forecastData$aar colnames(NBactuals) <- series for(i in 1:length(series)){ ##start og sluttår: t1 <- which( actualData$aar == forecastData$aar[1] ) t2 <- which( actualData$aar == forecastData$aar[NROW(forecastData)] ) ##hvis ikke olje: if( series[i] %in% setdiff(series,c("brentblend")) ){ NBactuals[,series[i]] <- actualData[t1:t2,series[i]] } ##hvis olje: if( series[i] == "brentblend" ){ brentblend <- actualData[,"brentblend"] xt <- brentblend[t1:t2] xtmin1 <- brentblend[c(t1-1):c(t2-1)] NBactuals[,series[i]] <- 100*(xt/xtmin1 - 1) } } ##matrise med prognosefeil: ##========================= NBerrors <- NBactuals - NBforecasts ########################################################### ## 5 SSB PROGNOSER ########################################################### ##last datasett m/SSB prognoser: forecastData <- read.csv("./analysis/data/forecasts-ssb.csv") rownames(forecastData) <- as.character(forecastData$aar) ##serier som skal anslås: series <- c("bnpfast", "privatkons", "investbrutto", "sysselsetting", "ledighetaku", "kpi", "lonnsvekst", "nibor3m", "brentblend") ##lag matrise m/prognoser: ##======================== SSBforecasts <- matrix(NA,NROW(forecastData),length(series)) rownames(SSBforecasts) <- forecastData$aar colnames(SSBforecasts) <- series for(i in 1:length(series)){ ##hvis ikke olje: if( series[i] %in% setdiff(series,c("brentblend")) ){ yname <- paste0("SSB", series[i]) SSBforecasts[,i] <- forecastData[,yname] } ##hvis olje: if( series[i] == "brentblend" ){ t1 <- which( actualData$aar == forecastData$aar[1] ) t2 <- which( actualData$aar == forecastData$aar[NROW(forecastData)] ) brentblendnok <- actualData[,"brentblend"]*actualData[,"nokusd"] xtmin1 <- brentblendnok[c(t1-1):c(t2-1)] SSBforecasts[,i] <- 100*(forecastData[,"SSBbrentblendnok"]/xtmin1 - 1) } } ##lag matrise m/faktiske verdier tilpasset SSB-prognoser: ##======================================================= SSBactuals <- matrix(NA,NROW(forecastData),length(series)) rownames(SSBactuals) <- forecastData$aar colnames(SSBactuals) <- series for(i in 1:length(series)){ ##start og sluttår: t1 <- which( actualData$aar == forecastData$aar[1] ) t2 <- which( actualData$aar == forecastData$aar[NROW(forecastData)] ) ##hvis ikke olje: if( series[i] %in% setdiff(series,c("brentblend")) ){ SSBactuals[,i] <- actualData[t1:t2,series[i]] } ##hvis olje: if( series[i] == "brentblend" ){ brentblendnok <- actualData[,"brentblend"]*actualData[,"nokusd"] xt <- brentblendnok[t1:t2] xtmin1 <- brentblendnok[c(t1-1):c(t2-1)] SSBactuals[,i] <- 100*(xt/xtmin1 - 1) } } ##matrise med prognosefeil: ##========================= SSBerrors <- SSBactuals - SSBforecasts ########################################################### ## 6 SAMMENLIGN ########################################################### deltakere <- c("EY", "RW", "AR1", "FIN", "NB", "SSB") MEmatrise <- matrix(NA, length(deltakere), NCOL(FINforecasts)) rownames(MEmatrise) <- deltakere colnames(MEmatrise) <- colnames(FINforecasts) MSEmatrise <- MAEmatrise <- MEmatrise ##lag ME, MAE og MSE matriser: ##============================= for(i in 1:NCOL(MEmatrise)){ ##ME matrise: MEmatrise["EY",i] <- mean(EYerrors[,i], na.rm=TRUE) MEmatrise["RW",i] <- mean(RWerrors[,i], na.rm=TRUE) MEmatrise["AR1",i] <- mean(AR1errors[,i], na.rm=TRUE) MEmatrise["FIN",i] <- mean(FINerrors[,i], na.rm=TRUE) MEmatrise["NB",i] <- mean(NBerrors[,i], na.rm=TRUE) MEmatrise["SSB",i] <- mean(SSBerrors[,i], na.rm=TRUE) ##MAE matrise: MAEmatrise["EY",i] <- mean( abs(EYerrors[,i]) , na.rm=TRUE) MAEmatrise["RW",i] <- mean( abs(RWerrors[,i]) , na.rm=TRUE) MAEmatrise["AR1",i] <- mean( abs(AR1errors[,i]) , na.rm=TRUE) MAEmatrise["FIN",i] <- mean( abs(FINerrors[,i]) , na.rm=TRUE) MAEmatrise["NB",i] <- mean( abs(NBerrors[,i]) , na.rm=TRUE) MAEmatrise["SSB",i] <- mean( abs(SSBerrors[,i]) , na.rm=TRUE) ##MSE matrise: MSEmatrise["EY",i] <- mean( (EYerrors[,i])^2, na.rm=TRUE) MSEmatrise["RW",i] <- mean( (RWerrors[,i])^2, na.rm=TRUE) MSEmatrise["AR1",i] <- mean( (AR1errors[,i])^2, na.rm=TRUE) MSEmatrise["FIN",i] <- mean( (FINerrors[,i])^2, na.rm=TRUE) MSEmatrise["NB",i] <- mean( (NBerrors[,i])^2, na.rm=TRUE) MSEmatrise["SSB",i] <- mean( (SSBerrors[,i])^2, na.rm=TRUE) } ##tester av "rasjonalitet": ##========================= listErrors <- list(EYerrors=EYerrors, RWerrors=RWerrors, AR1errors=AR1errors, FINerrors=FINerrors, NBerrors=NBerrors, SSBerrors=SSBerrors) ##ME (ubetinget og betinget): MEtestPverdier <- matrix(NA, NROW(MEmatrise), NCOL(MEmatrise)) colnames(MEtestPverdier) <- colnames(MEmatrise) rownames(MEtestPverdier) <- rownames(MEmatrise) MEtestSEs <- MEtestPverdier MEtest2Corrs <- MEtestPverdier MEtest2Pverdier <- MEtestPverdier for(i in 1:length(listErrors)){ mErrors <- listErrors[[i]] for(j in 1:NCOL(MEtestPverdier)){ ydiff <- na.omit(mErrors[,j]) ydiffmod <- arx(ydiff, mc=TRUE, vcov.type="newey-west") ##ubetinget: MEtestPverdier[i,j] <- as.numeric(ydiffmod$mean.results[1,"p-value"]) MEtestSEs[i,j] <- as.numeric(ydiffmod$mean.results[1,"std.error"]) ##betinget: MEtest2Corrs[i,j] <- acf(ydiff, lag.max=2, plot=FALSE)$acf[2] MEtest2Pverdier[i,j] <- Box.test(ydiff, lag=1, type="L")$p.value } } ##LaTeX kode MEtest: for(i in 1:NROW(MEtestPverdier)){ texcode <- paste0(" $\\underset{[p-val]}{\\text{", rownames(MEtestPverdier)[i], "}}$") for(j in 1:NCOL(MEtestPverdier)){ texcode <- paste0(texcode, " & $\\underset{[", round(MEtestPverdier[i,j],digits=2), "]}{", round(MEmatrise[i,j], digits=2), "}$") } texcode <- paste0(texcode, " \\\\ \n") cat(texcode) } ##LaTeX kode MEtest2: for(i in 1:NROW(MEtest2Pverdier)){ texcode <- paste0(" $\\underset{[p-val]}{\\text{", rownames(MEtest2Pverdier)[i], "}}$") for(j in 1:NCOL(MEtest2Pverdier)){ texcode <- paste0(texcode, " & $\\underset{[", round(MEtest2Pverdier[i,j],digits=2), "]}{", round(MEtest2Corrs[i,j], digits=2), "}$") } texcode <- paste0(texcode, " \\\\ \n") cat(texcode) } ##tester av MAE og MSE mot AR1 som benchmark: ##=========================================== ##MAE: ##modifisert DM-test mot AR1: MAEtestPverdier <- matrix(NA, NROW(MAEmatrise), NCOL(MAEmatrise)) colnames(MAEtestPverdier) <- colnames(MAEmatrise) rownames(MAEtestPverdier) <- rownames(MAEmatrise) for(i in setdiff(1:length(listErrors),3) ){ #i hopper over 3 siden AR1 er benchmark m1Errors <- listErrors[[i]] m2Errors <- listErrors$AR1errors #AR1 (benchmark) for(j in 1:NCOL(MAEtestPverdier)){ e1 <- m1Errors[,j] e2 <- m2Errors[,j] whichNAs <- which(is.na(e1)) if(length(whichNAs)>0){ e1 <- e1[-whichNAs] e2 <- e2[-whichNAs] } dmTest <- dm.test(e1, e2, alternative="two.sided", h=1, power=1) MAEtestPverdier[i,j] <- as.numeric(dmTest$p.value) } } ##LaTeX kode: for(i in 1:NROW(MAEtestPverdier)){ texcode <- paste0(" $\\underset{[p-val]}{\\text{", rownames(MAEtestPverdier)[i], "}}$") for(j in 1:NCOL(MAEtestPverdier)){ texcode <- paste0(texcode, " & $\\underset{[", round(MAEtestPverdier[i,j],digits=2), "]}{", round(MAEmatrise[i,j], digits=2), "}$") } texcode <- paste0(texcode, " \\\\ \n") cat(texcode) } ##MSE: ##modifisert DM-test mot AR1: MSEtestPverdier <- matrix(NA, NROW(MSEmatrise), NCOL(MSEmatrise)) colnames(MSEtestPverdier) <- colnames(MSEmatrise) rownames(MSEtestPverdier) <- rownames(MSEmatrise) for(i in setdiff(1:length(listErrors),3) ){ #i hopper over 3 siden AR1 er benchmark m1Errors <- listErrors[[i]] m2Errors <- listErrors$AR1errors #AR1 (benchmark) for(j in 1:NCOL(MSEtestPverdier)){ e1 <- m1Errors[,j] e2 <- m2Errors[,j] whichNAs <- which(is.na(e1)) if(length(whichNAs)>0){ e1 <- e1[-whichNAs] e2 <- e2[-whichNAs] } dmTest <- dm.test(e1, e2, alternative="two.sided", h=1, power=2) MSEtestPverdier[i,j] <- as.numeric(dmTest$p.value) } } ##LaTeX kode: for(i in 1:NROW(MSEtestPverdier)){ texcode <- paste0(" $\\underset{[p-val]}{\\text{", rownames(MSEtestPverdier)[i], "}}$") for(j in 1:NCOL(MSEtestPverdier)){ texcode <- paste0(texcode, " & $\\underset{[", round(MSEtestPverdier[i,j],digits=2), "]}{", round(MSEmatrise[i,j], digits=2), "}$") } texcode <- paste0(texcode, " \\\\ \n") cat(texcode) }