Comparing Many Models: Crude Oil Futures Price – DataGeeek

Economics, Machine Learning, R, time series forecast, Time Series Machine Learning

Comparing Many Models: Crude Oil Futures Price

Published by

Selcuk Disci

on

August 22, 2024

Comparing Many Models: Crude Oil Futures Price

Crude prices are declining over sharp revision to U.S. employment data which leads to concerns about slowing demand. U.S. Crude oil futures price has fluctuated above the point forecast line (XGBoost) year to date.

Source code:

library(tidyverse)
library(tidymodels)
library(tidyquant)
library(timetk)
library(modeltime)
library(workflowsets)


#Crude Oil Futures(USD) (Index 2014 = 100)
df_crude_oil <- 
  tq_get("CL=F") %>% 
  tq_transmute(select = close,
               mutate_fun = to.monthly,
               col_rename = "crude_oil") %>% 
  mutate(date = as.Date(date))


#Federal Funds Effective Rate
df_fed_funds <- 
  tq_get("FEDFUNDS", get = "economic.data") %>% 
  select(date, fed_funds = price)

#Industrial Production: Mining: Crude Oil (NAICS = 21112) (Index 2014 = 100)
df_crude_oil_production <- 
  tq_get("IPG21112S", get = "economic.data") %>% 
  select(date, crude_oil_production = price) 

#Merging all the data sets
df_merged <- 
  df_crude_oil %>%
  left_join(df_fed_funds) %>% 
  left_join(df_crude_oil_production) %>% 
  drop_na()


#Splitting tha data
df_split <- 
  df_merged %>% 
  time_series_split(assess = "1 year",
                    cumulative = TRUE)

df_train <- training(df_split)
df_test <- testing(df_split)

#Bootstrapping for tuning
set.seed(12345)
df_folds <- bootstraps(df_train,
                       times = 100)


#Preprocessing
rec_all <- 
  recipe(crude_oil ~ ., data = df_train) %>% 
  step_mutate(date_num = as.numeric(date)) %>% 
  step_rm(date) %>% 
  step_normalize(all_numeric_predictors()) 
  

#Models

#Radial basis function support vector machines (SVMs) via kernlab
mod_svm_rbf <- 
  svm_rbf(cost = tune(),
          rbf_sigma = tune(), 
          margin = tune()) %>%  
  set_engine("kernlab") %>% 
  set_mode("regression")


#Multivariate adaptive regression splines (MARS) via earth
mod_mars <- 
  mars(num_terms = tune(), 
       prune_method = tune()) %>% 
  set_engine("earth", nfold = 10) %>% 
  set_mode("regression") 


#Boosted trees via xgboost
mod_boost_tree <- 
  boost_tree(mtry = tune(), 
             trees = tune(), 
             min_n = tune(), 
             learn_rate = tune()) %>%
  set_engine("xgboost") %>%
  set_mode("regression") 



#Workflow sets
wflow_all <- 
  workflow_set(
    preproc = list(svm_all = rec_all),
    models = list(SVM_rbf = mod_svm_rbf,
                  MARS = mod_mars,
                  XGBoost = mod_boost_tree)) %>% 
  #Making the workflow ID's a little more simple:
  mutate(wflow_id = str_remove(wflow_id, "svm_all_"))



#Tuning and evaluating all the models
grid_ctrl <-
  control_grid(
    save_pred = TRUE,
    parallel_over = "everything",
    save_workflow = TRUE
  )

grid_results <-
  wflow_all %>%
  workflow_map(
    seed = 98765,
    resamples = df_folds,
    grid = 10,
    control = grid_ctrl
  )


#Accuracy of the grid results
grid_results %>% 
  rank_results(select_best = TRUE, 
               rank_metric = "rsq") %>%
  select(Models = wflow_id, .metric, mean)


#Finalizing the model with the best parameters
best_param <- 
  grid_results %>%
  extract_workflow_set_result("XGBoost") %>% 
  select_best(metric = "rsq")


wflw_fit <- 
  grid_results %>% 
  extract_workflow("XGBoost") %>% 
  finalize_workflow(best_param) %>% 
  fit(df_train)

#Calibration data
df_cal <- 
  wflw_fit %>% 
  modeltime_calibrate(new_data = df_test)

#Predictive intervals for XGBoost
df_cal %>%
  modeltime_forecast(actual_data = df_merged %>% 
                                   filter(date >= last(date) - months(12)),
                     new_data = df_test) %>%
  plot_modeltime_forecast(.interactive = FALSE,
                          .legend_show = FALSE,
                          .line_size = 1,
                          .color_lab = "",
                          .title = "Confidence Intervals for Crude Oil Future Prices (WTI)") +
  labs(subtitle = "Using XGBoost") + 
  theme_minimal(base_family = "Bricolage Grotesque",
                base_size = 16)

Leave a comment Cancel reply

Hello,

I’m Selcuk Disci

The DataGeeek focuses on machine learning, deep learning, and Generative AI in data science using financial data for educational and informational purposes.

Let’s connect

Join the fun!

Stay updated with our latest tutorials and ideas by joining our newsletter.