Comparing Many Models: An Uptrend for Nvidia?

Since April, Nvidia has tried to hold above the point forecast line. With the predictive intervals sloping slightly upward, could Nvidia continue its uptrend?

Source code:

# tidyquant supplies tq_get() and tq_transmute(); it is attached first so that
# the packages loaded after it keep precedence for any clashing names.
library(tidyquant)
library(tidyverse)
library(timetk)
library(tidymodels)
library(modeltime)
library(ggthemes)

# Nvidia: one closing price per month, stored in column `nvidia`.
# No `from` is passed, so the start of the history is tq_get()'s default.
df_nvidia <-
  "NVDA" %>%
  tq_get(to = "2024-08-29") %>%
  tq_transmute(
    select = close,
    mutate_fun = to.monthly,
    col_rename = "nvidia"
  ) %>%
  # Coerce the monthly index back to a plain Date
  # (presumably the first day of each month - TODO confirm).
  mutate(across(date, as.Date))



# FEDFUNDS: fed funds rate releases, read from the CSV hosted on GitHub
df_fedfunds <- read_csv("https://raw.githubusercontent.com/mesdi/investingcom/refs/heads/main/fedfunds.csv")

# Tidying the data: one row per month with a numeric `fedfunds` column
df_fedfunds_tidy <- 
  df_fedfunds %>% 
  janitor::clean_names() %>% 
  select(date = release_date, fedfunds = actual) %>% 
  # Converts string to date object. Two layouts are tried ("%b %d, %Y" and
  # "%d-%b-%y"); each is parsed once and the first non-missing result wins.
  mutate(date = coalesce(
    parse_date(date, format = "%b %d, %Y"),
    parse_date(date, format = "%d-%b-%y")
  )) %>% 
  # Removes the first three blank rows
  slice_tail(n = -3) %>% 
  # Moves every release to the first day of the following month and turns
  # the percentage string (e.g. "5.50%") into a number
  mutate(date = floor_date(date, "month") %m+% months(1),
         fedfunds = str_remove(fedfunds, "%") %>% as.numeric()) %>% 
  # Makes a regular time series by filling the time gaps
  pad_by_time(date, .by = "month") %>% 
  # NOTE(review): "up" copies the next available value backwards into the
  # padded months (assuming rows are in ascending date order at this point);
  # confirm that "down" (carry the last release forward) was not intended.
  fill(fedfunds, .direction = "up") %>% 
  # Keeps a single row per month
  distinct(date, .keep_all = TRUE)

# Merges the data sets: every Nvidia month is kept and receives the fed funds
# value for the same `date` (NA where no match exists).
df_merged <- 
  df_nvidia %>% 
  # `date` is the only column the two tables share. Naming the key explicitly
  # silences the "Joining with ..." message and prevents the join from
  # silently changing if either table later gains another common column.
  left_join(df_fedfunds_tidy, by = "date")


#Modeling

# Splitting the data: the most recent year is held out for assessment
df_split <- time_series_split(
  df_merged,
  assess = "1 year",
  cumulative = TRUE
)

# Training and test portions of the split
df_train <- df_split %>% training()
df_test <- df_split %>% testing()

# Bootstrap resamples of the training set, used for tuning.
# The seed is set immediately before drawing so the 100 resamples are
# reproducible.
set.seed(12345)
df_folds <- df_train %>% bootstraps(times = 100)


#Preprocessing

# Recipe for Boosted ARIMA: `nvidia` is the outcome, every other column a
# predictor; `date` itself stays in the data.
rec_arima_boost <-
  df_train %>%
  recipe(nvidia ~ .) %>%
  # Year and month features derived from `date`
  step_date(date, features = c("year", "month")) %>%
  # One indicator column per month level
  step_dummy(date_month, one_hot = TRUE) %>%
  # Centre and scale every numeric predictor
  step_normalize(all_numeric_predictors())

# Recipe for XGBoost and MARS: same steps as the Boosted ARIMA recipe,
# followed by removal of the raw `date` column.
rec_mars_xgboost <- step_rm(rec_arima_boost, date)


#Models

# Boosted ARIMA regression; min_n, learn_rate and trees are marked for tuning
# (https://business-science.github.io/modeltime/reference/arima_boost.html)
mod_arima_boost <-
  set_engine(
    arima_boost(
      min_n = tune(),
      learn_rate = tune(),
      trees = tune()
    ),
    engine = "auto_arima_xgboost"
  )

# Model 1: auto_arima ----
# Plain auto-ARIMA fitted on the full merged data (df_merged, not df_train).
# The fit is not assigned to a name, so at top level it is only printed.
fit(
  arima_reg() %>% set_engine(engine = "auto_arima"),
  nvidia ~ .,
  data = df_merged
)



# Multivariate adaptive regression splines (MARS) via earth;
# num_terms and prune_method are marked for tuning
# (https://parsnip.tidymodels.org/reference/details_mars_earth.html)
mod_mars <-
  mars(
    num_terms = tune(),
    prune_method = tune()
  ) %>%
  set_mode("regression") %>%
  # nfold is handed through to the earth engine
  set_engine("earth", nfold = 10)


# Boosted trees via xgboost; mtry, trees, min_n and learn_rate are marked
# for tuning
# (https://parsnip.tidymodels.org/reference/details_boost_tree_xgboost.html)
mod_boost_tree <-
  boost_tree(
    mtry = tune(),
    trees = tune(),
    min_n = tune(),
    learn_rate = tune()
  ) %>%
  set_mode("regression") %>%
  set_engine("xgboost")



#Workflow sets

# Workflow set for Boosted ARIMA: its own recipe paired with its own model
wflow_arima_boost <- workflow_set(
  preproc = list(rec_arima_boost = rec_arima_boost),
  models = list(ARIMA_boost = mod_arima_boost)
)

# Workflow set pairing the date-free recipe with both MARS and XGBoost
wflow_mars_xgboost <- workflow_set(
  preproc = list(rec_mars_xgboost = rec_mars_xgboost),
  models = list(
    MARS = mod_mars,
    XGBoost = mod_boost_tree
  )
)

# Stack both workflow sets into one table, then strip the recipe prefix from
# each workflow ID so only the model name remains.
wflow_all <-
  wflow_arima_boost %>%
  bind_rows(wflow_mars_xgboost) %>%
  mutate(
    wflow_id = str_remove(
      wflow_id,
      "(rec_arima_boost_)|(rec_mars_xgboost_)"
    )
  )



# Tuning and evaluating all the models

# Keep out-of-sample predictions and the workflow with each result.
# NOTE(review): parallel_over only matters when a parallel backend is
# registered; none is set up in the visible script - confirm.
grid_ctrl <- control_grid(
  save_workflow = TRUE,
  save_pred = TRUE,
  parallel_over = "everything"
)

# Grid-tune every workflow on the bootstrap resamples with 10 candidate
# parameter sets each; `seed` makes the run repeatable.
grid_results <- workflow_map(
  wflow_all,
  fn = "tune_grid", # the default, spelled out
  resamples = df_folds,
  grid = 10,
  control = grid_ctrl,
  seed = 98765
)


# Accuracy of the grid results: best configuration of each workflow, ranked
# by R-squared. The table is printed, not stored.
rank_results(grid_results, rank_metric = "rsq", select_best = TRUE) %>%
  select(Models = wflow_id, .metric, mean)


# Best Boosted ARIMA hyperparameters according to R-squared
best_param <- select_best(
  extract_workflow_set_result(grid_results, "ARIMA_boost"),
  metric = "rsq"
)

# Insert those parameters into the stored workflow and refit on the
# training set
wflw_fit <-
  extract_workflow(grid_results, "ARIMA_boost") %>%
  finalize_workflow(best_param) %>%
  fit(data = df_train)

# Calibration data: the fitted workflow calibrated against the test set
df_cal <- modeltime_calibrate(wflw_fit, new_data = df_test)

# Predictive intervals for Boosted ARIMA: forecasts over the test period,
# drawn together with the actual prices from the last 12 months.
df_cal %>%
  modeltime_forecast(actual_data = df_merged %>% 
                       # %m-% rolls back to the last valid day of the month,
                       # whereas `- months(12)` returns NA when the target day
                       # does not exist, which would filter out every row.
                       filter(date >= last(date) %m-% months(12)),
                     new_data = df_test) %>%
  plot_modeltime_forecast(.interactive = FALSE,
                          .legend_show = FALSE,
                          .line_size = 1,
                          .color_lab = "",
                          .title = "Predictive Intervals for Nvidia") +
  # The subtitle carries HTML/markdown; it is rendered by the
  # element_markdown() subtitle theme element set below.
  labs(subtitle = "Monthly Stock Prices<br><span style = 'color:red;'>Boosted ARIMA Point Forecasting Line</span>") + 
  # Three hand-picked axis breaks, labelled as "year month"
  scale_x_date(breaks = c(make_date(2023,8,1), 
                          make_date(2024,4,1),
                          make_date(2024,8,1)),
               labels = scales::label_date(format = "%Y %b"),
               expand = expansion(mult = c(.1, .1))) +
  # NOTE(review): the "Bricolage Grotesque" font presumably has to be
  # installed and registered on the machine - confirm.
  theme_wsj(base_family = "Bricolage Grotesque",
            color = "grey",
            base_size = 12) +
  theme(legend.position = "none",
        plot.subtitle = ggtext::element_markdown(size = 17, face = "bold"))

5 responses to “Comparing Many Models: An Uptrend for Nvidia?”

  1. Mauricio Avatar
    Mauricio

    Hi,

    Where can I download the “fedfunds.csv” file?

    Like

    1. Selcuk Disci Avatar

      I’ve just updated the article; please check again.

      Like

  2. Foma Ovolevor Avatar

    Nice work @Selcuk Disci . Please confirm if this link https://www.kaggle.com/datasets/lucamtb/fedfunds?resource=download contain the same dataset use as fedfund.csv

    Like

    1. Selcuk Disci Avatar

      Thank you for your interest; I fetched the data from investing.com

      Like

      1. Foma Ovolevor Avatar

        Thanks for the response and clarification.

        Liked by 1 person

Leave a reply to Mauricio Cancel reply

I’m Selcuk Disci

The DataGeeek focuses on machine learning, deep learning, and Generative AI in data science using financial data for educational and informational purposes.

Let’s connect