##load data, make slight modifications##

rm(list=ls())
#setwd("C:/Users/Thor/Dropbox/Spring2019/Econ 487")
oj <- data.frame(read.csv("C:/Users/Thor/Dropbox/Spring2019/Econ 487/oj.csv"))

names(oj)[6]="p"; names(oj)[4]="q"; 
oj$p <- log(oj$p)
oj_ol <- oj  #a copy to be modified
##make major modifications -- lags, store/week dums, interactions##

#lags for price, quantity, feat
lag = 5
for(i in 1:lag){
  Df1 <- oj_ol
  Df1$week <- Df1$week+i
  myvars <- c("p","q","feat", "week", "brand","store")
  Df1 <- Df1[myvars]
  oj_ol <- merge(oj_ol, Df1, by=c("brand","store","week"))
  names(oj_ol)[ncol(oj_ol)-2]=paste('p','_l',i,sep='')
  names(oj_ol)[ncol(oj_ol)-1]=paste('q','_l',i,sep='')
  names(oj_ol)[ncol(oj_ol)-0]=paste('feat','_l',i,sep='')
  names(oj_ol)[6] <- "p"; names(oj_ol)[4] <- "q"; names(oj_ol)[5] <- "feat"
}

#dummies for store and week (there will be many...)
library(fastDummies)
## Warning: package 'fastDummies' was built under R version 3.5.3
oj_ol <- fastDummies::dummy_cols(oj_ol, select_columns = c("store","week"))
dim(oj_ol)
## [1] 18537   221
#interaction terms
# 1lagged p, q, feat with AGE60, EDUC, INCOME, HVAL150
interacts <- matrix(c(oj_ol$p_l1*oj_ol$AGE60, oj_ol$p_l1*oj_ol$EDUC, oj_ol$p_l1*oj_ol$INCOME, oj_ol$p_l1*oj_ol$HVAL150,
                      oj_ol$q_l1*oj_ol$AGE60, oj_ol$q_l1*oj_ol$EDUC, oj_ol$q_l1*oj_ol$INCOME, oj_ol$q_l1*oj_ol$HVAL150,
                      oj_ol$feat_l1*oj_ol$AGE60, oj_ol$feat_l1*oj_ol$EDUC, oj_ol$feat_l1*oj_ol$INCOME, oj_ol$feat_l1*oj_ol$HVAL150)
                    , ncol=12)
oj_ol <- cbind(oj_ol,interacts)    #note don't need to create interacts, but aids readability

#issue when the names of the last 12 entered into Forest...had to rename
my_names <- c("AGE60","EDUC","INCOME","HVAL150")
for(i in 1:length(my_names)){
  names(oj_ol)[ncol(oj_ol)-12+i] = paste('p_',my_names[i],sep='')
  names(oj_ol)[ncol(oj_ol)-12+i+4] = paste('q_',my_names[i],sep='')
  names(oj_ol)[ncol(oj_ol)-12+i+8] = paste('feat_',my_names[i],sep='')
}
##estimate f and g using random forest##

library(randomForest)
## Warning: package 'randomForest' was built under R version 3.5.3
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
#with feat
q_forest <- randomForest(q~.,data=oj_ol[,-which(names(oj_ol)=="p")], ntree=25, keep.forest=TRUE)
p_forest <- randomForest(p~.,data=oj_ol[,-which(names(oj_ol)=="q")], ntree=25, keep.forest=TRUE)

#form matrix with residuals and brand for separate elasticities
# note only works if order is preserved
q_res <- oj_ol$q - predict(q_forest)
p_res <- oj_ol$p - predict(p_forest)

res <- data.frame(q_res, p_res, "brand"=oj_ol$brand)

#without feat
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
oj_ol_no_feat <- select(oj_ol, -starts_with("feat"))
q_forest_nf <- randomForest(q~.,data=oj_ol_no_feat[,-which(names(oj_ol)=="p")], ntree=25, keep.forest=TRUE)
p_forest_nf <- randomForest(p~.,data=oj_ol_no_feat[,-which(names(oj_ol)=="q")], ntree=25, keep.forest=TRUE)

q_res_nf <- oj_ol$q - predict(q_forest_nf)
p_res_nf <- oj_ol$p - predict(p_forest_nf)

res_nf <- data.frame(q_res_nf, p_res_nf, "brand"=oj_ol$brand)
##estimate elasticities using residuals from ML prediction

#final regressions for elasticities by brand
elast = data.frame("brand"=c("dominicks","minute.maid","tropicana"))

fitd <- lm(q_res ~ p_res, data=res[which(res$brand=="dominicks"),])
fitm <- lm(q_res ~ p_res, data=res[which(res$brand=="minute.maid"),])
fitt <- lm(q_res ~ p_res, data=res[which(res$brand=="tropicana"),])

elast <- cbind(elast, c(coef(fitd)["p_res"],coef(fitm)["p_res"],coef(fitt)["p_res"]))

fitd_nf <- lm(q_res_nf ~ p_res_nf, data=res_nf[which(res_nf$brand=="dominicks"),])
fitm_nf <- lm(q_res_nf ~ p_res_nf, data=res_nf[which(res_nf$brand=="minute.maid"),])
fitt_nf <- lm(q_res_nf ~ p_res_nf, data=res_nf[which(res_nf$brand=="tropicana"),])

elast <- cbind(elast, c(coef(fitd_nf)["p_res_nf"],coef(fitm_nf)["p_res_nf"],coef(fitt_nf)["p_res_nf"]))

names(elast)[2]="elasticities"
names(elast)[3]="elasticities_nf"
elast
##         brand elasticities elasticities_nf
## 1   dominicks    -2.096544     -0.38160042
## 2 minute.maid    -1.951346      0.03937146
## 3   tropicana    -2.349698     -0.51248373