
/******************* Editing the data set ***********************/

/* Build Rawdata from half_hour.
   - Only rows whose incoming kperiod is above 20 and at most 44 are kept
     (per the original note: start from 10am and end at 10pm).
   - kperiod is then re-based by subtracting 20.
   - day_of_week is the WEEKDAY() of date.
   - ncount is the square-root transform sqrt(count + 1/4). */
data Rawdata (keep=day_of_week irregular holiday date kperiod count ncount bill14 deliver);
  set half_hour;
  halfhour = kperiod;                      /* remember the incoming period index   */
  if halfhour > 20 and halfhour <= 44;     /* subsetting IF: drop everything else  */
  kperiod     = halfhour - 20;             /* re-based period index                */
  day_of_week = weekday(date);             /* weekday number of the date           */
  ncount      = sqrt(count + 0.25);        /* square-root transformation           */
run;


/* Keep only the rows that are not flagged as irregular (irregular = 1 is dropped)
   and copy the transformed count ncount into z, the dependent variable. */
data Regdata;
  set Rawdata;
  if irregular ne 1;      /* subsetting IF: rows with irregular = 1 are removed */
  z = ncount;
run;


/* Drop the rows with day_of_week = 7 (Saturday) and add 0/1 indicators:
   Fri for day_of_week = 6 and Sun for day_of_week = 1.
   numDate is a copy of date. */
data Weekdata;
  set Regdata;
  if day_of_week ne 7;            /* subsetting IF: Saturday rows are removed */
  Fri     = (day_of_week = 6);    /* comparison evaluates to 1 or 0           */
  Sun     = (day_of_week = 1);
  numDate = date;
run;

/***************************** TWO-STAGE model **************************/

/*
Macro twostage
Fits the two-stage mixed model on a learning window taken from Weekdata and
stores the predictions for the first prediction date in a data set called pred.

The arguments list:
learnlag     - the length of the learning period (in days)
predictdt0   - the starting date of the prediction period (a SAS date value or date literal)
predictlag   - the length of the prediction lead-time period (in days)
forecast_dur - the length of the forecast period (in days)
weeknum      - a flag variable. Pass 0 the first time this macro is called (pred is created)
               and any other value afterwards (the predictions are appended to the existing pred).

Data sets written: window, daily, gest, predict and pred.

An example for using this macro is:
%twostage(42,'30jun2004'D,0,1,0)
This call creates the prediction for Jun 30th, 2004 based on the six weeks
prior to it (since predictlag=0). It then creates a data set named pred
with the desired predictions.

NOTE(review): the rows used for fitting run from predictdt0-(learnlag+predictlag)-1
through predictdt0-predictlag-1 inclusive, which is learnlag+1 calendar dates rather
than learnlag. Confirm whether this is intended before changing it.
*/



%macro twostage(learnlag, predictdt0, predictlag, forecast_dur, weeknum) ;

/* Keep the helper macro variables private to this macro so that a variable with the
   same name in the calling macro or in the global scope is never overwritten. */
%local SPredD EPredD SLearnD ELearnD ;

/* Creating the learning data and the prediction data set
(without the dependent variable z, later referred to as y).
Every argument is wrapped in parentheses so that an expression passed as an
argument is evaluated as a unit inside the date arithmetic. */

%let SPredD    = (&predictdt0) ;                                           /* The starting date of the prediction period */
%let EPredD    = ((&predictdt0) + (&forecast_dur) - 1) ;                   /* The last date of the prediction period     */
%let SLearnD   = ((&predictdt0) - ((&learnlag) + (&predictlag)) - 1) ;     /* The starting date of the learning period   */
%let ELearnD   = ((&predictdt0) - (&predictlag) - 1) ;                     /* The last date of the learning period       */

Data window ;
 set Weekdata ;
 if date < &SLearnD  then delete ;                      /* before the learning period                       */
 y = z ;
 if date > &ELearnD AND date =< &EPredD then y= . ;     /* rows to be predicted: response set to missing    */
 if date > &EPredD then delete  ;                       /* after the prediction period                      */
run ;


/* Produce daily mean of the transformed counts */
proc means data= window noprint ;
 var y ;
 by date numdate day_of_week sun fri deliver bill14;
 output out= daily mean=mean_y ;
run ;

/* First stage analysis - of means with G matrix to be estimated and all day-level fixed effects */
ods output g= gest ;
proc mixed data= daily order= data
		maxiter= 100 method=ml scoring=15 CONVH=1E-6 ;
class day_of_week date ;
model mean_y = day_of_week sun fri deliver bill14/ ddfm= satterth noint;
random date  / type= sp(pow)(numdate) g;       /* using an AR(1) covariance structure for G */
parms (6,0.5) (.1,.8) (1e-4,1) / noprofile ;    /* initial values for the parameters of var(My) (page 13) */
run ;
ods output close ;

/* Second stage analysis - fixing G at estimated value, estimating all fixed effects, R and sigma^2 */

proc mixed data= window	method= ml order= data ;
 class day_of_week kperiod date ;
 model y= day_of_week kperiod sun*kperiod fri*kperiod deliver bill14
				/ noint ddfm= satterth outp= predict; /* The fixed level effects */
 random date / gdata= gest;                           /* The daily level random effects  */
 repeated kperiod / type= ar(1) subject= date local r ; /* The period level random effects (+residuals variance) */
 parms (4.7) (0.91) (0.25) / noprofile ;
run ;

/* Creating the predicted values data set */

%if &weeknum = 0  %then %do ; /* If it is the first day we are predicting then create a data set called pred */
	data pred ;
 	set predict (where= (date= &SPredD)) ; /* Add the prediction for the first day */
    run ;

	%end;
		  %else %do ;       /* If the data set pred already exists then just append the predictions */
	data pred ;
 	set  pred predict (where= (date= &SPredD)) ;
	run ;

	%end ;


%mend twostage ;


/* The following macro loops over a range of dates and calls twostage once per date to
create the out-of-sample predictions (depending on the user request).
The arguments:
learnlag     - the length of the learning period
predictlag   - the length of the prediction lead-time period
forecast_dur - the length of the forecast period
startdt      - the first day to predict (a SAS date value or date literal)
enddt        - the last day to predict (a SAS date value or date literal)

twostage is called with flag 0 for the first date and flag 1 for every later date.
*/

%macro PredMacro(learnlag, predictlag, forecast_dur, startdt, enddt);

/* Loop bookkeeping stays private to this macro. */
%local start end day flag ;

/* Turn the two dates into plain numbers that the macro %do loop can iterate over.
   _null_ is used so that no data set is written, and symputx stores the value
   left-aligned without an implicit numeric-to-character conversion note. */
data _null_ ;
 call symputx('start', &startdt) ;
 call symputx('end', &enddt) ;
run ;

%let flag=0;

%do day = &start %to &end %by 1; /* Loop over the dates */
		%twostage(&learnlag, &day, &predictlag, &forecast_dur ,&flag);
%let flag=1;                     /* every call after the first one appends */
%end;

%mend PredMacro;

/* Predict the days from April 13th, 2004 through December 24th, 2004.
   Positional arguments, in order: learnlag=42 (a learning period of 6 weeks),
   predictlag=0 (prediction lead time of 0), forecast_dur=1, startdt, enddt. */

%predMacro(42,0,1,'13Apr2004'D,'24Dec2004'D);




