/*----------------------Web Analytics-----------------------------------------*/ /* WAPATH */ /* Create the Interactive Path Report */ /* */ /* two calls are used: the 1st (&INFO eq YES) creates a template data set */ /* that serves as the model for the report data set, which is created */ /* by the 2nd call. */ /* */ /* if the user-provided path criteria do not select any obs, WAPATH */ /* returns a report data set that contains 0 obs */ /* */ /*----------------------------------------------------------------------------*/ /* Supported By: Frank Roediger, FRROED; Kevin Scott, SASKXS */ /* Caroline Bahler, CABAHL */ /* */ /*----------------------------------------------------------------------------*/ /* History: */ /* Date Description Username Change Code */ /* 20040930 Program Created saskxs NA */ /* 20041215 Remove supportcount and cabahl C0001 */ /* replaced with wab_interactive_support */ /* 20050614 MAX & MIN sequence length frroed C0002 */ /* 20050617 Updated for new version of PROC cabahl C0003 */ /* PATH */ /* 20050719 Added % root node stat and cabahl C0004 */ /* changed % visit to % total visits */ /* 20060704 HotFix re-engineering of frroed NA */ /* Pathing analyses */ /* 20060915 Post-HotFix defect correction frroed NA */ /*----------------------------------------------------------------------------*/ /* Dependencies: */ /* The following macros need to be available: */ /* -- none -- */ /* */ /* The following macro variables need to be defined and assigned values */ /* by the User Interface/Middleware */ /* &INFO: a switch variable that indicates whether to create a */ /* place-holding (1 obs with missing values) or a populated */ /* (summarization of actual data) version of &OUTPUT_DATASET */ /* &START_PAGE: a comma-delimited list of the reference numbers for the */ /* path start pages. the $IDXTORF format associates the */ /* reference number with the actual REQUESTED_FILE. */ /* &END_PAGE: a comma-delimited list of the reference numbers for the */ /* path end pages. the $IDXTORF format associates the */ /* reference number with the actual REQUESTED_FILE. if no */ /* END_PAGEs have been specified, the report includes all */ /* end pages that were associated with any &START_PAGE. */ /* &TOPN: the maximum number of rows that will be in the results */ /* table. */ /* &MAX_SEQUENCE_LENGTH: the maximum number of requested files in a path. */ /* if no value has been provided, the default of 7 is used. */ /* if the provided value is 1 or less, or is less than */ /* the MIN_SEQUENCE_LENGTH, then the default of 7 is used. */ /* &MIN_SEQUENCE_LENGTH: the maximum number of requested files in a path. */ /* if no value has been provided, the default of 2 is used. */ /* if the provided value is 2 or less, or is greater than */ /* the MAX_SEQUENCE_LENGTH, then the default of 1 is used. */ /* &DATES: a comma-delimited list of the YYYY-MM-DD values that are */ /* the dates for the analysis. */ /* &LIBREFS: a comma-delimited list of the SAS libnames that need to */ /* be defined by the process. */ /* &LIBPATHS: a comma-delimited list of the location that corresponds */ /* to the corresponding libname in the LIBREFS list. */ /* */ /*----------------------------------------------------------------------------*/ /* Parameters: */ /* WAB_DEBUG: a Y/N switch that indicates whether to perform the house- */ /* keeping clean-up of the STP and WORK libraries. the */ /* default value is N -- clean-up to libraries. to keep the */ /* libraries contents over-ride the default with Y. */ /* */ /*----------------------------------------------------------------------------*/ /* Input: */ /* -- none -- */ /* */ /*----------------------------------------------------------------------------*/ /* Output: */ /* */ /*----------------------------------------------------------------------------*/ /* Copyright (c) 2004 SAS Institute Inc. All Rights Reserved */ /*----------------------------------------------------------------------------*/ %macro wapath(wab_debug=N); %******************************************************************************; %* Initialize SWA resources ; %******************************************************************************; %wautils; %let wab_note = %nrstr(NOTE:(&macname) %left(%sysfunc(time(),time.))); %let wab_warning = %nrstr(WARNING:(&macname) %left(%sysfunc(time(),time.))); %let wab_error = %nrstr(ERROR:(&macname) %left(%sysfunc(time(),time.))); %let wab_rc = 0; %******************************************************************************; %* Initialize local macro variables to default values. ; %******************************************************************************; data _null_; length tmprefs $ 1024 tmppaths $ 4096; tmprefs="&librefs"; tmppaths="&libpaths"; %* make sure that both lists have the same number of elements ; num_ref_commas=countc(tmprefs,','); num_path_commas=countc(tmppaths,','); if num_ref_commas eq num_path_commas then do; do i=1 to (1+num_ref_commas); libname=scan(tmprefs,i,','); libpath=strip(scan(tmppaths,i,',')); rc = filename("fileref",libpath); did = dopen("fileref"); if did then do; rc=dclose(did); rc = libname(strip(libname), libpath); call symput("_wab_lib_error_","0"); end; else do; call symput("_wab_lib_error_","2"); call symput("_wab_lib_msg_" ,"The physical location "||strip(libpath) ||" either does not exist or cannot be " ||"opened. The library "||strip(libname) ||" must be respecified with a valid location."); end; end; end; else do; call symput("_wab_lib_error_","3"); call symput("_wab_lib_msg_","You have different numbers of " ||"library references than physical paths." ||"Please respecify your libraries."); end; run; %if %sysfunc(exist(config.waconfig)) ne 0 %then %do; data _null_; set config.waconfig; if lowcase(parameter_name) in ('wab_interactive_support' ,'wab_max_path_pages' ); call symput(parameter_name,strip(value)); run; %end; %else %do; %let wab_interactive_support=3; %let wab_max_path_pages=500; %end; %******************************************************************************; %* Create a unique id for the dsn that will contain the pathing report ; %******************************************************************************; data _null_; call symput("_pathing_data_uid_" ,compress( translate( strip(put(mod(datetime(),10**6),6.)) || "_" || strip(put(ranuni(-1),7.6)) ,' ' ,'.' ) ) ); run; %******************************************************************************; %* Validate macro variables that have been passed in via the interface ; %******************************************************************************; %* C0002 ; %if %quote(&max_sequence_length) eq %str() %then %do; %let max_sequence_length=7; %end; %if &max_sequence_length lt 2 %then %do; %let max_sequence_length=7; %end; %if &max_sequence_length gt 7 %then %do; %let max_sequence_length=7; %end; %if %quote(&min_sequence_length) eq %str() %then %do; %let min_sequence_length=2; %end; %if &min_sequence_length gt 7 %then %do; %let min_sequence_length=2; %end; %if &min_sequence_length lt 2 %then %do; %let min_sequence_length=2; %end; %if &max_sequence_length lt &min_sequence_length %then %do; %let max_sequence_length=7; %end; %if &min_sequence_length gt &max_sequence_length %then %do; %let min_sequence_length=2; %end; %let itemvarlist=%cmpres(item1-item%left(&max_sequence_length)); %let cnt_varlist=%cmpres(count1-count%left(&max_sequence_length)); %******************************************************************************; %* Define a macro to output the report data set ; %******************************************************************************; %macro wapath_outdsn(rpt_type=template); data &outdsn; length &itemvarlist $1024 ; format &itemvarlist $1024. ; label %do i=1 %to &max_sequence_length; item&i ="Item &i" %end; ; %wametatr(srce_dsn=paths ,retcode=wab_rc ); %* TEMPLATE reports have 1 obs, ERROR reports have 0 obs ; %if %upcase(&rpt_type) ne TEMPLATE %then %do; stop; %end; run; %mend; %if %upcase(&info)=TRUE %then %do; %let outdsn=stp.template_&_pathing_data_uid_; %wapath_outdsn(rpt_type=template); %goto DONE; %end; %else %do; %let outdsn=stp.pathing_&_pathing_data_uid_; %***************************************************************************; %* Make sure that START_PAGE or END_PAGE has been designated ; %***************************************************************************; %if %quote(&start_page) eq %str() and %quote(&end_page) eq %str() %then %do; %wapath_outdsn(rpt_type=error); %goto DONE; %end; %***************************************************************************; %* convert &DATES (YYYYMMDD) to &SASDATES (SAS date values) and create one ; %* macro variable for each date in &DATES ; %***************************************************************************; options noquotelenmax; /* avoid warning message about unbalanced quotes */ data req_dates_&_pathing_data_uid_ (keep=pathing_date); length pathing_date $ 8 sas_dates $ 3200; date_str="&dates"; sas_dates=put(input(scan(date_str,1,','),yymmdd10.),5.); do i=1 to (1+countc(date_str,',')); pathing_date=compress(tranwrd(scan(date_str,i,','),'-',' ')); output; call symput('extr_date_' || strip(put(i,2.)) ,strip(pathing_date) ); if i gt 1 then do; curr_date=scan(date_str,i,','); sas_dates=strip(sas_dates) || ',' || put(input(scan(date_str,i,','),yymmdd10.),5.); end; end; call symput('sasdates',strip(sas_dates)); call symput('extr_date_cnt' ,strip(put((1+countc(date_str,',')),2.)) ); run; %***************************************************************************; %* make sure that the user has not selected an invalid date from the ; %* calendar ; %***************************************************************************; proc sql; create table pathing_dates_&_pathing_data_uid_ as select scan(memname,2,'_') as pathing_date length=8 format=$char8. from dictionary.tables where libname="SUMMARY" and memname ne 'PATHING' and memname like 'PATHING_%' and notdigit(trim(memname),9)=0 order by pathing_date; quit; proc sort data=req_dates_&_pathing_data_uid_; by pathing_date; run; data x_req_dates_&_pathing_data_uid_; merge req_dates_&_pathing_data_uid_ (in=in_a) pathing_dates_&_pathing_data_uid_ (in=in_b); by pathing_date; if in_a and not in_b then output; run; %if %get_observation_count (indsn=x_req_dates_&_pathing_data_uid_) ne 0 %then %do; %wapath_outdsn(rpt_type=error); %goto DONE; %end; %***************************************************************************; %* get total number of sessions for time period selected - used to ; %* calculate PERCENT OF VISITS ; %***************************************************************************; proc summary data=summary.pathing_sess_count (where=(date in (&sasdates))) nway; var sess_count; output out=visits_&_pathing_data_uid_(keep=sess_count) sum=; run; data _null_; set visits_&_pathing_data_uid_; call symput('_path_total_visits_',strip(put(sess_count,9.))); run; %put Total visits = &_path_total_visits_ ; %if &_path_total_visits_ eq 0 %then %do; %wapath_outdsn(rpt_type=error); %goto DONE; %end; %***************************************************************************; %* &MAXITEMS is always 7 because of UI requirements ; %***************************************************************************; %let maxitems=7; %***************************************************************************; %* initialize an empty version of the data set that will accumulate the ; %* daily results ; %***************************************************************************; data pathing_X&_pathing_data_uid_; attrib conf length=8 count length=8 count1-count&maxitems length=8 item1-item&maxitems length=$5 rule length=$65 ruleid length=8 size length=8 support length=8 ; stop; run; %***************************************************************************; %* Get all paths for subset data ; %***************************************************************************; %do wapath_i=1 %to &extr_date_cnt; %if &wab_debug eq Y %then %do; %let path_sffx=&wapath_i; %end; %else %do; %let path_sffx= ; %end; proc path data=summary.pathing_&&extr_date_&wapath_i (where=(sequence gt 0) keep=session_id sequence date reference ) out=pathing_A&_pathing_data_uid_.&path_sffx support=&wab_interactive_support items=&max_sequence_length; customer session_Id; sequence Sequence / max=&wab_max_path_pages; target reference; funnel out = funnel_A&_pathing_data_uid_.&path_sffx; run; %************************************************************************; %* determine if there are paths with the requested START_PAGE and/or the ; %* requested END_PAGE ; %************************************************************************; data pathing_B_boths&_pathing_data_uid_.&path_sffx pathing_B_starts&_pathing_data_uid_.&path_sffx pathing_B_ends&_pathing_data_uid_.&path_sffx pathing_B_null&_pathing_data_uid_.&path_sffx; set pathing_A&_pathing_data_uid_.&path_sffx; length last_item $ 12; array items {1:&maxitems} item1-item&maxitems; %* subset paths that are ge &MIN_SEQUENCE_LENGTH ; if size ge &min_sequence_length; %* identify the last item in the path ; do i=&maxitems to &min_sequence_length by -1; if items{i} gt ' ' then do; last_item=items{i}; leave; end; end; %* subset paths that match the designated START_PAGE/END_PAGE ; %if %bquote(&start_page) gt %str() %then %do; %if %bquote(&end_page) gt %str() %then %do; if input(item1,12.) in (&start_page) and input(last_item,12.) in (&end_page) then do; output pathing_B_boths&_pathing_data_uid_.&path_sffx; end; %end; %else %do; if input(item1,12.) in (&start_page) then do; output pathing_B_starts&_pathing_data_uid_.&path_sffx; end; %end; %end; %else %do; if input(last_item,12.) in (&end_page) then do; output pathing_B_ends&_pathing_data_uid_.&path_sffx; end; %end; drop i last_item; run; %************************************************************************; %* identify the data set for the type of request as PATHING_B1 ; %************************************************************************; %if %get_observation_count (indsn=pathing_B_ends&_pathing_data_uid_.&path_sffx) gt 0 %then %do; %let pathing_B1=pathing_B_ends&_pathing_data_uid_.&path_sffx; %end; %else %if %get_observation_count (indsn=pathing_B_starts&_pathing_data_uid_.&path_sffx) gt 0 %then %do; %let pathing_B1=pathing_B_starts&_pathing_data_uid_.&path_sffx; %end; %else %if %get_observation_count (indsn=pathing_B_boths&_pathing_data_uid_.&path_sffx) gt 0 %then %do; %let pathing_B1=pathing_B_boths&_pathing_data_uid_.&path_sffx; %end; %else %do; %*********************************************************************; %* there are no paths for the designated START_PAGE / END_PAGE ; %*********************************************************************; %let pathing_B1=pathing_B_null&_pathing_data_uid_.&path_sffx; %end; %************************************************************************; %* break out all the sub-paths from PATHING_B1 ; %************************************************************************; data pathing_B2&_pathing_data_uid_.&path_sffx; set &pathing_B1; length master_rule $ 150; array items {1:&maxitems} $ item1-item&maxitems; master_rule=rule; rule=strip(item1); do i=2 to size; rule=strip(rule) || ' ==> ' || strip(items{i}); output; end; keep rule item1-item&maxitems; run; %************************************************************************; %* remove duplicates from PATHING_B2 ; %************************************************************************; proc sort data=pathing_B2&_pathing_data_uid_.&path_sffx out =pathing_B3&_pathing_data_uid_.&path_sffx nodupkeys; by rule; run; %************************************************************************; %* m/m PATHING_B3 with PATHING_A to re-capture rule-specific information ; %************************************************************************; proc sort data=pathing_A&_pathing_data_uid_.&path_sffx; by rule; run; data pathing_B&_pathing_data_uid_.&path_sffx only_a only_b; merge pathing_B3&_pathing_data_uid_.&path_sffx (in=in_a) pathing_A&_pathing_data_uid_.&path_sffx (in=in_b); by rule; if in_a then if in_b then output pathing_B&_pathing_data_uid_.&path_sffx; else output only_a; else output only_b; run; proc sort data=pathing_B&_pathing_data_uid_.&path_sffx; by ruleid; run; %************************************************************************; %* m/m PATHING_B with FUNNEL_A to capture intermediate node counts ; %************************************************************************; data pathing_C&_pathing_data_uid_.&path_sffx; merge pathing_B&_pathing_data_uid_.&path_sffx (in=in_left) funnel_A&_pathing_data_uid_.&path_sffx (in=in_right); by ruleid; if in_left; run; proc sort data=pathing_C&_pathing_data_uid_.&path_sffx; by item1-item&maxitems; run; %************************************************************************; %* create entries for root nodes ; %************************************************************************; data pathing_D&_pathing_data_uid_.&path_sffx; set pathing_C&_pathing_data_uid_.&path_sffx; by item1-item&maxitems; array counts {1:&maxitems} count1-count&maxitems; array item {1:&maxitems} $ item1-item&maxitems; output; if first.item1 then do; count=count1; rule=item1; size=1; do i=2 to &maxitems; counts{i}=.; item{i}=' '; end; output; end; drop i; run; %*********************************************************************; %* NOTE: append done with DATA Step because attrib for RULE can vary ; %*********************************************************************; data pathing_X&_pathing_data_uid_; set pathing_X&_pathing_data_uid_ pathing_D&_pathing_data_uid_.&path_sffx; run; %end; %***************************************************************************; %* make sure that the UI parameters have not excluded all obs ; %***************************************************************************; %if %get_observation_count(indsn=pathing_X&_pathing_data_uid_) eq 0 %then %do; %wapath_outdsn(rpt_type=error); %goto DONE; %end; %let itemvarlist=%cmpres(item1-item%left(&maxitems)); %let countvarlist=%cmpres(count1-count%left(&maxitems)); %***************************************************************************; %* sum the results of all dates tallies ; %***************************************************************************; proc summary data=pathing_X&_pathing_data_uid_ nway; class rule; id item1-item&maxitems size; var count; output out=pathing_Y&_pathing_data_uid_(drop=_type_ _freq_) sum=; run; %***************************************************************************; %* select just the Top n paths ; %***************************************************************************; %* select out only the complete paths -- the complete paths (and not any ; %* of the sub-paths) are candidates for TopN selection ; %* NOTE: a complete path is: ; %* - ge the prev rows size and ge the next rows size ; %* OR ; %* - lt the prev rows size and eq the next rows size ; %***************************************************************************; data pathing_Y_ruleids&_pathing_data_uid_; set pathing_Y&_pathing_data_uid_ end=eof; retain prev1_size prev1_count prev2_size prev2_count . prev1_rule prev2_rule ; array items {1:&maxitems} $ item1-item&maxitems; %************************************************************************; %* clear out ITEM1 thru ITEM&maxitems to prep for the next step ; %************************************************************************; do i=1 to &maxitems; items{i}=' '; end; if _n_ eq 1 then do; prev2_size =size; prev2_rule =rule; prev3_count=count; end; else if _n_ eq 2 then do; if eof then do; selected_rule =rule; selected_count=count; output; end; else do; prev1_size =size; prev1_rule =rule; prev1_count=count; end; end; else do; if prev1_size ge prev2_size then do; if prev1_size ge size then do; selected_rule =prev1_rule; selected_count=prev1_count; output; end; end; else if prev1_size eq size then do; selected_rule=prev1_rule; output; end; prev2_size =prev1_size; prev2_rule =prev1_rule; prev2_count=prev1_count; prev1_size =size; prev1_rule =rule; prev1_count=count; end; if eof then do; selected_rule =rule; selected_count=count; output; end; keep selected_rule selected_count item1-item&maxitems; ; run; %************************************************************************; %* identify the Top n paths ; %************************************************************************; proc sort data=pathing_Y_ruleids&_pathing_data_uid_; by descending selected_count; run; data pathing_Y_topn&_pathing_data_uid_; set pathing_Y_ruleids&_pathing_data_uid_; array items {1:&maxitems} $ item1-item&maxitems; if _n_ le &topn; rule=selected_rule; do i=1 to (1+countc(selected_rule,'>')); items{i}=strip(scan(selected_rule,1,'=')); start_pos=1+index(selected_rule,'>'); selected_rule=substr(selected_rule,start_pos); end; rule=strip(item1); output; do j=2 to (i-1); rule=strip(rule) || ' ==> ' || strip(items{j}); output; end; keep rule selected_count ; run; proc sort data=pathing_Y_topn&_pathing_data_uid_ nodupkeys; by rule; run; data pathing_Y_final&_pathing_data_uid_ (drop=selected_count); merge pathing_Y_topn&_pathing_data_uid_ (in=in_topn) pathing_Y&_pathing_data_uid_ ; by rule; if in_topn; run; %***************************************************************************; %* Assign the format that will translate the ITEMx from their REFERENCE ; %* values to their REQUESTED_FILE values ; %***************************************************************************; %* identify the reference IDs that are in the ITEMx fields ; %***************************************************************************; data references_&_pathing_data_uid_; set pathing_Y_final&_pathing_data_uid_; length id $ 36; array items {1:&maxitems} $ item1-item&maxitems; do i=1 to &maxitems while(items{i} gt ' '); id=items{i}; output; end; keep id; run; proc sort data=references_&_pathing_data_uid_ out =uniq_ref_&_pathing_data_uid_ nodupkeys; by id; run; %***************************************************************************; %* create a subset of SUMMARY.PAGE_FREQUENCIES that contains only the ; %* REFERENCEs in the PATHING_Y data set ; %***************************************************************************; data format_&_pathing_data_uid_; length id $ 36; if _n_ eq 1 then do; declare hash h(dataset: "work.uniq_ref_&_pathing_data_uid_" ,hashexp: 10 ); h.defineKey('id'); h.defineDone(); end; set summary.page_frequencies; rc=h.find(); if rc eq 0 then output; drop rc; run; %***************************************************************************; %* build the format from the subset data set ; %***************************************************************************; proc format cntlin=format_&_pathing_data_uid_; run; data stp.pathing_&_pathing_data_uid_; length item_new $ 1024 &itemvarlist $ 1024 ; format &itemvarlist $1024. ; %wametatr(srce_dsn=paths ,retcode=wab_rc ); set pathing_Y_final&_pathing_data_uid_; by item1-item&maxitems; label %do j=1 %to &max_sequence_length; item&j="Item &j" %end; ; retain count1-count&maxitems; array counts {1:&maxitems} count1-count&maxitems; array item {1:&maxitems} $ item1-item&maxitems; do i=1 to &max_sequence_length; item_new=put(strip(item{i}),$idxtorf.); item{i}=item_new; end; top_path=1; if first.item1 then do; top_path=0; count1=count; do i=2 to &maxitems; counts{i}=.; end; end; if first.item2 then do; count2=count; do i=3 to &maxitems; counts{i}=.; end; end; if first.item3 then do; count3=count; do i=4 to &maxitems; counts{i}=.; end; end; if first.item4 then do; count4=count; do i=5 to &maxitems; counts{i}=.; end; end; if first.item5 then do; count5=count; do i=6 to &maxitems; counts{i}=.; end; end; if first.item6 then do; count6=count; count7=.; end; if first.item7 then count7=count; lhs=.; do i=1 to (size-1); lhs=sum(lhs,counts{i}); end; pc_visits=100*(count/&_path_total_visits_); if size eq 1 then do; pc_root=.; pc_prev=.; conf=.; end; else do; pc_root=100*(count/count1); pc_prev=100*(count/counts{size-1}); conf=100-pc_prev; end; support=pc_visits; keep &itemvarlist count support conf top_path pc: ; run; %end; %******************************************************************************; %* make sure that the STP.PATHING_x report contains obs ; %******************************************************************************; %if %get_observation_count (indsn=stp.pathing_&_pathing_data_uid_) eq 0 %then %do; %wapath_outdsn(rpt_type=error); %end; %DONE:; %if %upcase(&info) ne TRUE and &wab_debug ne Y %then %do; proc datasets library=work nolist nowarn; delete format_&_pathing_data_uid_ / memtype=data; delete funnel_A&_pathing_data_uid_ / memtype=data; delete only_a / memtype=data; delete only_b / memtype=data; delete pathing_A&_pathing_data_uid_ / memtype=data; delete pathing_B&_pathing_data_uid_ / memtype=data; delete pathing_B2&_pathing_data_uid_ / memtype=data; delete pathing_B3&_pathing_data_uid_ / memtype=data; delete pathing_B_boths&_pathing_data_uid_ / memtype=data; delete pathing_B_ends&_pathing_data_uid_ / memtype=data; delete pathing_B_null&_pathing_data_uid_ / memtype=data; delete pathing_B_starts&_pathing_data_uid_ / memtype=data; delete pathing_C&_pathing_data_uid_ / memtype=data; delete pathing_D&_pathing_data_uid_ / memtype=data; delete pathing_dates_&_pathing_data_uid_ / memtype=data; delete pathing_X&_pathing_data_uid_ / memtype=data; delete pathing_Y&_pathing_data_uid_ / memtype=data; delete pathing_Y_final&_pathing_data_uid_ / memtype=data; delete pathing_Y_ruleids&_pathing_data_uid_ / memtype=data; delete pathing_Y_topn&_pathing_data_uid_ / memtype=data; delete references_&_pathing_data_uid_ / memtype=data; delete req_dates_&_pathing_data_uid_ / memtype=data; delete uniq_ref_&_pathing_data_uid_ / memtype=data; delete visits_&_pathing_data_uid_ / memtype=data; delete x_req_dates_&_pathing_data_uid_ / memtype=data; quit; %end; %put *****OUTPUT PARAMETERS*****; %put stp:output_dataset=&outdsn; %let output_dataset=&outdsn; %mend wapath;