/*----------------------Web Analytics-Experimental----------------------------*/ /* Name: */ /* WASEGANL -- Web Analytics SEGmentation ANaLysis */ /* */ /* Purpose: */ /* Perform the experimental automatic segmentation using PROC ARBORETUM. */ /* */ /*----------------------------------------------------------------------------*/ /* Supported By: */ /* Caroline Bahler (CABAHL) */ /* */ /*----------------------------------------------------------------------------*/ /* History: */ /* Date Username Desc Change Code */ /* 20040323 saskxs Created program NA */ /* 20040824 sassyw S0265918:add historical seg reports S0265918 */ /* 20041008 sassyw Fixed problem with column label C001 */ /* leading underscore was missing */ /* 20041011 sassyw Fixed a grammar problem in error msg C002 */ /* 20041011 frroed Resolved case issue for variable C003 */ /* names */ /* 20041021 cabahl Add definition_to_run C004 */ /* 20041108 cabahl Moved all metadata checks to under C005 */ /* individual segments */ /* 20050616 frroed Implement ETL/non-ETL distinctions C006 */ /* 20060630 cabahl Changed test for duplicate visitor */ /*----------------------------------------------------------------------------*/ /* Dependencies: */ /* The following macros need to be available: */ /* GET_OBSERVATION_COUNT */ /* LOCKTEST */ /* WATSTDSN */ /* */ /* The following macro variables need to be defined and assigned values */ /* in the invoking environment: */ /* &TEMP_LIB: a libref for the storage location of temporary data sets */ /* &WAB_ERROR: the text that identifies WA macro error messages. */ /* &WAB_NOTE: the text that identifies WA macro notification messages. */ /* */ /*----------------------------------------------------------------------------*/ /* Parameters: */ /* AUTOSEG_REPORT_DATE: the SAS date value for which the WAAUTOSG */ /* macro creates data. */ /* DEFINITION_TO_RUN: (Required) comma delimited list of analytic */ /* segmentation to process. 
The list must contains */ /* names of segmentations defined in the */ /* waautinp_config data set. _ALL_ can be specified */ /* to run all segmentation definitions */ /* RETCODE: mechanism for passing Return Codes. */ /* */ /*----------------------------------------------------------------------------*/ /* Input: */ /* (autoseg_report_date= */ /* ,definition_to_run= */ /* ,outlib= */ /* ,retcode=wab_rc */ /* ); */ /* */ /*----------------------------------------------------------------------------*/ /* Output: */ /* SUMMARY.AUTOSEG_SEGMENT_RULES */ /* */ /*----------------------------------------------------------------------------*/ /* Copyright (c) 2004 SAS Institute Inc. All Rights Reserved */ /*----------------------------Experimental------------------------------------*/ %macro waseganl(autoseg_report_date= ,definition_to_run= ,outlib= ,retcode=); %global wab_seg_message; %local macname wab_seg_reason; %let macname=&sysmacroname; /* * Initialize return code; */ %let retcode=0; /* check to make sure autoseg_report_date has date */ %if &autoseg_report_date = %str() %then %do; %put %unquote(&wab_error) No report date specified.; %put %unquote(&wab_error) Automatic Segmentation program exiting.; %let retcode=1; %goto ERREXIT; %end; %let config=config.waautinp; %***************************************************************************; %* perform standard tests on &config ; %***************************************************************************; %if %sysfunc(exist(&config,data))=0 %then %do; %put %unquote(&wab_error) The &config data set does not exist.; %put %unquote(&wab_error) Automatic Segmentation program exiting.; %let retcode=1; %goto ERREXIT; %end; %locktest(dset=&config); %if &lock_sw ne 0 %then %do; %put %unquote(&wab_error) There was a failure opening %QCMPRES( the) &config Automatic Segmentation metadata data %QCMPRES( set.); %put %unquote(&wab_error) Automatic Segmentation program exiting.; %let retcode=1; %goto ERREXIT; %end; %if 
%get_observation_count(indsn=&config) eq 0 %then %do; %put %unquote(&wab_error) The &config data set %QCMPRES( contains) 0 observations.; %put %unquote(&wab_error) Automatic Segmentation program exiting.; %let retcode=1; %goto ERREXIT; %end; %***************************************************************************; %* Configuration data Validation: ; %* Does metadata dataset have all the standard variables (& attribs)? ; %***************************************************************************; %watstdsn(test_dsn=&config ,tmpl_dsn=waautinp ,retcode=wab_rc ); %if &wab_rc=1 %then %do; %* %WATSTDSN failed ; %put %unquote(&wab_error) Automatic Segmentation program exiting.; %goto ERREXIT; %end; %let waautosg=config.waautosg; %***************************************************************************; %* perform standard tests on &WAAUTOSG ; %***************************************************************************; %if %sysfunc(exist(&waautosg,data))=0 %then %do; %put %unquote(&wab_error) The &waautosg data set does not exist.; %put %unquote(&wab_error) Automatic Segmentation program exiting.; %let retcode=1; %goto ERREXIT; %end; %locktest(dset=&waautosg); %if &lock_sw ne 0 %then %do; %put %unquote(&wab_error) There was a failure opening %QCMPRES( the) &waautosg Automatic Segmentation metadata data %QCMPRES( set.); %put %unquote(&wab_error) Automatic Segmentation program exiting.; %let retcode=1; %goto ERREXIT; %end; %if %get_observation_count(indsn=&waautosg) eq 0 %then %do; %put %unquote(&wab_error) The &waautosg data set %QCMPRES( contains) 0 observations.; %put %unquote(&wab_error) Automatic Segmentation program exiting.; %let retcode=1; %goto ERREXIT; %end; %***************************************************************************; %* Configuration data Validation: ; %* Does metadata dataset have all the standard variables (& attribs)? 
; %***************************************************************************; %watstdsn(test_dsn=&waautosg ,tmpl_dsn=waautosg ,retcode=wab_rc ); %if &wab_rc=1 %then %do; %* %WATSTDSN failed ; %put %unquote(&wab_error) Automatic Segmentation program exiting.; %goto ERREXIT; %end; %***************************************************************************; %* create a TEMP_LIB copy of WAAUTOSG and select from it the Segmentations ; %* that have been requested via the DEFINITION_TO_RUN parameter. ; %***************************************************************************; proc sort data=config.waautosg out =&temp_lib..waautosg; by name; quit; %if %quote(&definition_to_run) eq %str() or %quote(%upcase(&definition_to_run)) eq _ALL_ %then %do; %let nsegmentations=%get_observation_count(indsn=&temp_lib..waautosg); %put %unquote(&wab_note) DEFINITION_TO_RUN parameter value %QCMPRES(will select) all &nsegmentations segmentations.; %if &nsegmentations eq 0 %then %do; %put %unquote(&wab_warning) No segmentation will be processed.; %let retcode=0; %goto ERREXIT; %end; %end; %else %if %quote(%upcase(&definition_to_run)) eq _ETL_ %then %do; data &temp_lib..waautosg; set &temp_lib..waautosg; if etl_run_sw; run; %let nsegmentations=%get_observation_count(indsn=&temp_lib..waautosg); %put %unquote(&wab_note) DEFINITION_TO_RUN parameter value %QCMPRES(will limit) processing to &nsegmentations ETL segmentations.; %if &nsegmentations eq 0 %then %do; %put %unquote(&wab_warning) No segmentation will be processed.; %let retcode=1; %goto ERREXIT; %end; %end; %else %if %quote(%upcase(&definition_to_run)) eq _NON_ETL_ %then %do; data &temp_lib..waautosg; set &temp_lib..waautosg; if not etl_run_sw; run; %let nsegmentations=%get_observation_count(indsn=&temp_lib..waautosg); %put %unquote(&wab_note) DEFINITION_TO_RUN parameter value %QCMPRES(will limit) processing to &nsegmentations non-ETL segmentations.; %if &nsegmentations eq 0 %then %do; %put %unquote(&wab_warning) No segmentation 
will be processed.;
         %let retcode=1;
         %goto ERREXIT;
      %end;
   %end;
   %else %do;
      %************************************************************************;
      %* identify each of the requested segmentations in DEFINITION_TO_RUN    ;
      %************************************************************************;
      /* Build one DEF_LIST row per name in the comma-delimited request.      */
      /* The list is walked by token position.  The earlier version removed   */
      /* each processed name with TRANWRD, which also erased that text inside */
      /* longer names later in the list (SEG1 inside SEG10) and so produced   */
      /* names that were never requested.                                     */
      data &temp_lib..def_list;
         length defs $ 5000;
         length name $ 25;
         defs="&definition_to_run";
         /* number of commas + 1 is an upper bound on the number of tokens;   */
         /* SCAN returns blank past the last token and those are skipped      */
         do token_no=1 to countc(defs,',')+1;
            name=strip(scan(defs,token_no,','));
            if name gt ' ' then output;
         end;
         keep name;
      run;

      %************************************************************************;
      %* m/m DEF_LIST with WAAUTOSG to establish the metadata that is for     ;
      %* only the requested segmentations                                     ;
      %************************************************************************;
      /* NODUPKEY: a name requested twice must select its metadata row once   */
      proc sort data=&temp_lib..def_list nodupkey;
         by name;
      run;

      /* ONLY_B receives requested names that have no WAAUTOSG metadata;      */
      /* ONLY_A receives metadata rows that were not requested                */
      data &temp_lib..waautosg only_a only_b;
         merge &temp_lib..waautosg (in=in_a)
               &temp_lib..def_list (in=in_b);
         by name;
         if in_a then
            if in_b then output &temp_lib..waautosg;
            else output only_a;
         else output only_b;
      run;

      %let nsegmentations=%get_observation_count(indsn=&temp_lib..waautosg);
      %put %unquote(&wab_note) DEFINITION_TO_RUN parameter value %QCMPRES(will limit) processing to &nsegmentations selected segmentations.;

      %if %get_observation_count(indsn=only_b) ne 0 %then %do;
         %put %unquote(&wab_error) There are Segmentation names in the %QCMPRES( DEFINITION_TO_RUN) argument -- &definition_to_run -- %QCMPRES( that) do not have any metadata.;
         %let retcode=1;
         %goto ERREXIT;
      %end;

      %if &nsegmentations eq 0 %then %do;
         %put %unquote(&wab_warning) No segmentation will be processed.;
         %let retcode=1;
         %goto ERREXIT;
      %end;
   %end;

   /* Join the variable-level metadata (WAAUTINP) to the selected            */
   /* segmentation definitions (temporary WAAUTOSG copy) on segmentation id  */
   proc sql;
      create table &temp_lib.._wab_autosg_meta_all_ as
         select l.segmentation_id as id
              , l.role
              , l.level
              , l._name_
              , l._label_
              , r.name as segmentation_name
              , r.indsn
              , r.num_segments_to_create
              , r.data_prep_include_code
           from config.waautinp l, &temp_lib..waautosg r
          where l.segmentation_id eq r.id
; quit;

   %******************************************************************************;
   %* Determine the unique segmentations their input data sets, response/target   ;
   %* variables, predictor/input variables, and the measurement levels of the     ;
   %* response/target variables and predictor/input variables.                    ;
   %******************************************************************************;

   /* subset to contain only what is needed for proc arboretum */
   proc sort data=&temp_lib.._wab_autosg_meta_all_
             out=&temp_lib.._wab_autosg_meta_;
      where role in ("target","input","visitor id") and segmentation_name ne '';
      by segmentation_name role;
   run;

   %if &syserr gt 4 %then %do;
      %put %unquote(&wab_error) The Automatic Segmentation metadata %QCMPRES( data) set could not be sorted.;
      %put %unquote(&wab_error) Web Analytics program exiting.;
      /* RETCODE was set to 0 at macro entry, so the former                    */
      /* "let (ampersand)retcode=1" resolved to an assignment to the name 0,   */
      /* which is not a valid macro variable name.  Assign RETCODE directly,   */
      /* as every other error path in this macro does.                         */
      %let retcode=1;
      %goto ERREXIT;
   %end;

   /* One pass over the sorted metadata: number the segmentations and publish */
   /* their attributes as indexed macro variables via CALL SYMPUT             */
   data _null_;
      set &temp_lib.._wab_autosg_meta_ (rename=(_name_=name)) end=eof;
      by segmentation_name;
      retain seg_cnt 0;
      length nominput ordinput intinput $4096
             data_prep_code $200
             path $1500
             ;
      retain nominput ordinput intinput;
      if lowcase(role)='target' and level ne 'nominal' then level='nominal';
      if first.segmentation_name then do;
         /* reset nominal, ordinal and interval varlists */
         nominput='';
         ordinput='';
         intinput='';
         /* increment segmentation analysis counter */
         seg_cnt=seg_cnt+1;
   %******************************************************************************;
   %* Store the unique segmentations and their input data sets in macro variables ;
   %* for use in PROC ARBORETUM. 
; %******************************************************************************; call symput("_wab_autoseg_id_"||strip(put(seg_cnt,6.)) ,strip(id)); call symput("_wab_autoseg_name_"||strip(put(seg_cnt,6.)) ,strip(segmentation_name)); call symput("_wab_autoseg_indsn_"||strip(put(seg_cnt,6.)),strip(indsn)); call symput("_autoseg_num_segments"||strip(put(seg_cnt,6.)) ,strip(put(num_segments_to_create,6.))); /* location of data prep code - assuming stored procedure directory */ if data_prep_include_code ne '' then do; if index(strip(data_prep_include_code),"&separator") = 0 then do; path="&swamart.&separator.sas"; data_prep_code=strip(data_prep_include_code); end; /* other directory */ else do; data_prep_code=reverse(scan(reverse(strip(data_prep_include_code)),1,"&separator")); path=tranwrd(strip(data_prep_include_code),"&separator"||strip(data_prep_code),''); end; custom_code='Y'; end; else custom_code='N'; call symput("_wab_autseg_custom_code_"||strip(put(seg_cnt,6.)) ,strip(custom_code)); call symput("_wab_autsg_dp_include_code_"||strip(put(seg_cnt,6.)) ,strip(path)||"&separator"||strip(data_prep_code)); call symput("_wab_autsg_dataprep_code_"||strip(put(seg_cnt,6.)) ,strip(data_prep_code)); call symput("_wab_autsg_dataprep_path_"||strip(put(seg_cnt,6.)) ,strip(path)); end; /* force target level to nominal */ if lowcase(role)='target' and level ne 'nomimal' then level='nominal'; select(lowcase(role)); when('target') do; call symput("_wab_autoseg_target_"||strip(put(seg_cnt,6.)) ,strip(name)); call symput("_wab_autoseg_target_label_"||strip(put(seg_cnt,6.)) ,strip(_label_)); call symput("_wab_autoseg_target_level_"||strip(put(seg_cnt,6.)) ,strip(lowcase(level))); end; when('input') do; if lowcase(level) = 'nominal' then nominput=strip(compbl(nominput||name)); if lowcase(level) = 'ordinal' then ordinput=strip(compbl(ordinput||name)); if lowcase(level) = 'interval' then intinput=strip(compbl(intinput||name)); end; when('visitor id') do; call 
symput("_wab_autoseg_visitor_id_"||strip(put(seg_cnt,6.)) ,strip(name));
         end;
         otherwise;
      end;
      /* publish the accumulated input lists once per segmentation */
      if last.segmentation_name then do;
         call symput("_wab_nominput_list_"||strip(put(seg_cnt,6.)) ,strip(compbl(nominput)));
         call symput("_wab_ordinput_list_"||strip(put(seg_cnt,6.)) ,strip(compbl(ordinput)));
         call symput("_wab_interinput_list_"||strip(put(seg_cnt,6.)) ,strip(compbl(intinput)));
      end;
      /* total number of segmentations drives the iteration loop below */
      if eof then call symput("_wab_num_autoseg_",strip(put(seg_cnt,6.)));
   run;

   %******************************************************************************;
   %* Initialize the size of the training and validation sample data sets. The    ;
   %* training data is used to build the decision tree segmentation model. The    ;
   %* validation data set is used to verify the decision tree segmentation model  ;
   %* can be generalized to other samples from the same population of             ;
   %* units that we are classifying into segments. The type of sample taken is    ;
   %* a simple random sample.                                                     ;
   %******************************************************************************;
   /* NOTE(review): both sample-size macro variables are referenced before any  */
   /* assignment in the visible code; presumably the caller defines them.       */
   %if %quote(&wab_autoseg_train_samp_pct)= %then
      %let wab_autoseg_train_samp_pct=0.6;

   /* The range test must use SYSEVALF.  A plain macro IF evaluates with EVAL,  */
   /* which compares operands containing a decimal point as text, so a value    */
   /* such as 0.4 was not recognised as being at or below .50.                  */
   %if %sysevalf((&wab_autoseg_train_samp_pct gt 1) or (&wab_autoseg_train_samp_pct le .50),boolean) %then
      %let wab_autoseg_train_samp_pct=0.6;

   /* The validation share is always the complement of the training share.     */
   /* The former blank test and not-equal test both assigned this same value.  */
   %let wab_autoseg_valid_samp_pct=%sysevalf(1-&wab_autoseg_train_samp_pct);

   %******************************************************************************;
   %* Working data set clean-up                                                   ;
   %******************************************************************************;

   /* delete data set that keeps track of code that was run */
   proc datasets library=&temp_lib nolist;
      delete _wab_autosg_data_prep_code_ _wab_autosg_not_enough_data;
   run;
   quit;

   %put ********************** SEGMENTATION ANALYSIS START ************************;

   %do wab_seg_iter=1 %to 
&_wab_num_autoseg_; %let _dp_code_ext=; %let wab_seg_reason=; %let wab_seg_message=; %put *********************** START &&_wab_autoseg_name_&wab_seg_iter ************************; /* Remove the segmentation analysis for a specific segmentation and date */ %IF %SYSFUNC(EXIST(&outlib..autoseg_segment_rules,DATA)) %THEN %DO; /* * Remove any existing records for the day that is being processed */ data &outlib..autoseg_segment_rules; set &outlib..autoseg_segment_rules; if date = &autoseg_report_date and strip(segmentation_name) = strip(compbl("&&_wab_autoseg_name_&wab_seg_iter")) then delete ; run; %END; %******************************************************************************; %* Configuration data Validation: ; %* A. Is there only one target per Segmentation instance. ; %* B. Is there at least one input per Segmentation instance. ; %* C. Are all values for role valid, are there any missing values for ; %* segmentation name, the input data set, target variable, and role. ; %******************************************************************************; %******************************************************************************; %* Count the number of missing values for input variables, target variables, ; %* input data sets, and segmenatation names. Also count the number of variables; %* which have invalid roles specified. 
; %******************************************************************************; %let _wab_target_count_ =0; %let _wab_input_count_ =0; %let _wab_visitor_count_ =0; %let _wab_target_nam_missing_ =0; %let _wab_input_nam_missing_ =0; %let _wab_visitor_nam_missing_ =0; data _null_; retain target_num_missing input_num_missing visitor_num_missing role_chk target_cnt visitor_cnt input_cnt 0; set &temp_lib.._wab_autosg_meta_ end=eof; where strip(segmentation_name) = strip(compbl("&&_wab_autoseg_name_&wab_seg_iter")); if strip(lowcase(role))="target" then do; if compbl(_name_)=" " then target_num_missing=target_num_missing+1; target_cnt = target_cnt + 1; end; if strip(lowcase(role))="input" then do; if compbl(_name_)=" " then input_num_missing=input_num_missing+1; input_cnt=input_cnt+1; end; if strip(lowcase(role))="visitor id" then do; if compbl(_name_)=" " then visitor_num_missing=visitor_num_missing+1; visitor_cnt=visitor_cnt+1; end; if eof then do; call symput("_wab_target_count_",target_cnt); call symput("_wab_input_count_",input_cnt); call symput("_wab_visitor_count_",visitor_cnt); call symput("_wab_target_nam_missing_",target_num_missing); call symput("_wab_input_nam_missing_",input_num_missing); call symput("_wab_visitor_nam_missing_",visitor_num_missing); end; run; %******************************************************************************; %* Check to see if any roles are missing or variable names are blank ; %******************************************************************************; %if &_wab_target_nam_missing_ %then %do; %put %unquote(&wab_error) &&_wab_autoseg_name_&wab_seg_iter %QCMPRES( target/response) variable that is unspecified.; %let wab_seg_reason=Segmentation analysis did not run: The target variable does not have a name. 
Please contact Administrator; %goto DECTREE; %end; %if &_wab_target_count_ = 0 %then %do; %put %unquote(&wab_error) &&_wab_autoseg_name_&wab_seg_iter %QCMPRES( target/response) variable was not specified in definition.; %let wab_seg_reason=Segmentation analysis did not run: The target variable has not been specified in definition. Please contact Administrator; %goto DECTREE; %end; %if &_wab_input_nam_missing_ %then %do; %put %unquote(&wab_error) &&_wab_autoseg_name_&wab_seg_iter %QCMPRES( has) at least one input variable that is unspecified.; %let wab_seg_reason=Segmentation analysis did not run: At least one input metrics does not have a name. Please contact Administrator;; %goto DECTREE; %end; %if &_wab_input_count_ = 0 %then %do; %put %unquote(&wab_error) &&_wab_autoseg_name_&wab_seg_iter %QCMPRES( no) input variable has been specified in definition.; %let wab_seg_reason=Segmentation analysis did not run: No input metric has been specified in the definition . Please contact Administrator;; %goto DECTREE; %end; %if &_wab_visitor_nam_missing_ %then %do; %put %unquote(&wab_error) &&_wab_autoseg_name_&wab_seg_iter %QCMPRES( has) an unspecified visitor variable.; %let wab_seg_reason=Segmentation analysis did not run: The visitor variable does not have a name. Please contact Administrator; %goto DECTREE; %end; %if &_wab_visitor_count_ =0 %then %do; %put %unquote(&wab_error) &&_wab_autoseg_name_&wab_seg_iter %QCMPRES( visitor) variable is unspecified.; %let wab_seg_reason=Segmentation analysis did not run: No visitor variable has been specified in the definition. 
Please contact Administrator;
         %goto DECTREE;
      %end;

      /*----------------------------------------------------------------------*
       * Run custom code if supplied
       *----------------------------------------------------------------------*/
      %if &&_wab_autseg_custom_code_&wab_seg_iter = Y %then %do;
         /*----------------------------------------------------------------*
          * Run data prep code
          *----------------------------------------------------------------*/
         /* assume a valid extension; the step below flips the flag to N    */
         /* when the last four characters of the file name are not .sas     */
         %let _dp_code_ext_=Y;
         data _null_;
            length data_prep_code $2000;
            data_prep_code = "&&_wab_autsg_dataprep_code_&wab_seg_iter";
            data_prep_code_ext = reverse(substr(reverse(strip(data_prep_code)),1,4));
            if lowcase(data_prep_code_ext) ne '.sas' then
               call symput('_dp_code_ext_','N');
         run;

         %if &_dp_code_ext_ = N %then %do;
            /* Report the file name held in _WAB_AUTSG_DATAPREP_CODE_n.  The  */
            /* message used to reference _WAB_AUTSG_DP_CODE_n, a name that is */
            /* never created, so the file name was missing from the log.      */
            %put %unquote(&wab_warning) &&_wab_autsg_dataprep_code_&wab_seg_iter is not %QCMPRES( named) appropriately.;
            %put %unquote(&wab_warning) The data prep code file MUST have %quote(.sas) as the file extension, example %quote(test.sas);
            %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
            %let wab_seg_reason=Segmentation analysis did not run: Data prep code not named appropriately. Please contact Administrator;
            %goto DECTREE;
         %end;

         /* the full path of the include file must exist before it is run */
         %if %sysfunc(fileexist(&&_wab_autsg_dp_include_code_&wab_seg_iter)) = 0 %then %do;
            %put %unquote(&wab_warning) Could not find &&_wab_autsg_dp_include_code_&wab_seg_iter %QCMPRES( to) create input data set. If working in UNIX check the character case of path and filename entered.;
            %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
            %let wab_seg_reason=Segmentation analysis did not run: Could not find &&_wab_autsg_dp_include_code_&wab_seg_iter. 
Please contact Administrator; %goto DECTREE; %end; data &temp_lib.._wab_autosg_data_prep_code_; length data_prep_include_code $2000; %if %sysfunc(exist(&temp_lib.._wab_autosg_data_prep_code_)) > 0 %then %do; set &temp_lib.._wab_autosg_data_prep_code_; if strip(data_prep_include_code) ne strip("&&_wab_autsg_dp_include_code_&wab_seg_iter") then do; output; data_prep_include_code="&&_wab_autsg_dp_include_code_&wab_seg_iter"; call symput('_wab_autosg_run_dp_code_','Y'); output; end; else do; call symput('_wab_autosg_run_dp_code_','N'); output; end; %end; %else %do; data_prep_include_code="&&_wab_autsg_dp_include_code_&wab_seg_iter"; call symput('_wab_autosg_run_dp_code_','Y'); output; %end; run; %if &_wab_autosg_run_dp_code_ = Y %then %do; %put ************************ START CUSTOM DATA PREP CODE ************************; /*---------------------------------------------------------------* * If input data set already exists then rename *---------------------------------------------------------------*/ %if %sysfunc(exist(&&_wab_autoseg_indsn_&wab_seg_iter)) > 0 %then %do; data _null_; dsid=open("&&_wab_autoseg_indsn_&wab_seg_iter"); lib=attrc(dsid,"lib"); mem=attrc(dsid,"mem"); call symput('library',strip(lib)); call symput('custom_dataset',strip(mem)); /* rename existing data set to holding name */ proc datasets library=&library nolist; delete &custom_dataset; run; quit; %end; filename dataprep "&&_wab_autsg_dataprep_path_&wab_seg_iter"; %include dataprep("&&_wab_autsg_dataprep_code_&wab_seg_iter"); %put ************************ END CUSTOM DATA PREP CODE ************************; %end; %end; %if %nrbquote(&wab_seg_message) ne %str() %then %do; %let wab_seg_reason=Segmentation analysis did not run: &wab_seg_message; %goto DECTREE; %end; %if %sysfunc(exist(&&_wab_autoseg_indsn_&wab_seg_iter)) = 0 %then %do; %put %unquote(&wab_warning) &&_wab_autoseg_indsn_&wab_seg_iter %QCMPRES( data) source does not exist; %put %unquote(&wab_warning) The Automatic Segmentation for 
%QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
         %let wab_seg_reason=Segmentation analysis did not run: &&_wab_autoseg_indsn_&wab_seg_iter does not exist. Please contact Administrator;
         %goto DECTREE;
      %end;

      %if %get_observation_count(indsn=&&_wab_autoseg_indsn_&wab_seg_iter)=0 %then %do;
         %put %unquote(&wab_warning) &&_wab_autoseg_indsn_&wab_seg_iter %QCMPRES( data) source has 0 observations;
         %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
         %let wab_seg_reason=Segmentation analysis did not run: Need at least 30 days of data for segmentation analysis.;
         %goto DECTREE;
      %end;

      /*-------------------------------------------------------------*
       * Verify that expected variables are in the data
       *-------------------------------------------------------------*/
      /* NOTE(review): the two work data sets below are no longer read by   */
      /* the check itself; they are still created in case steps outside     */
      /* this view use them - confirm before removing.                      */
      proc contents data=&&_wab_autoseg_indsn_&wab_seg_iter noprint
                    out=indsn_vars_&wab_seg_iter;
      run;
      proc sort data=indsn_vars_&wab_seg_iter(keep=name);
         by name;
      run;
      proc sort data=&temp_lib.._wab_autosg_meta_all_
                out=autoseg_vars_&wab_seg_iter;
         where strip(segmentation_name)="&&_wab_autoseg_name_&wab_seg_iter";
         by _name_;
      run;

      /* Look every metadata variable up in the input data set by name.     */
      /* This replaces a MERGE that had no BY statement (so rows were       */
      /* paired by position, not by name), a RETAIN that fixed the          */
      /* missing-variable list at length 1 (so it was always blank), and an */
      /* input test that collected variables that were present instead of   */
      /* absent.  VAR_MISSING is Y when the variable is not in the input    */
      /* data set.                                                          */
      data _null_;
         set autoseg_vars_&wab_seg_iter (rename=(_name_=name)) end=eof;
         length missing_input $4096 var_missing $1;
         retain missing_input ' ' indsn_id 0;
         if _n_ eq 1 then indsn_id=open("&&_wab_autoseg_indsn_&wab_seg_iter");

         var_missing='Y';
         if indsn_id gt 0 and name gt ' ' then
            if varnum(indsn_id,strip(name)) gt 0 then var_missing='N';

         select(lowcase(role));
            when('target') do;
               call symput("_wab_missing_target_&wab_seg_iter",strip(var_missing));
            end;
            when('input') do;
               if var_missing='Y' then
                  missing_input=strip(missing_input)||' '||strip(name);
            end;
            when('visitor id') do;
               call symput("_wab_missing_visitor_&wab_seg_iter",strip(var_missing));
            end;
            otherwise;
         end;

         if eof then do;
            if missing_input='' then
               call symput("_wab_missing_input_&wab_seg_iter",'N');
            else
               call symput("_wab_missing_input_&wab_seg_iter",'Y');
            call symput("_wab_missing_input_vars_&wab_seg_iter",strip(missing_input));
            if indsn_id gt 0 then rc=close(indsn_id);
         end;
      run;

      %if &&_wab_missing_target_&wab_seg_iter = Y %then %do;
         %put %unquote(&wab_warning) &&_wab_autoseg_indsn_&wab_seg_iter %QCMPRES( data) source is missing the expected target variable;
         %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
         %let wab_seg_reason=Segmentation analysis did not run: &&_wab_autoseg_indsn_&wab_seg_iter missing expected target variable. Please contact Administrator;
         %goto DECTREE;
      %end;

      %if &&_wab_missing_input_&wab_seg_iter = Y %then %do;
         /* the list is stored in _WAB_MISSING_INPUT_VARS_n; the trailing    */
         /* underscore was missing from both references below                */
         %put %unquote(&wab_warning) &&_wab_autoseg_indsn_&wab_seg_iter %QCMPRES( data) source is missing input variables. &&_wab_missing_input_vars_&wab_seg_iter ;
         %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
         %let wab_seg_reason=Segmentation analysis did not run: &&_wab_autoseg_indsn_&wab_seg_iter missing &&_wab_missing_input_vars_&wab_seg_iter input variables. Please contact Administrator;
         %goto DECTREE;
      %end;

      %if &&_wab_missing_visitor_&wab_seg_iter = Y %then %do;
         %put %unquote(&wab_warning) &&_wab_autoseg_indsn_&wab_seg_iter %QCMPRES( data) source is missing the expected visitor id variable;
         %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
         %let wab_seg_reason=Segmentation analysis did not run: &&_wab_autoseg_indsn_&wab_seg_iter missing expected visitor id variable. 
Please contact Administrator; %goto DECTREE; %end; /*-----------------------------------------------------------* * Determine if there is more than 1 record per visitor *-----------------------------------------------------------*/ /* Old code %let _wab_autoseg_visitor_nonunique_= 0; proc summary data=&&_wab_autoseg_indsn_&wab_seg_iter nway; class &&_wab_autoseg_visitor_id_&wab_seg_iter ; output out=&temp_lib..autoseg_visitor_&wab_seg_iter; run; data &temp_lib..autoseg_nonunique_visitors_&wab_seg_iter; set &temp_lib..autoseg_visitor_&wab_seg_iter end=eof; retain non_unique_visitors 0; dataset="&&_wab_autoseg_indsn_&wab_seg_iter"; if _freq_ > 1 then do; non_unique_visitors=non_unique_visitors+1; put 'Duplicate visitor records in ' +1 dataset +1 'Visitor id ' +1 &&_wab_autoseg_visitor_id_&wab_seg_iter ; end; if eof then call symput('_wab_autoseg_visitor_nonunique_',strip(put(non_unique_visitors,best.))); run; %if &_wab_autoseg_visitor_nonunique_ > 0 %then %do; %put %unquote(&wab_warning) The Automatic Segmentation decision %QCMPRES( tree) input data source contains duplicate visitor records.; %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.; %let wab_seg_reason=Segmentation Analysis did not run. Duplicate visitor ids found in data. only one (1) records per visitor allowed. 
Contact Administrator.; %goto DECTREE; %end; */

      /* new method for determining if there are duplicate records:          */
      /* NODUPKEY discards the sorted output (OUT=_NULL_) and DUPOUT=        */
      /* collects every record whose visitor id repeats an earlier one       */
      proc sort data=&&_wab_autoseg_indsn_&wab_seg_iter
                out=_null_
                dupout=&temp_lib..autoseg_nonunique_visitors_&wab_seg_iter(keep=&&_wab_autoseg_visitor_id_&wab_seg_iter)
                nodupkey;
         by &&_wab_autoseg_visitor_id_&wab_seg_iter ;
      run;

      %if %get_observation_count(indsn=&temp_lib..autoseg_nonunique_visitors_&wab_seg_iter) > 0 %then %do;
         /* Every row of the DUPOUT= data set is a duplicate, so each one is  */
         /* logged.  The former "_freq_ > 1" guard was left over from the     */
         /* PROC SUMMARY method; that variable is not in this data set, so    */
         /* no duplicate was ever written to the log.                         */
         data _null_;
            set &temp_lib..autoseg_nonunique_visitors_&wab_seg_iter;
            dataset="&&_wab_autoseg_indsn_&wab_seg_iter";
            put 'Duplicate visitor records in ' +1 dataset +1 'Visitor id ' +1 &&_wab_autoseg_visitor_id_&wab_seg_iter ;
         run;

         %put %unquote(&wab_warning) The Automatic Segmentation decision %QCMPRES( tree) input data source contains duplicate visitor records.;
         %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
         %let wab_seg_reason=Segmentation Analysis did not run. Duplicate visitor ids found in data. only one (1) records per visitor allowed. 
Contact Administrator.;
         %goto DECTREE;
      %end;

      /*-----------------------------------------------------------*
       * Determine target values and stop analysis if not 0/1.
       * PROC SUMMARY with NWAY produces one row per distinct target
       * value; the DATA _NULL_ step then publishes three macro
       * variables: an invalid-level flag, the number of distinct
       * levels, and the target value found on the last row.
       *-----------------------------------------------------------*/
      %let _wab_autoseg_target_lvl_invalid_ = 0;

      proc summary data=&&_wab_autoseg_indsn_&wab_seg_iter nway;
         class &&_wab_autoseg_target_&wab_seg_iter ;
         output out=&temp_lib..autoseg_target_&wab_seg_iter;
      run;

      data _null_;
         set &temp_lib..autoseg_target_&wab_seg_iter end=eof;
         retain target_level_invalid 0;
         if &&_wab_autoseg_target_&wab_seg_iter not in (0,1) then
            target_level_invalid=1;
         if eof then do;
            call symput('_wab_autoseg_target_lvl_invalid_',put(target_level_invalid,z1.));
            call symput('_wab_autoseg_target_lvls_',put(_n_,z1.));
            call symput('_wab_autoseg_target_lvl_',put(&&_wab_autoseg_target_&wab_seg_iter,z1.));
         end;
      run;

      /* A target level other than 0/1 was found: record the reason and skip. */
      %if &_wab_autoseg_target_lvl_invalid_ > 0 %then %do;
         %put %unquote(&wab_warning) The Automatic Segmentation decision %QCMPRES( tree) input data source target variable levels are not 0/1.;
         %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
         %let wab_seg_reason=Segmentation Analysis did not run. Target variable not %QCMPRES( configured) correctly in data. Contact Administrator.;
         %goto DECTREE;
      %end;

      /* Fewer than two distinct target levels: record the reason and skip. */
      %if &_wab_autoseg_target_lvls_ < 2 %then %do;
         %put %unquote(&wab_warning) The Automatic Segmentation decision %QCMPRES( tree) input data source target variable, &&_wab_autoseg_target_&wab_seg_iter, %QCMPRES( contains) only &_wab_autoseg_target_lvl_ .;
         %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
         %let wab_seg_reason=Segmentation Analysis did not run. Target variable contains %QCMPRES( only) &_wab_autoseg_target_lvl_ .;
         %goto DECTREE;
      %end;

      /*-----------------------------------------------------------*
       * Create training and validation data sets.
       * Each row draws a uniform random number (fixed seed 9912);
       * rows below the training sample percentage go to the
       * training set, all others to the validation set.
       *-----------------------------------------------------------*/
      data &temp_lib..autoseg_train
           &temp_lib..autoseg_valid;
         set &&_wab_autoseg_indsn_&wab_seg_iter;
         sample_coefficient=ranuni(9912);
         if sample_coefficient lt &wab_autoseg_train_samp_pct then
            output &temp_lib..autoseg_train;
         else
            output &temp_lib..autoseg_valid;
      run;

      /* When no leaf size is configured, default it to 1 percent of the  */
      /* training observations (rounded to a whole number).               */
      %if %quote(&wab_autoseg_leafsize)= %then %do;
         data _null_;
            dsid=open("&temp_lib..autoseg_train");
            total_obs=attrn(dsid,"nobs");
            leafsize=round(total_obs*.01,1.0);
            call symput('wab_autoseg_leafsize',put(leafsize,best.));
         run;

         %if &wab_autoseg_leafsize lt 1 %then %do;
            %put %unquote(&wab_warning) The Automatic Segmentation decision %QCMPRES( tree) input data source does not have enough %QCMPRES( observations) for a segmentation rule to be created.;
            %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter could not be produced.;
            %let wab_seg_reason=Segmentation Analysis did not run. Input data source does not %QCMPRES( have) enough data for a segmentation rule to be created.;
            %goto DECTREE;
         %end;

         %put %unquote(&wab_note) The Automatic Segmentation decision %QCMPRES( trees) can have at minimum &wab_autoseg_leafsize %QCMPRES( observations) for a new segmentation rule to be created.;
      %end;

      /* For nominal or ordinal targets, both the training and the        */
      /* validation sample must contain more than one target value.       */
      %if ( %sysfunc(lowcase(%sysfunc( compress(&&_wab_autoseg_target_level_&wab_seg_iter))))=nominal )
       or ( %sysfunc(lowcase(%sysfunc( compress(&&_wab_autoseg_target_level_&wab_seg_iter))))=ordinal ) %then %do;

         proc summary data=&temp_lib..autoseg_train nway;
            class &&_wab_autoseg_target_&wab_seg_iter;
            output out=&temp_lib.._wab_train_target_counts_;
         run;

         %if %get_observation_count(indsn=&temp_lib.._wab_train_target_counts_) lt 2 %then %do;
            data _null_;
               set &temp_lib.._wab_train_target_counts_;
               call symput('_wab_seg_train_resp_',put(&&_wab_autoseg_target_&wab_seg_iter , z1.));
            run;
            %put %unquote(&wab_warning) The Automatic Segmentation decision %QCMPRES( tree) target/reponse variable must have more than one %QCMPRES( value) for a segmentation rule to be created. In the training data set &&_wab_autoseg_target_&wab_seg_iter %QCMPRES( contained) only &_wab_seg_train_resp_ .;
            %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter will not be produced.;
            %let wab_seg_reason=Segmentation Analysis did not run. Training data source response variable contains only one value.;
            %goto DECTREE;
         %end;

         proc summary data=&temp_lib..autoseg_valid nway;
            class &&_wab_autoseg_target_&wab_seg_iter;
            output out=&temp_lib.._wab_valid_target_counts_;
         run;

         %if %get_observation_count(indsn=&temp_lib.._wab_valid_target_counts_) lt 2 %then %do;
            data _null_;
               set &temp_lib.._wab_valid_target_counts_;
               call symput('_wab_seg_valid_resp_',put(&&_wab_autoseg_target_&wab_seg_iter , z1.));
            run;
            /* Fix: report the validation value (_wab_seg_valid_resp_), which */
            /* is the macro variable the step above actually sets.           */
            %put %unquote(&wab_warning) The Automatic Segmentation decision %QCMPRES( tree) target/reponse variable must have more than one %QCMPRES( value) for a segmentation rule to be created. In the validation data set &&_wab_autoseg_target_&wab_seg_iter %QCMPRES( contained) only &_wab_seg_valid_resp_ .;
            %put %unquote(&wab_warning) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter will not be produced.;
            %let wab_seg_reason=Segmentation Analysis did not run. Validation data source response variable contains only one value.;
            %goto DECTREE;
         %end;
      %end;

      /* Grow the tree on the training sample. INPUT statements are       */
      /* generated only for the measurement levels that have variables.   */
      proc arboretum data=&temp_lib..autoseg_train splitatdatum missing = useinsearch;
         %if %quote(&&_wab_nominput_list_&wab_seg_iter) ne %then %do;
            input &&_wab_nominput_list_&wab_seg_iter / level=nominal;
         %end;
         %if %quote(&&_wab_interinput_list_&wab_seg_iter) ne %then %do;
            input &&_wab_interinput_list_&wab_seg_iter / level=interval;
         %end;
         %if %quote(&&_wab_ordinput_list_&wab_seg_iter) ne %then %do;
            input &&_wab_ordinput_list_&wab_seg_iter / level=ordinal;
         %end;

         %if %sysfunc(lowcase(%sysfunc( compress(&&_wab_autoseg_target_level_&wab_seg_iter))))=nominal %then %do;
            target &&_wab_autoseg_target_&wab_seg_iter / level=&&_wab_autoseg_target_level_&wab_seg_iter order=descending;
            assess validata=&temp_lib..autoseg_valid;
         %end;
         %else %do;
            target &&_wab_autoseg_target_&wab_seg_iter / level=&&_wab_autoseg_target_level_&wab_seg_iter order=descending;
         %end;

         interact pruned;

         /* Interval targets use ALPHA=, all other targets use MINWORTH=. */
         train
            %if %sysfunc(lowcase(%sysfunc( compress(&&_wab_autoseg_target_level_&wab_seg_iter))))=interval %then %do;
               alpha=&wab_autoseg_alpha
            %end;
            %else %do;
               minworth=&wab_autoseg_minworth
            %end;
            maxdepth = &wab_autoseg_maxdepth
            leafsize = &wab_autoseg_leafsize;

         score data=&temp_lib..autoseg_train out=&temp_lib..autoseg_train_scores role=score;
         score data=&temp_lib..autoseg_valid out=&temp_lib..autoseg_valid_scores role=score;
         assess validata=&temp_lib..autoseg_valid;
         subtree nleaves = &&_autoseg_num_segments&wab_seg_iter;

         save path=&temp_lib..autoseg_path
              rules=&temp_lib..autoseg_rules
              nodestat=&temp_lib..autoseg_nodestat
              summary=&temp_lib..autoseg_summary
              model=&temp_lib..autoseg_model
              importance=&temp_lib..autoseg_importance;
         makemacro nleaves=nl;
      run;
      quit;

      %if &syserr gt 4 %then %do;
         %put %unquote(&wab_error) The Automatic Segmentation segment %QCMPRES( structure) and/or segment statistics data set could not be created.;
         %put %unquote(&wab_error) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter will not be produced.;
         %put %unquote(&wab_error) Web Analytics program exiting.;
         %let wab_seg_reason = Segmentation Analysis did not run. PROC ARBORETUM could not create any segments. Please see Administrator.;
         %goto DECTREE;
      %end;

      /* Reshape the node statistics into the var1-var7 layout:           */
      /* node, parent, link width, count text, above/below text, and a    */
      /* code that is 400 when LEAF is missing and 100 otherwise.         */
      data &temp_lib..autoseg_dectree(keep=var1-var7 ) ;
         length var4 $1000;
         set &temp_lib..autoseg_nodestat;
         if parent = . then parent = 0;
         var1=node;
         var2=parent;
         if linkwidth > 1 then var3=round(linkwidth,1.0);
         else var3=1;
         var4 =' Train: '
               ||"&&_wab_autoseg_target_label_&wab_seg_iter="
               ||compress(put(n,nlnum12.))
               ||' '
               ||' Valid: '
               ||"&&_wab_autoseg_target_label_&wab_seg_iter="
               ||compress(put(vn,nlnum12.));
         if index(abovetext,'<') > 0 then abovetext=tranwrd(abovetext,'<','C;');
         if index(abovetext,'>') > 0 then abovetext=tranwrd(abovetext,'>','E;');
         var5=abovetext;
         var6=belowtext;
         if leaf = . then var7=400;
         else var7=100;
      run;

      /* Accumulate this tree with any trees already stored in the output library. */
      data &outlib..autoseg_dectree;
         length segmentation_name $ 40;
         format date date9.;
         set &temp_lib..autoseg_dectree(in=a)
             %if %sysfunc(exist(&outlib..autoseg_dectree,DATA)) %then %do;
                &outlib..autoseg_dectree
             %end;
             ;
         date=&autoseg_report_date;
         if a then id=strip( compbl("&&_wab_autoseg_id_&wab_seg_iter"));
      run;

      /*-------------------------------------------------------------------*
       * Get the variable type for each variable selected by PROC
       * ARBORETUM. A character format maps each input variable name to
       * N when PROC CONTENTS reports TYPE=1 and to C otherwise.
       *-------------------------------------------------------------------*/
      proc contents data=&&_wab_autoseg_indsn_&wab_seg_iter
                    out=_wab_autoseg_var_type_&wab_seg_iter noprint;
      run;

      data _wab_autoseg_var_type_&wab_seg_iter(keep=start label fmtname fmt_type);
         set _wab_autoseg_var_type_&wab_seg_iter;
         fmtname="vartype&wab_seg_iter.a";
         start=name;
         if type=1 then label='N';
         else label='C';
         fmt_type='c';
      run;

      proc format cntlin=_wab_autoseg_var_type_&wab_seg_iter(rename=(fmt_type=type));
      run;

      /* Accumulate the path rows; only the new rows (in=a) are tagged    */
      /* with this segmentation id, name and variable type.               */
      data &outlib..autoseg_path;
         length var_name $ 32 variable $ 256 id $ 36 segmentation_name $ 40;
         set &temp_lib..autoseg_path(in=a)
             %if %sysfunc(exist(&outlib..autoseg_path,DATA)) %then %do;
                &outlib..autoseg_path
             %end;
             ;
         variable=strip(variable);
         if a then do;
            id=strip(compbl("&&_wab_autoseg_id_&wab_seg_iter"));
            segmentation_name=strip(compbl("&&_wab_autoseg_name_&wab_seg_iter"));
            variable_type=put(var_name,$vartype&wab_seg_iter.a.);
         end;
      run;

      /* List the node statistic columns whose names start with p_. */
      proc contents data=&temp_lib..autoseg_nodestat noprint
                    out=&temp_lib..autoseg_nodestat_ct(keep=name where=(lowcase(substr(name,1,2))='p_'));
      run;

      %if (&syserr gt 4) or (%get_observation_count(indsn=&temp_lib..autoseg_nodestat_ct)=0) %then %do;
         %put %unquote(&wab_error) The Automatic Segmentation predicted %QCMPRES( response) variables and/or could not be determined.;
         %put %unquote(&wab_error) The Automatic Segmentation for %QCMPRES( name:) &&_wab_autoseg_name_&wab_seg_iter will not be produced.;
         %put %unquote(&wab_error) Web Analytics program exiting.;
         /* Fix: assign RETCODE itself, as the rest of the macro does. The  */
         /* former indirect form resolved to an invalid %LET target because */
         /* RETCODE holds 0 at this point.                                  */
         %let retcode=1;
         /* Fix: terminate the %LET so the %GOTO below is a separate statement. */
         %let wab_seg_reason= &&_wab_autoseg_name_&wab_seg_iter. predicted response variables could not be determined.;
         %goto DECTREE;
      %end;

      /* Publish, per p_ column, the name with the two-character prefix   */
      /* removed and the same name with the target name removed as well,  */
      /* plus the number of p_ columns.                                   */
      data _null_;
         set &temp_lib..autoseg_nodestat_ct end=eof;
         target=compress("p_&&_wab_autoseg_target_&wab_seg_iter");
         rename=tranwrd(lowcase(name),lowcase(target),"");
         call symput("_wab_response_variable_name_"||strip(put(_n_,6.)),substr(name,3));
         call symput("_wab_response_variable_rename_"||strip(put(_n_,6.)),compress(rename));
         if eof then do;
            call symput("_wab_num_prediction_vars_",strip(put(_n_,6.)));
            response=reverse(substr(reverse(substr(name,3)),2));
         end;
      run;

      /* Keep node, leaf and the p_ and v_ columns, renaming them to      */
      /* predicted_ and validated_ columns with labels and formats.       */
      data &temp_lib..autoseg_nodestat;
         length segmentation_name $ 40 id $ 36 response $ 32;
         set &temp_lib..autoseg_nodestat(
                keep=node leaf
                     %do i=1 %to &_wab_num_prediction_vars_;
                        p_&&_wab_response_variable_name_&i
                        v_&&_wab_response_variable_name_&i
                     %end;
                rename=(
                     %do r=1 %to &_wab_num_prediction_vars_;
                        p_&&_wab_response_variable_name_&r = predicted_&&_wab_response_variable_rename_&r
                        v_&&_wab_response_variable_name_&r = validated_&&_wab_response_variable_rename_&r
                     %end;
                       )
             );
         response = "&&_wab_autoseg_target_label_&wab_seg_iter";
         segmentation_name=strip(compbl("&&_wab_autoseg_name_&wab_seg_iter"));
         id=strip(compbl("&&_wab_autoseg_id_&wab_seg_iter"));
         label
            %do r=1 %to &_wab_num_prediction_vars_;
               predicted_&&_wab_response_variable_rename_&r = "Predicted &&_wab_response_variable_rename_&r"
               validated_&&_wab_response_variable_rename_&r = "Validated &&_wab_response_variable_rename_&r"
            %end;
            ;
         format
            %do r=1 %to &_wab_num_prediction_vars_;
               predicted_&&_wab_response_variable_rename_&r nlnum6.2
               validated_&&_wab_response_variable_rename_&r nlnum6.2
            %end;
            ;
      run;

      /*C001 _wab_response_variable_label */
      /* Accumulate the node statistics in the output library. */
      data &outlib..autoseg_nodestat;
         set &temp_lib..autoseg_nodestat
             %if %sysfunc(exist(&outlib..autoseg_nodestat,DATA)) %then %do;
                &outlib..autoseg_nodestat
             %end;
             ;
      run;

      %put ******************************* END &&_wab_autoseg_name_&wab_seg_iter ************************;

   %DECTREE:
/*-----------------------------------------------------* * Create a data set containing the segmentation names * with no data. *-----------------------------------------------------*/ %if %nrbquote(&wab_seg_reason) ne %str() %then %do; data &&temp_lib.._wab_autosg_not_enough_data; length segmentation_name $ 40 message $2000 response $ 32; %if %sysfunc(exist(&temp_lib.._wab_autosg_not_enough_data,DATA)) %then %do; set &&temp_lib.._wab_autosg_not_enough_data; output; %end; date=&autoseg_report_date; segmentation_name=strip(compbl("&&_wab_autoseg_name_&wab_seg_iter")); response= "&&_wab_autoseg_target_label_&wab_seg_iter"; id=strip(compbl("&&_wab_autoseg_id_&wab_seg_iter")); message = "&wab_seg_reason"; output; run; %end; %end; proc format; value $xlatrel '>' ='<' '>='='<=' '<='='>=' '<' ='>'; run; %if %sysfunc(exist(&outlib..autoseg_path,data))=0 %then %do; %put %unquote(&wab_warning) The &outlib..autoseg_path data set %QCMPRES( does) not exist. No segments were created for any of the segmentation %QCMPRES( analyses.) Check &outlib..autoseg_segment_rules for reasons.; %put %unquote(&wab_warning) Automatic Segmentation program exiting.; %goto ERREXIT; %end; %if %sysfunc(exist(&outlib..autoseg_nodestat,data))=0 %then %do; %put %unquote(&wab_warning) The &outlib..autoseg_nodestat data set %QCMPRES( does) not exist. No segments were created for any of the segmentation %QCMPRES( analyses.) Check &outlib..autoseg_segment_rules for reasons; %put %unquote(&wab_warning) Automatic Segmentation program exiting.; %goto ERREXIT; %end; /* -------------------------------------------------------------------------- */ /* make sure that VARIABLE is in the same order across SEGMENTATION_NAME/ */ /* LEAF/NODE so that SEGMENT_RULE descriptors will present information */ /* consistently. 
*/ /* -------------------------------------------------------------------------- */ /* propogate VARIABLE and VAR_NAME from previous obs when they are missing */ /* -------------------------------------------------------------------------- */ data propogate_autoseg_path; set &outlib..autoseg_path; by id node leaf notsorted; retain prev_var_name prev_variable; if var_name eq ' ' then var_name=prev_var_name; if variable eq ' ' then variable=prev_variable; /* move 'MISSING' from RELATION to CHARACTER_VALUE */ if upcase(relation) eq 'MISSING' then do; if upcase(variable_type)='N' then character_value='.'; relation='='; end; if upcase(relation) eq 'NOT MISSING' then do; if upcase(variable_type)='N' then character_value='.'; relation='ne'; end; prev_var_name=var_name; prev_variable=variable; drop prev_var_name prev_variable; run; proc sort data=propogate_autoseg_path out =sorted_autoseg_path; by id leaf node variable; run; data wab_autoseg_path; set sorted_autoseg_path; by id leaf node variable; length segment_rule segment_rule_label $ 2000 subject subject1 $ 1024 obj1 obj2 $ 25 relop1 relop2 $ 11 x $ 10 ; retain segment_rule segment_rule_label obj1 relop1 subject subject1; /* there is one SEGMENT_RULE and one SEGMENT_RULE_LABEL for each SEGMENTATION_NAME/LEAF/NODE. the only difference between a SEGMENT_RULE and a SEGMENT_RULE_LABEL is that the SEGMENT_RULE uses the variable as the SUBJECT and the SEGMENT_RULE_LABEL uses the variable label as the SUBJECT. 
for every VARIABLE within SEGMENTATION_NAME/LEAF/NODE, create a descriptor that is made up of the following parts; OBJ1 Object 1 -- the minimum value RELOP1 Relational Operator 1 SUBJECT (i.e., VARIABLE) RELOP2 Relational Operator 2 OBJ2 Object 2 -- the maximum value the following are examples of SEGMENT_RULE_LABELs with one descriptor: Page Count < 22 22 <= Page Count < 31 31 <= Page Count the following are examples of SEGMENT_RULE_LABELs with two descriptors: Number of Stores < 4.5 and Product in (BOOT, SANDAL, SPORT SHOE) 3.5 <= Number of Stores < 5.5 and Product in (CASUAL, MISSING) 10.5 <= Number of Stores and Product in (BOOT, MISSING) if any relational operator for a variable is EQ ('='), it is assumed that all the relational operators for that variable are also EQ and the descriptor is built in the form: 'SUBJECT in (OBJ1, ... , OBJn)' the segment rule is made by ANDing the descriptor for each variable */ if first.node then do; segment_rule=' '; segment_rule_label=' '; end; if first.variable then do; if strip(relation) in ('=','ne') then do; obj1 =' '; relop1 =' '; if strip(relation)='=' then x='in'; else x='not in'; subject=strip(var_name) || ' '|| strip(x)||' ' || '(' || strip(character_value); subject1=strip(variable) || ' '|| strip(x) ||' ' || '(' || strip(character_value); end; else do; subject=var_name; subject1=variable; obj1 =character_value; if character_value eq ' ' then relop1 =' '; else relop1 =put(strip(relation),$xlatrel.); end; end; else do; if strip(relation) in ('=','ne') then do; obj2 =' '; relop2 =' '; subject=strip(subject) || ', ' || strip(character_value); end; else do; obj2 =character_value; if character_value eq ' ' then relop2 =' '; else relop2 =relation; end; end; if last.variable then do; if segment_rule gt ' ' then conjunction=' and '; if relation in ('=','ne') then do; segment_rule=strip(strip(segment_rule) || conjunction || strip(subject) || ')' ); segment_rule_label=strip(strip(segment_rule_label) || conjunction || 
strip(subject1) || ')' ); end; else do; segment_rule=strip(strip(segment_rule) || conjunction || strip(obj1) || ' ' || strip(relop1) || ' ' || strip(subject) || ' ' || strip(relop2) || ' ' || strip(obj2) ); segment_rule_label=strip(strip(segment_rule_label) || conjunction || strip(obj1) || ' ' || strip(relop1) || ' ' || strip(subject1) || ' ' || strip(relop2) || ' ' || strip(obj2) ); end; end; if last.node then do; segment_rule_label=tranwrd(segment_rule_label,'Session','Visit'); segment_rule_label=tranwrd(segment_rule_label,'session','visit'); segment_rule_label=tranwrd(segment_rule_label,'SESSION','VISIT'); output; end; keep id segmentation_name node leaf segment_rule segment_rule_label ; run; proc sort data=&outlib..autoseg_nodestat (where=(leaf gt .z)) out =wab_autoseg_nodestat; by id leaf node; run; /*S0265918 - temp_lib was summary*/ data &temp_lib..autoseg_segment_rules(drop=node leaf); format date date9.; merge wab_autoseg_path (in=in_a) wab_autoseg_nodestat (in=in_b drop=segmentation_name); by id leaf node; date=&autoseg_report_date; if in_a then if in_b then do; output &temp_lib..autoseg_segment_rules; end; run; proc sort data=&temp_lib..autoseg_segment_rules; by date id segment_rule; run; data &temp_lib..autoseg_segment_rules(drop=segs); set &temp_lib..autoseg_segment_rules end=eof; by date id segment_rule; length segs $2000; retain segs; /* get a list of analyses with segments */ if first.id then do; if segs='' then segs="'"||strip(segmentation_name)||"'"; else segs=strip(segs)||",'"||strip(segmentation_name)||"'"; end; /* calculate weighted response */ format weighted_response nlnum6.2; weighted_response= &wab_autoseg_train_samp_pct*predicted_1+(1-&wab_autoseg_train_samp_pct)*validated_1; label weighted_response='Expected Response Rate'; if eof then do; call symput('_wab_autosgs_',strip(segs)); end; run; /* -------------------------------------------------------------------------- */ /* Optional reporting - Visitor assignment to segment */ /* Calculate 
number of visitors falling into each segment for :                */
   /*    1. All visitors                                                         */
   /*    2. New visitors                                                         */
   /*    3. Report date visitors                                                 */
   /* -------------------------------------------------------------------------- */

   /* -------------------------------------------------------------------------- */
   /* Determine which segmentation have the appropriate variables                */
   /* -------------------------------------------------------------------------- */
   proc sort data=&temp_lib.._wab_autosg_meta_all_ out=_wab_autosg_meta_segs_;
      where segmentation_name in (&_wab_autosgs_);
      by id;
   run;

   /* For every id that has at least one input whose name contains _anl,  */
   /* publish numbered macro variables: the quoted input names, the       */
   /* matching _all names, the new-visitor and most-current-visitor       */
   /* variables, the input data set and the id. The count of such ids     */
   /* goes to _WAB_FINAL_ANALYSES.                                        */
   data _null_;
      set _wab_autosg_meta_segs_ end=eof;
      by id;
      length inputs totals $1000 new_visitor max_visitor $32;
      retain totals inputs new_visitor max_visitor;
      retain analyses 0;

      if first.id then do;
         totals='';
         new_visitor='';
         max_visitor='';
         inputs='';
      end;

      select(lowcase(role));
         when('input') do;
            if inputs='' then inputs="'"||strip(_name_)||"'";
            else inputs=strip(inputs)||" '"||strip(_name_)||"'";
            if index(_name_,'_anl') > 0 then do;
               if totals='' then totals="'"||strip(tranwrd(_name_,'_anl','_all'))||"'";
               else totals=strip(totals)||" '"||strip(tranwrd(_name_,'_anl','_all'))||"'";
            end;
         end;
         when('new visitor indicator') new_visitor=_name_;
         when('most current visitor') max_visitor=_name_;
         otherwise;
      end;

      if last.id then do;
         if totals ne '' then do;
            analyses=analyses+1;
            call symput(compress('_totals'||put(analyses,best.)),strip(totals));
            call symput(compress('_inputs'||put(analyses,best.)),strip(inputs));
            call symput(compress('_new_visitor'||put(analyses,best.)),strip(new_visitor));
            call symput(compress('_max_visitor'||put(analyses,best.)),strip(max_visitor));
            call symput(compress('_data'||put(analyses,best.)),strip(indsn));
            call symput(compress('_analysis'||put(analyses,best.)),strip(id));
         end;
      end;

      if eof then call symput('_wab_final_analyses',put(analyses,best.));
   run;

   %if &_wab_final_analyses > 0 %then %do;

      /* Start from an empty accumulation data set. */
      %if %sysfunc(exist(&temp_lib..autosegment_applied))>0 %then %do;
         proc datasets library=&temp_lib nolist;
            delete autosegment_applied ;
         run;
         quit;
      %end;

      %do a = 1 %to &_wab_final_analyses;

         /*----------------------------------------------------------------------*/
         /* Determine if there are matching total variables for all analysis     */
         /* variables                                                            */
         /*----------------------------------------------------------------------*/
         /* Publish each segment rule of this analysis, with _anl replaced by    */
         /* _all, together with its rounded weighted response and the rule count.*/
         data _null_;
            set &temp_lib..autoseg_segment_rules end=eof;
            where id=strip("&&_analysis&a");
            n + 1;
            segment_rule=tranwrd(segment_rule,'_anl','_all');
            call symput(compress('_segment'||put(n,best.)),strip(segment_rule));
            call symput(compress('wt_resp'||put(_n_,best.)),put(round(weighted_response,0.01),best.));
            if eof then call symput('_segments',compress(put(n,best.)));
         run;

         proc contents data=&&_data&a noprint out=total_var_check;
         run;

         /* Keep the _all and _anl variables and derive the name without the marker. */
         data total_var_check;
            set total_var_check;
            where index(name,'_all')>0 or index(name,'_anl')>0;
            if index(name,'_all')>0 then var=strip(tranwrd(name,'_all',''));
            if index(name,'_anl')>0 then var=strip(tranwrd(name,'_anl',''));
         run;

         /*----------------------------------------------------------------------*/
         /* No appropriately named total/analytical variables pairs to be used   */
         /* to apply segments to visitor                                         */
         /*----------------------------------------------------------------------*/
         %if %get_observation_count(indsn=total_var_check) = 0 %then %do;
            /* Fix: call the QCMPRES autocall macro; the former spelling was not a macro. */
            %put %unquote(&wab_warning) The Automatic Segmentation application of %QCMPRES( segments) could not occur, no total variables present in data.;
            %let _wab_autoseg_no_tot_var_=1;
            %goto SEG_APPLY;
         %end;
         /*----------------------------------------------------------------------*/
         /* Determine if all analytical variables are paired with total          */
         /* variables                                                            */
         /*----------------------------------------------------------------------*/
         %else %do;
            proc sort data=total_var_check;
               where name in ( &&_totals&a &&_inputs&a );
               by var;
            run;

            /* Within each VAR group, compare the last four characters of   */
            /* the first and last name; equal endings flag a missing total. */
            data _null_;
               set total_var_check end=eof;
               by var;
               retain var_check1;
               retain missing_total 0;
               if first.var then do;
                  var_check1=reverse(substr(strip(reverse(name)),1,4));
               end;
               if last.var then do;
                  var_check2=reverse(substr(strip(reverse(name)),1,4));
                  if var_check1 = var_check2 then do;
                     missing_total=1;
                     put 'Missing total variable ' +1 var;
                  end;
               end;
               if eof then call symput('_wab_autoseg_no_tot_var_',compress(put(missing_total,best.)));
            run;

            %if &_wab_autoseg_no_tot_var_ > 0 %then %do;
               /* Fix: call the QCMPRES autocall macro; the former spelling was not a macro. */
               %put %unquote(&wab_warning) The Automatic Segmentation application of %QCMPRES( segments) could not occur, no total variables present in data.;
               %goto SEG_APPLY;
            %end;
         %end;

         /*-----------------------------------------------------------*
          * Clean up data sets
          *-----------------------------------------------------------*/
         proc datasets library=&temp_lib nolist;
            delete autoseg_visitors_&a
                   autosegment_summary_&a
                   autosegment_summary_nv_&a
                   autosegment_summary_tv_&a
                   autosegments_&a
                   autosegments_c_&a;
         run;
         quit;

         /* apply segments to data: one 0/1 indicator per segment rule */
         data &temp_lib..autoseg_visitors_&a;
            set &&_data&a ;
            %do s = 1 %to &_segments;
               if &&_segment&s then segment&s = 1;
               else segment&s = 0;
            %end;
         run;

         /*-----------------------------------------------------------*
          * All visitors summary
          *-----------------------------------------------------------*/
         proc summary data=&temp_lib..autoseg_visitors_&a nway;
            var segment1-segment&_segments ;
            output out=&temp_lib..autosegment_summary_&a (drop=_type_) sum= ;
         run;

         /*-----------------------------------------------------------*
          * New visitors summary
          *-----------------------------------------------------------*/
         %if &&_new_visitor&a ne %str() %then %do;
            proc summary data=&temp_lib..autoseg_visitors_&a nway;
               where &&_new_visitor&a=1;
               var segment1-segment&_segments ;
               output out=&temp_lib..autosegment_summary_nv_&a (drop=_type_) sum= ;
            run;
         %end;

         /*-----------------------------------------------------------*
          * Max date in data new visitors summary
          *-----------------------------------------------------------*/
         %if &&_max_visitor&a ne %str() %then %do;
            proc summary data=&temp_lib..autoseg_visitors_&a nway;
               where &&_max_visitor&a=1;
               var segment1-segment&_segments ;
               output out=&temp_lib..autosegment_summary_tv_&a (drop=_type_) sum= ;
            run;
         %end;

         /* Stack the available summaries and name the population of each row. */
         data &temp_lib..autosegments_summary_&a;
            length type $30;
            set &temp_lib..autosegment_summary_&a(in=c)
                %if &&_new_visitor&a ne %str() %then %do;
                   &temp_lib..autosegment_summary_nv_&a(in=a)
                %end;
                %if &&_max_visitor&a ne %str() %then %do;
                   &temp_lib..autosegment_summary_tv_&a(in=b)
                %end;
                ;
            %if &&_new_visitor&a ne %str() %then %do;
               if a then type='New Visitors';
            %end;
            %if &&_max_visitor&a ne %str() %then %do;
               if b then type="MaxDate Visitors";
            %end;
            if c then type='All Visitors';
         run;

         /* One row per segment, one column per population. */
         proc transpose data=&temp_lib..autosegments_summary_&a
                        out=&temp_lib..autosegments_&a;
            id type;
            idlabel type;
            var _freq_ segment1-segment&_segments;
         run;

         data &temp_lib..autosegments_c_&a(
                 keep=segment_rule id _name_
                      all_visitors pc_visits
                      new_visitors pc_visits_new
                      maxdate_visitors pc_visits_maxdate
                      date
                      exp_resp_all exp_resp_new exp_resp_maxdate
                 where =(_name_ ne '_FREQ_')
              );
            length id $36.;
            format segment_rule $2000.
                   all_visitors nlnum12.
                   pc_visits nlnum8.2
                   exp_resp_all nlnum12.
                   new_visitors nlnum12.
                   pc_visits_new nlnum8.2
                   exp_resp_new nlnum12.
                   maxdate_visitors nlnum12.
                   pc_visits_maxdate nlnum8.2
                   exp_resp_maxdate nlnum12.
                   ;
            set &temp_lib..autosegments_&a end=eof;

            /* total visitors for time period */
            if _name_='_FREQ_' then do;
               retain visits_all;
               visits_all=all_visitors;
               %if &&_new_visitor&a ne %str() %then %do;
                  retain visits_new;
                  visits_new=new_visitors;
               %end;
               %if &&_max_visitor&a ne %str() %then %do;
                  retain visits_maxdate;
                  visits_maxdate=maxdate_visitors;
               %end;
            end;
            /* segment totals and percents */
            else do;
               date=&autoseg_report_date;
               id=strip("&&_analysis&a");
               /* Restore the _anl spelling so the rule text matches the rules table. */
               %do ss=1 %to &_segments;
                  if lowcase(_name_)="segment&ss" then
                     segment_rule=strip(tranwrd("&&_segment&ss",'_all','_anl'));
               %end;

               /* percentages */
               pc_visits=(all_visitors/visits_all)*100;
               %if &&_new_visitor&a ne %str() %then %do;
                  pc_visits_new=(new_visitors/visits_new)*100;
               %end;
               %if &&_max_visitor&a ne %str() %then %do;
                  pc_visits_maxdate=(maxdate_visitors/visits_maxdate)*100;
               %end;

               /* calculate response rates */
               %do e = 1 %to &_segments;
                  if lowcase(_name_) = "segment&e" then do;
                     exp_resp_all = all_visitors * &&wt_resp&e ;
                     exp_resp_new = new_visitors * &&wt_resp&e ;
                     exp_resp_maxdate =maxdate_visitors * &&wt_resp&e ;
                     exp_resp_all = round(exp_resp_all,1.0);
                     exp_resp_new = round(exp_resp_new,1.0);
                     exp_resp_maxdate = round(exp_resp_maxdate,1.0);
                  end;
               %end;
            end;

            label all_visitors='All Visitors'
                  new_visitors='New Visitors'
                  maxdate_visitors='Most Current Visitors'
                  pc_visits='% All Visitors'
                  pc_visits_new='% New Visitors'
                  pc_visits_maxdate="% Most Current Visitors"
                  exp_resp_all='All Exp Responders'
                  exp_resp_new='New Exp Responders'
                  exp_resp_maxdate='Current Date Exp Responders';
         run;

         proc append base=&temp_lib..autosegment_applied
                     data=&temp_lib..autosegments_c_&a;
         run;

      %SEG_APPLY:
         %if &_wab_autoseg_no_tot_var_ > 0 %then %do;
            /*------------------------------------------------------------*/
            /* set all report variables to missing                        */
            /*------------------------------------------------------------*/
            /* Fix: this fallback now produces rows that line up with the   */
            /* normal path above: ID has the same length, DATE is set, the  */
            /* loop index is dropped and the rule text uses the _anl        */
            /* spelling, so the later merge by date, id and segment_rule    */
            /* can match these rows.                                        */
            data &temp_lib..autosegments_c_&a(drop=i);
               length id $36.;
               format segment_rule $2000.
                      all_visitors nlnum12.
                      pc_visits 8.2
                      exp_resp_all nlnum12.
                      new_visitors nlnum12.
                      pc_visits_new 8.2
                      exp_resp_new nlnum12.
                      maxdate_visitors nlnum12.
                      pc_visits_maxdate 8.2
                      exp_resp_maxdate nlnum12.
                      ;
               array visitor_vars{9} all_visitors pc_visits exp_resp_all
                                     new_visitors pc_visits_new exp_resp_new
                                     maxdate_visitors pc_visits_maxdate exp_resp_maxdate;
               label all_visitors='All Visitors'
                     new_visitors='New Visitors'
                     maxdate_visitors='Most Current Visitors'
                     pc_visits='% All Visitors'
                     pc_visits_new='% New Visitors'
                     pc_visits_maxdate='% Most Current Visitors'
                     exp_resp_all='All Predicted Responders'
                     exp_resp_new='New Predicted Responders'
                     exp_resp_maxdate='Most Current Predicted Responders'
                     ;
               date=&autoseg_report_date;
               id=strip("&&_analysis&a");
               %do ss=1 %to &_segments;
                  segment_rule=strip(tranwrd("&&_segment&ss",'_all','_anl'));
                  do i=1 to dim(visitor_vars);
                     visitor_vars{i}=.;
                  end;
                  output;
               %end;
            run;

            proc append base=&temp_lib..autosegment_applied
                        data=&temp_lib..autosegments_c_&a;
            run;
         %end;
      %end;

      /* add segment applied stats to segment rule table */
      proc sort data=&temp_lib..autosegment_applied;
         by date id segment_rule;
      run;

      data &temp_lib..autoseg_segment_rules;
         merge &temp_lib..autoseg_segment_rules
               &temp_lib..autosegment_applied;
         by date id segment_rule;
      run;
   %end;
   %else %do;
      /*------------------------------------------------------------*/
      /* set all report variables to missing                        */
      /*------------------------------------------------------------*/
      data &temp_lib..autoseg_segment_rules(drop=i);
         set &temp_lib..autoseg_segment_rules;
         array visitor_vars{9} all_visitors pc_visits exp_resp_all
                               new_visitors pc_visits_new exp_resp_new
                               maxdate_visitors pc_visits_maxdate exp_resp_maxdate;
         do i=1 to dim(visitor_vars);
            visitor_vars{i}=.;
         end;
         label all_visitors='All Visitors'
               new_visitors='New Visitors'
               maxdate_visitors='Most Current Visitors'
               pc_visits='% All Visitors'
               pc_visits_new='% New Visitors'
               pc_visits_maxdate='% Most Current Visitors'
               exp_resp_all='All Predicted Responders'
               exp_resp_new='New Predicted Responders'
               exp_resp_maxdate='Most Current Predicted Responders'
               ;
      run;
   %end;

   /*
-------------------------------------------------------------------------- */
   /* Handle segmentation analyses that did not produce segments                 */
   /* -------------------------------------------------------------------------- */
   /* When failure-reason rows exist, stack them under the rules built in */
   /* this run and restore the date / id / segment_rule order.            */
   %if %sysfunc(exist(&&temp_lib.._wab_autosg_not_enough_data,DATA))>0 %then %do;
      data &temp_lib..autoseg_segment_rules ;
         set &temp_lib..autoseg_segment_rules
             &&temp_lib.._wab_autosg_not_enough_data (in=b);
      run;

      proc sort data=&temp_lib..autoseg_segment_rules ;
         by date id segment_rule;
      run;
   %end;

   /* First run: copy the rules to the output library. Later runs: apply  */
   /* them as an UPDATE transaction keyed by date, id and segment_rule.   */
   %if %sysfunc(exist(&outlib..autoseg_segment_rules,data))=0 %then %do;
      data &outlib..autoseg_segment_rules;
         set &temp_lib..autoseg_segment_rules;
         by date id segment_rule;
      run;
   %end;
   %else %do;
      data &outlib..autoseg_segment_rules;
         update &outlib..autoseg_segment_rules
                &temp_lib..autoseg_segment_rules;
         by date id segment_rule;
      run;
   %end;

   /*S0265918*/
   /* code to age data from output dataset */
   /*
    * Rows of the accumulated segment rules whose date is not later than
    * the report date minus WAB_DAYS_IN_AUTOSEG are reported in the log
    * and then removed, which bounds the size of the accumulated data.
    */
   %if %sysfunc(exist(&outlib..autoseg_segment_rules,DATA)) %then %do;

      /* Collect the rows that fall outside the retention window. */
      data &temp_lib..checkrange;
         set &outlib..autoseg_segment_rules;
         if date le (&autoseg_report_date-&wab_days_in_autoseg);
      run;

      %if %get_observation_count(indsn=&temp_lib..checkrange) gt 0 %then %do;
         %PUT %UNQUOTE(&wab_note) *******************************************************;
         %PUT %UNQUOTE(&wab_note) Records outside the cutoff date range found in:;
         %PUT %UNQUOTE(&wab_note) &outlib..autoseg_segment_rules;
         %PUT %UNQUOTE(&wab_note) Number of days to keep in Autoseg is: &wab_days_in_autoseg;
         %PUT %UNQUOTE(&wab_note) This can be changed by using WAB_DAYS_IN_AUTOSEG in the;
         %PUT %UNQUOTE(&wab_note) WBCONFIG file.;
         %PUT %UNQUOTE(&wab_note) *******************************************************;
      %end;

      /* Keep only the rows inside the retention window. */
      data &outlib..autoseg_segment_rules;
         set &outlib..autoseg_segment_rules;
         if date gt (&autoseg_report_date-&wab_days_in_autoseg);
      run;
   %end;
   /* end code to age data from output dataset */

   /* delete segmentation/proc arboretum output data sets */
   proc datasets library=&outlib. nolist;
      delete autoseg_path
             autoseg_nodestat
             autoseg_dectree;
   run;
   quit;

%ERREXIT:

   /*-----------------------------------------------------------*
    * Create missing segmentation analyses explanation records.
    * NOTE(review): on the normal path the same failure rows were
    * already stacked into the rules above, so they may end up in
    * the output twice - confirm whether that is intended.
    *-----------------------------------------------------------*/
   %if %sysfunc(exist(&&temp_lib.._wab_autosg_not_enough_data,DATA)) %then %do;

      proc sort data=&&temp_lib.._wab_autosg_not_enough_data;
         by date id;
      run;

      %if %sysfunc(exist(&outlib..autoseg_segment_rules,DATA)) %then %do;
         /* Interleave the failure rows; their message becomes the rule text. */
         data &outlib..autoseg_segment_rules(drop=message);
            set &outlib..autoseg_segment_rules(in=a)
                &&temp_lib.._wab_autosg_not_enough_data(in=b);
            by date id;
            if b then segment_rule = message;
         run;
      %end;
      %else %do;
         /* No rules table yet: build it from the failure rows alone, with */
         /* every statistic and visitor column set to missing.             */
         data &outlib..autoseg_segment_rules(drop=message i);
            format date date9.;
            length segment_rule segment_rule_label $ 2000;
            set &&temp_lib.._wab_autosg_not_enough_data;
            by date id;
            segment_rule = message;
            segment_rule_label='';
            array visitor_vars{14} predicted_0 predicted_1
                                   validated_0 validated_1
                                   weighted_response
                                   all_visitors pc_visits exp_resp_all
                                   new_visitors pc_visits_new exp_resp_new
                                   maxdate_visitors pc_visits_maxdate exp_resp_maxdate;
            do i=1 to dim(visitor_vars);
               visitor_vars{i}=.;
            end;
            label weighted_response='Expected Response Rate'
                  all_visitors='All Visitors'
                  new_visitors='New Visitors'
                  maxdate_visitors='Most Current Visitors'
                  pc_visits='% All Visitors'
                  pc_visits_new='% New Visitors'
                  pc_visits_maxdate='% Most Current Visitors'
                  exp_resp_all='All Predicted Responders'
                  exp_resp_new='New Predicted Responders'
                  exp_resp_maxdate='Most Current Predicted Responders'
                  ;
         run;
      %end;
   %end;

   %put *********************** END SEGMENTATION ANALYSIS *************************;

%mend waseganl;