/*----------------------Web Analytics-----------------------------------------*/ /* WAADMSUM */ /* Transform metadata from the WAADMSUM format to the WASUMENG format. */ /* */ /*----------------------------------------------------------------------------*/ /* Supported By: Frank Roediger, FRROED */ /* */ /*----------------------------------------------------------------------------*/ /* History: */ /* Date Description Username Change Code */ /* 20040814 Program Created frroed NA */ /* 20040930 Make sure that there are no frroed C001 */ /* commas in space-delimited */ /* fields. */ /* 20060329 S0351214 frroed */ /* */ /*----------------------------------------------------------------------------*/ /* Dependencies: */ /* The following macros need to be available: */ /* none */ /* */ /* The following macro variables need to be defined and assigned values */ /* in the invoking environment: */ /* none */ /* */ /*----------------------------------------------------------------------------*/ /* Parameters: */ /* META_IN: the input data set (in WAADMSUM format) */ /* META_OUT: the output data set (in WASUMENG format) */ /* RETCODE: mechanism for passing Return Codes */ /* */ /*----------------------------------------------------------------------------*/ /* Input: */ /* (meta_in= */ /* ,meta_out= */ /* ,retcode= */ /* ); */ /* */ /*----------------------------------------------------------------------------*/ /* Output: */ /* */ /*----------------------------------------------------------------------------*/ /* Copyright (c) 2004 SAS Institute Inc. All Rights Reserved */ /*----------------------------------------------------------------------------*/ %macro waadmsum_options_test(lvl=1 ,field=requested_file ); if index(output_dsn_options_&lvl,"&field") and not index(output_dsn_options_&lvl,"&field._token") then output_dsn_options_&lvl=tranwrd(output_dsn_options_&lvl ,"&field" ,"&field._token" ); %mend waadmsum_options_test; %macro waadmsum_where_test(field=requested_file ); %***************************************************************************; %* NOTE: OUTPUT_DSN_WHERE_STMT has been LOWCASEd by the invoking ; %* environment. this does not affect the state of any embedded ; %* literals because they are LOWCASEd when they are used to generate ; %* the MD5-based _TOKENs. ; %***************************************************************************; if index(output_dsn_where_stmt,"&field") and not index(output_dsn_where_stmt,"&field._token") then do; length test_where test_clause $ 4096; %************************************************************************; %* test only the clauses within the WHERE statement that are for &FIELD ; %************************************************************************; %* strip off the WHERE syntax ; %************************************************************************; test_where=strip(tranwrd(output_dsn_where_stmt ,'where=(' ,' ' ) ); test_where=substr(test_where,1,(length(test_where)-1)); do until (not index(test_where,"&field")); %*********************************************************************; %* assign to TEST_CLAUSE the FIELD-based test ; %*********************************************************************; %* locate the position of FIELD-based test in TEST_WHERE ; %*********************************************************************; if index(substr(test_where,1,index(test_where,"&field")),' and ') then %******************************************************************; %* TEST_WHERE is from prior AND ; %******************************************************************; test_where=substr(test_where,index(test_where,' and ')); else if index(substr(test_where,1,index(test_where,"&field")),' or ') then %******************************************************************; %* TEST_WHERE is from prior OR ; %******************************************************************; test_where=substr(test_where,index(test_where,' or ')); else %******************************************************************; %* TEST_WHERE is from start of string ; %******************************************************************; test_where=test_where; if index(test_where,' and ') then test_clause=substr(test_where,1,(index(test_where,'and')-1)); else if index(test_where,' or ') then test_clause=substr(test_where,1,(index(test_where,'or')-1)); else test_clause=test_where; %*********************************************************************; %* disqualify OUTPUT_DSN_WHERE_STMT if TEST_CLAUSE has an operator ; %* that cannot be used with _TOKENs ; %*********************************************************************; if index(test_clause,'between') or index(test_clause,'?') or index(test_clause,'contains') or index(test_clause,'is null') or index(test_clause,'is missing') or index(test_clause,'like') or index(test_clause,'=*') or index(test_clause,'index(') or index(test_clause,'substr(') or index(test_clause,'same') then disqualify_ind=1; %*********************************************************************; %* discard TEST_CLAUSE from TEST_WHERE ; %*********************************************************************; test_where=strip(tranwrd(test_where,strip(test_clause),' ')); end; if not disqualify_ind then do; %*********************************************************************; %* translate literals into MD5 tokens ; %*********************************************************************; length scan_dlm $ 2 entry_dlm $ 1 entry $ 500 entry_string $ 500 entry_token $ 34 where_var $ 32 where_rel $ 10 where_old_lit $ 500 where_new_lit $ 1000 preamble $ 500 function $ 500 ; %*********************************************************************; %* FIELD can be embedded in a function -- for example, LOWCASE -- ; %* that does not pertain to _TOKENs. remove these functions. ; %*********************************************************************; do while (index(compress(output_dsn_where_stmt) ,'(' || strip("&field") || ')' ) ); %******************************************************************; %* identify the function ; %******************************************************************; preamble=substr(output_dsn_where_stmt ,1 ,index(output_dsn_where_stmt ,strip("&field") )-1 ); function=reverse(scan(reverse(strip(preamble)),1,'(')); %******************************************************************; %* remove the function -- and its parentheses ; %******************************************************************; output_dsn_where_stmt=tranwrd(output_dsn_where_stmt ,strip(function) ,' ' ); output_dsn_where_stmt=tranwrd(output_dsn_where_stmt ,'( ' || strip("&field") ,strip("&field") ); output_dsn_where_stmt=tranwrd(output_dsn_where_stmt ,'(' || strip("&field") ,strip("&field") ); output_dsn_where_stmt=tranwrd(output_dsn_where_stmt ,strip("&field") || ' )' ,strip("&field") ); output_dsn_where_stmt=tranwrd(output_dsn_where_stmt ,strip("&field") || ')' ,strip("&field") ); output_dsn_where_stmt=compbl(output_dsn_where_stmt); end; where_var=strip("&field") || '_token'; %*********************************************************************; %* make the parse-able string begin after FIELD ; %*********************************************************************; copy_output_dsn_where_stmt=substr(output_dsn_where_stmt ,index(output_dsn_where_stmt ,strip("&field") ) ); copy_output_dsn_where_stmt=strip(tranwrd(copy_output_dsn_where_stmt ,strip("&field") ,' ' ) ); if substr(copy_output_dsn_where_stmt,1,1) eq ')' then copy_output_dsn_where_stmt=strip(substr(copy_output_dsn_where_stmt ,2 ) ); %*********************************************************************; %* the WHERE_REL relational operator is the 2nd word ; %*********************************************************************; where_rel=scan(copy_output_dsn_where_stmt,1,' '); %*********************************************************************; %* dont grab NOT as the only WHERE_REL value by mistake ; %*********************************************************************; if where_rel eq 'not' then where_rel=strip(where_rel) || ' ' || scan(copy_output_dsn_where_stmt,2,' ('); copy_output_dsn_where_stmt=strip(tranwrd(copy_output_dsn_where_stmt ,strip(where_rel) ,' ' ) ); %*********************************************************************; %* WHERE_OLD_LIT is the next quoted (or paren-ed) value ; %*********************************************************************; scan_dlm=substr(copy_output_dsn_where_stmt,1,1); if scan_dlm eq '(' then scan_dlm='()'; where_old_lit=scan(copy_output_dsn_where_stmt,1,strip(scan_dlm)); where_new_lit=where_old_lit; %*********************************************************************; %* translate all the entries in WHERE_OLD_LIT to _TOKEN values ; %*********************************************************************; do i=1 to (1+countc(where_old_lit,',')); entry_string=scan(where_old_lit,i,','); if substr(entry_string,1,1) eq '"' or substr(entry_string,1,1) eq "'" then do; entry_dlm=substr(entry_string,1,1); entry=tranwrd(entry_string,strip(entry_dlm),' '); end; else do; entry_dlm=' '; entry=entry_string; end; %if &wab_normalize_uri_url eq 1 %then %do; entry_token=strip(entry_dlm) || put(md5(strip(compress(lowcase(entry)))),hex32.) || strip(entry_dlm); %end; %else %do; entry_token=strip(entry_dlm) || put(md5(strip(compbl(lowcase(entry)))),hex32.) || strip(entry_dlm); %end; if entry_string eq ' ' then do; entry_string=strip(scan_dlm) || ' ' || strip(scan_dlm); entry_token=strip(scan_dlm) || strip(entry_token) || strip(scan_dlm); end; if where_new_lit eq ' ' then do; where_old_lit=strip(scan_dlm) || ' ' || strip(scan_dlm); where_new_lit=strip(entry_token); end; else where_new_lit=tranwrd(where_new_lit ,strip(entry_string) ,strip(entry_token) ); end; output_dsn_where_stmt=tranwrd(output_dsn_where_stmt ,strip("&field") ,strip(where_var) ); output_dsn_where_stmt=tranwrd(output_dsn_where_stmt ,strip(where_old_lit) ,strip(where_new_lit) ); end; end; %mend waadmsum_where_test; %macro waadmsum (meta_in= ,meta_out= ,retcode= ); %local macname; %let macname=&sysmacroname; %let retcode=0; %wautils; %***************************************************************************; %* Change the references in the CONFIG.WAADMSUM metadata from the large ; %* text fields to their tokens. ; %***************************************************************************; data &temp_lib..waadmsum_disqualify &temp_lib..waadmsum_token ; set &meta_in end=eof; disqualify_ind=0; class_var_list=lowcase(class_var_list); output_dsn_where_stmt=lowcase(output_dsn_where_stmt); output_dsn_options_1=lowcase(output_dsn_options_1); output_dsn_options_0=lowcase(output_dsn_options_0); %************************************************************************; %* CLASS vars that contain REQUESTED_FILE ; %************************************************************************; tranwrd_flag=0; if index(class_var_list,'requested_file ') then do; class_var_list=tranwrd(class_var_list ,'requested_file ' ,'requested_file_token ' ); tranwrd_flag=1; end; if index(class_var_list,'requested_file,') then do; class_var_list=tranwrd(class_var_list ,'requested_file,' ,'requested_file_token,' ); tranwrd_flag=1; end; if index(class_var_list,',requested_file,') then do; class_var_list=tranwrd(class_var_list ,',requested_file,' ,',requested_file_token,' ); tranwrd_flag=1; end; if index(class_var_list,',requested_file ') then do; class_var_list=tranwrd(class_var_list ,',requested_file ' ,',requested_file_token ' ); tranwrd_flag=1; end; if tranwrd_flag eq 1 then do; %waadmsum_where_test(field=requested_file); %waadmsum_options_test(lvl=0 ,field=requested_file ); %waadmsum_options_test(lvl=1 ,field=requested_file ); end; %************************************************************************; %* CLASS vars that contain FIRST_REQUESTED_FILE ; %************************************************************************; tranwrd_flag=0; if index(class_var_list,'first_requested_file ') then do; class_var_list=tranwrd(class_var_list ,'first_requested_file ' ,'first_requested_file_token ' ); tranwrd_flag=1; end; if index(class_var_list,'first_requested_file,') then do; class_var_list=tranwrd(class_var_list ,'first_requested_file,' ,'first_requested_file_token,' ); tranwrd_flag=1; end; if index(class_var_list,',first_requested_file,') then do; class_var_list=tranwrd(class_var_list ,',first_requested_file,' ,',first_requested_file_token,' ); tranwrd_flag=1; end; if index(class_var_list,',first_requested_file ') then do; class_var_list=tranwrd(class_var_list ,',first_requested_file ' ,',first_requested_file_token ' ); tranwrd_flag=1; end; if tranwrd_flag eq 1 then do; %waadmsum_where_test(field=first_requested_file); %waadmsum_options_test(lvl=0 ,field=first_requested_file ); %waadmsum_options_test(lvl=1 ,field=first_requested_file ); end; %************************************************************************; %* CLASS vars that contain REFERRER ; %************************************************************************; tranwrd_flag=0; if index(class_var_list,'referrer ') then do; class_var_list=tranwrd(class_var_list ,'referrer ' ,'referrer_token ' ); tranwrd_flag=1; end; else if index(class_var_list,'referrer,') then do; class_var_list=tranwrd(class_var_list ,'referrer,' ,'referrer_token,' ); tranwrd_flag=1; end; else if index(class_var_list,',referrer,') then do; class_var_list=tranwrd(class_var_list ,',referrer,' ,',referrer_token,' ); tranwrd_flag=1; end; else if index(class_var_list,',referrer ') then do; class_var_list=tranwrd(class_var_list ,',referrer ' ,',referrer_token ' ); tranwrd_flag=1; end; if tranwrd_flag eq 1 then do; %waadmsum_where_test(field=referrer); %waadmsum_options_test(lvl=0 ,field=referrer ); %waadmsum_options_test(lvl=1 ,field=referrer ); end; %************************************************************************; %* CLASS vars that contain REFERRER_DOMAIN ; %************************************************************************; tranwrd_flag=0; if index(class_var_list,'referrer_domain ') then do; class_var_list=tranwrd(class_var_list ,'referrer_domain ' ,'referrer_domain_token ' ); tranwrd_flag=1; end; else if index(class_var_list,'referrer_domain,') then do; class_var_list=tranwrd(class_var_list ,'referrer_domain,' ,'referrer_domain_token,' ); tranwrd_flag=1; end; else if index(class_var_list,',referrer_domain,') then do; class_var_list=tranwrd(class_var_list ,',referrer_domain,' ,',referrer_domain_token,' ); tranwrd_flag=1; end; else if index(class_var_list,',referrer_domain ') then do; class_var_list=tranwrd(class_var_list ,',referrer_domain ' ,',referrer_domain_token ' ); tranwrd_flag=1; end; if tranwrd_flag eq 1 then do; %waadmsum_where_test(field=referrer_domain); %waadmsum_options_test(lvl=0 ,field=referrer_domain ); %waadmsum_options_test(lvl=1 ,field=referrer_domain ); end; %************************************************************************; %* CLASS vars that contain SEARCH_TERM ; %************************************************************************; if index(class_var_list,'search_term') then do; class_var_list=tranwrd(class_var_list ,'search_term' ,'search_term_token' ); %waadmsum_where_test(field=search_term); %waadmsum_options_test(lvl=0 ,field=search_term ); %waadmsum_options_test(lvl=1 ,field=search_term ); end; %************************************************************************; %* CLASS vars that contain VISITOR_ID ; %************************************************************************; if index(class_var_list,'visitor_id') then do; class_var_list=tranwrd(class_var_list ,'visitor_id' ,'visitor_id_token' ); %waadmsum_where_test(field=visitor_id); %waadmsum_options_test(lvl=0 ,field=visitor_id ); %waadmsum_options_test(lvl=1 ,field=visitor_id ); end; output &temp_lib..waadmsum_token; if disqualify_ind then output &temp_lib..waadmsum_disqualify; drop tranwrd_flag; run; %if %get_observation_count(indsn=&temp_lib..waadmsum_disqualify) ne 0 %then %do; data _null_; set &temp_lib..waadmsum_disqualify end=eof; call symput('wab_err_msg_' || strip(put(_n_,2.)) ,strip(Label) || ' / ' || strip(output_dsn_where_stmt) ); if eof then call symput('wab_err_msg_cnt',put(_n_,2.)); run; %put %unquote(&wab_error) The following rows in %QCMPRES( CONFIG.WAADMSUM) have output WHERE statements that are not %QCMPRES( compatible) with the use of _TOKENs for CLASS variables. %QCMPRES( Please) contact SAS Technical Support for details.; %do i=1 %to &wab_err_msg_cnt; %put -- &&wab_err_msg_&i; %end; %let retcode=1; %goto exit_waadmsum; %end; %***************************************************************************; %* expand the metadata from its WAADMSUM representation to its WASUMENG ; %* representation for the initial summarizations (PARTITION_BY_DATE eq 1) ; %***************************************************************************; data &temp_lib..partition1; %wametatr(srce_dsn=wasumeng ,retcode=wab_rc ); length start_of $ 30 new_dsn_options $ 4096; set &temp_lib..waadmsum_token (drop=id); id=id; need_re_summary=need_re_summary; partition_by_date=1; %* remove commas from space-delimited fields ; /* C001 */ summary_stmt_opts=translate(strip(proc_summary_options),' ',','); summary_output_opts=translate(strip(output_options),' ',','); output_dsn_options_1=translate(strip(output_dsn_options_1),' ',','); %* create the WASUMENG fields that apply to ALL initial summarizations ; where_stmt=input_where; analysis_vars=analysis_var_list; freq_vars=' '; weight_vars=' '; input_table='&temp_lib..' || strip(source_table) || '_&&_wab_date_&wab_iter'; summary_outdsn_where=output_dsn_where_stmt; %* create the WASUMENG fields that are summary level-specific ; do i=1 to (1+countc(summary_level_list,',')); level=scan(summary_level_list,i,','); if index(id_var_summary_level_list,strip(level)) then id_vars=id_var_list; output_table='&temp_lib..' || strip(results_table) || '_' || strip(level); if level eq 'qtr' then level='quarter'; %* initialize SUMMARY_OUTDSN_OPTS with the DATE-related RENAMEs ; if level eq 'day' then do; start_of='date'; summary_outdsn_opts=' '; end; else do; start_of='start_of_' || strip(level); summary_outdsn_opts='rename=(' || strip(start_of) || '=date)'; end; %* replace 'date' with START_OF in OUTPUT_DSN_OPTIONS ; new_dsn_options=lowcase(strip(tranwrd(upcase(output_dsn_options_1) ,' DATE ' ,' '||strip(start_of)||' ' ) ) ); class_vars=tranwrd(class_var_list,'date',strip(start_of)); * analysis_vars=tranwrd(analysis_var_list,'date',strip(start_of)); /* this is not necessary for p_b_d=1 -- comment out */ /* if index(upcase(summary_outdsn_opts),'RENAME') then do; %* if there is a RENAME, TRANWRD the renamed var ; renamed=lowcase(strip(tranwrd(upcase(summary_outdsn_opts) ,'RENAME=' ,' ' ) ) ); renamed=strip(translate(renamed,' ','(')); renamed=strip(translate(renamed,' ',')')); orig_var=scan(renamed,1,'='); renm_var=scan(renamed,2,'='); class_vars=tranwrd(class_vars,strip(orig_var),strip(renm_var)); analysis_vars=tranwrd(analysis_vars,strip(renm_var),strip(orig_var)); end; */ %* embed RENAMEs into the existing RENAME -- if one exists ; if index(upcase(new_dsn_options),'RENAME') and index(upcase(summary_outdsn_opts),'RENAME') then do; %* isolate the RENAME from within SUMMARY_OUTDSN_OPTS ; start_pos=index(upcase(summary_outdsn_opts),'RENAME='); renamed=strip(substr(summary_outdsn_opts,start_pos)); stop_pos=index(renamed,')'); renamed=substr(renamed,1,stop_pos); renamed=lowcase(strip(tranwrd(upcase(renamed) ,'RENAME=' ,' ' ) ) ); renamed=strip(translate(renamed,' ','(')); renamed=strip(translate(renamed,' ',')')); %* insert RENAMED into NEW_DSN_OPTIONS ; start_pos=index(upcase(new_dsn_options),'RENAME=('); summary_outdsn_opts=substr(new_dsn_options,1,(start_pos+7)) || strip(renamed) || ' ' || substr(new_dsn_options,(start_pos+8)); end; else summary_outdsn_opts=strip(strip(summary_outdsn_opts) || ' ' || strip(new_dsn_options) ); output; id_vars=' '; end; keep id input_table summary_stmt_opts where_stmt partition_by_date id_vars class_vars analysis_vars freq_vars weight_vars output_table summary_outdsn_opts summary_outdsn_where summary_output_opts need_re_summary output_dsn_options_1 output_dsn_options_0 ; run; %*--------------------------------------------------------------------------; %* from each initial summarization record, create a corresponding re- ; %* summarization record and write them both out, creating the WASUMENG ; %* representation of the metadata ; %*--------------------------------------------------------------------------; data &meta_out; set &temp_lib..partition1; length renamed temp_str $1024; out_count+1; id=put((out_count+1),z5.); output; partition_by_date=0; input_table=output_table; ******output_table=tranwrd(output_table,'&temp_lib.','summary'); rename=strip(tranwrd(tranwrd(summary_outdsn_opts,'rename=(',' '),')',' ')); if rename gt ' ' then class_vars=tranwrd(class_vars,scan(rename,1,'='),'date'); if index(upcase(output_dsn_options_1),'RENAME') then do; %* for each RENAME, TRANWRD the renamed var ; start_pos=index(upcase(summary_outdsn_opts),'RENAME='); renamed=strip(substr(summary_outdsn_opts,start_pos)); stop_pos=index(renamed,')'); renamed=substr(renamed,1,stop_pos); renamed=lowcase(strip(tranwrd(upcase(renamed) ,'RENAME=' ,' ' ) ) ); renamed=strip(translate(renamed,' ','(')); renamed=strip(translate(renamed,' ',')')); temp_str=renamed; do i=1 to countc(renamed,'='); %* isolate the current RENAME pair ; eq_pos=index(temp_str,'='); orig_var=compress(substr(temp_str,1,(eq_pos-1))); renm_var=scan(strip(substr(temp_str,(eq_pos+1))),1,' '); class_vars=tranwrd(class_vars ,strip(orig_var) ,strip(renm_var) ); analysis_vars=tranwrd(analysis_vars ,strip(orig_var) ,strip(renm_var) ); %* remove ORIG_VAR from TEMP_STR ; temp_str=strip(substr(temp_str,length(orig_var)+1)); %* remove the equal sign (the 1st remaining char) from TEMP_STR ; temp_str=strip(substr(strip(temp_str),2)); %* remove RENM_VAR from TEMP_STR ; temp_str=strip(substr(temp_str,length(renm_var)+1)); end; end; summary_outdsn_where=' '; %* remove commas from space-delimited fields ; /* C001 */ summary_outdsn_opts =translate(strip(output_dsn_options_0),' ',','); out_count+1; id=put(out_count,z5.); output; keep id input_table summary_stmt_opts where_stmt partition_by_date id_vars class_vars analysis_vars freq_vars weight_vars output_table summary_outdsn_opts summary_outdsn_where summary_output_opts need_re_summary ; run; %EXIT_WAADMSUM:; %mend waadmsum;