/*-------------------------------------------------------------------*/ /* Copyright (c) 1996 by SAS Institute, Inc. Austin, Texas. */ /* NAME: index_est */ /* AUTHOR: Billy Clifford */ /* DATE: 28Feb96 */ /* SUPPORT: saswdc - Billy Clifford/saskla - Karen Armstrong */ /* PURPOSE: estimate the number of pages required for an index */ /* */ /* NOTES: */ /* 1. This program is for V6 (not V7) SAS index files. */ /* 2. The algorithm expects all values to be integers. Thus the need */ /* to use the round function. */ /* 3. This program is based upon the algorithm published in a paper */ /* written by Clifford, et al, for SUGI 14 - Using new SAS */ /* Database Features and Options. */ /*-------------------------------------------------------------------*/ data _null_; length host $5; /*-------------------------------------------------------------------*/ /* User-supplied values */ /*-------------------------------------------------------------------*/ psize = 6000; /* page size of index file */ vsize = 32; /* total number of bytes in the value to be indexed. */ /* For a composite index, this is the sum of the */ /* constituent variable sizes. */ uval = 3000; /* number of unique values */ nrec = 20000; /* total number of records (ie, observations) */ /* NOTE: if uval and nrec are equal, this is assumed */ /* to be a unique index. */ host = "HPUX"; /* name of host: MVS, CMS, HPUX, SUN, PC, AIX, */ /* ALPHA, VMS, MIPS, MAC */ /*-------------------------------------------------------------------*/ /*-------------------------------------------------------------------*/ /* Program-generated values */ /*-------------------------------------------------------------------*/ /* lpages - number of leaf (bottom) index pages */ /* upages - number of upper level (non-leaf) index pages */ /* maxent - maximum number of entries on an upper-level page */ /* totpgs - total number of index pages */ /* bytes - total number of pages converted to bytes */ /* offset - size of offset */ /* levels - number of levels in the index tree */ /* entry - the space needed to store a single indexed value and */ /* all of its RIDs (Record IDs). For a unique index */ /* there is one RID per indexed value. For a non-unique */ /* index there will be unval/nrec RIDs. */ /* noperpg - number of entries per leaf page */ /* cont - continuation leaf pages (for non-unique index) */ /*-------------------------------------------------------------------*/ /*-------------------------------------------------------------------*/ /* Host-specific values */ /*-------------------------------------------------------------------*/ /* shrtsize - sizeof(short) */ /* header - sizeof(struct IDXPAGES) */ /* struct IDXPAGES */ /* { */ /* long nextpage; */ /* long idxid; */ /* short ctr; */ /* short freesht; */ /* short nbrentry; */ /* short free; */ /* char level; */ /* char flag; */ /* char freechar[2]; */ /* }; */ /* longsize - sizeof(long) */ /* rsize - sizeof(struct BASE_RID). This is the size of the */ /* pointer (RID) to the record in the data set stored in */ /* the index. */ /*-------------------------------------------------------------------*/ if (host eq "ALPHA") then do; /* VAX ALPHA host */ shrtsize = 2; header = 28; longsize = 8; rsize = 12; end; else do; /* all other hosts */ shrtsize = 2; header = 20; longsize = 4; rsize = 8; end; if (uval eq nrec) then do; /* Unique index. No offset for a unique index. */ offset = 0; end; else do; /* Non-unique index. Assume each value occurs nrec/uval times. */ /* So there will be nrec/uval RIDs and one value. */ offset = shrtsize; rsize = round( ((nrec / uval) * rsize), 1 ); end; entry = vsize + (offset * 2) + rsize; noperpg = round( ((psize - header) / entry ) , 1 ); if (noperpg lt 1) then do; /*----------------------------------------------------------------*/ /* An entire entry will not fit on a page. This is possible only */ /* for non-unique indexes. Remember that 'entry' is the size of */ /* the indexed value plus all of its RIDs. Here we calculate cont,*/ /* the number of continuation pages needed for the extra RIDs. */ /*----------------------------------------------------------------*/ noperpg = 1; cont = round( (entry / (psize - header)), 1 ); end; else cont = 0; lpages = round( uval / noperpg, 1 ); /* number of leaf pages */ if ((lpages * noperpg) ne uval) then lpages = lpages + 1; lolev = lpages; lpages = lpages + cont; maxent = round ( ((psize - header) / (offset + vsize + longsize)), 1 ); /*-----------------------------------------------------------*/ /* Simulate index creation and count the number of leaf and */ /* non-leaf pages. */ /*-----------------------------------------------------------*/ upages = 0; levels = 1; do while (lolev > 1); curlev = round( ((lolev + maxent - 1) / maxent) , 1); upages = upages + curlev; lolev = curlev; levels = levels + 1; end; totpgs = lpages + upages; bytes = totpgs * psize; /*-----------------------------------------------------------*/ /* All values have been calculated. Create the report. */ /*-----------------------------------------------------------*/ put "================================================================"; put " "; put "Index characteristics:"; put " page size (bytes) = " psize; put " index value size (bytes) = " vsize; put " unique values = " uval; put " total number of values = " nrec; put " number of index levels = " levels; put " "; put "Estimated storage requirements:"; put " number of upper level pages = " upages 8.; put " number of leaf pages = " lpages 8.; put " total number of index pages = " totpgs 8." or " bytes comma14." bytes"; put " "; put "Note: the above estimate does not include storage for the index"; put " directory (usually one page) or the host header page."; put " "; put "Estimation of index size complete."; put " "; put "================================================================"; run;