options source2 linesize=110;
/*-------------------------------------------------------------------*/
/* Copyright (c) 1996 by SAS Institute, Inc. Austin, Texas.          */
/* NAME:    data_est                                                 */
/* AUTHOR:  sasbcf - Barbara Foster                                  */
/* DATE:    28May96                                                  */
/* SUPPORT: sasbcf - Barbara Foster                                  */
/* PURPOSE: estimate the number of pages required for a data file    */
/*                                                                   */
/* NOTES:                                                            */
/* 1. This program is for V6 (not V7) SAS datasets.                  */
/* 2. The algorithm expects all values to be integers.               */
/* 3. This program should be run after you have used the data step   */
/*    or other proc to create an empty data file, after which you    */
/*    execute Proc Contents to display the selected page size.       */
/* 4. The output sizes do not include the space used by the host     */
/*    header. (8k on most unix systems)                              */
/* 5. host and compress are upcased before use. If the inputs do     */
/*    not select a known layout, or cannot fit on a page, an ERROR   */
/*    line is written to the log and no report is produced.          */
/*-------------------------------------------------------------------*/
data _null_;
   length host $5;

   /*----------------------------------------------------------------*/
   /* User-supplied values                                           */
   /*----------------------------------------------------------------*/
   /* .... from PROC CONTENTS .... */
   pgsize  = 8192;   /* Data Set Page Size from PROC CONTENTS.        */
   varcnt  = 4;      /* Variables, count from PROC CONTENTS.          */
   obslen  = 29;     /* Observation Length from PROC CONTENTS.        */
   compress= "NO";   /* Compressed from PROC CONTENTS, YES or NO.     */
   filefmt = "607";  /* File Format from PROC CONTENTS, 606 or 607.   */
   /* .... not from PROC CONTENTS .... */
   cmplen  = 25;     /* Estimated obs length for compressed file.     */
   reccnt  = 30000;  /* total number of records (ie, observations)    */
   host    = "HPUX"; /* name of host: MVS, CMS, HPUX, SUN, PC, AIX,   */
                     /* ALPHA, VMS, MIPS, MAC                         */

   /* Accept lower- or mixed-case input; the tests below use upper case. */
   host     = upcase(host);
   compress = upcase(compress);

   /*----------------------------------------------------------------*/
   /* Program-generated values                                       */
   /*----------------------------------------------------------------*/
   /* bytes   = total number of pages converted to bytes             */
   /* dunused = number of unused bytes in data-only page.            */
   /* laddobs = number of future ADDs that can be in last page.      */
   /* obsdp   = number of obs in data-only page.                     */
   /* obsmdp  = number of obs in mixed page, 0 if no mixed page.     */
   /* totpgs  = total number of data file pages                      */
   /*----------------------------------------------------------------*/

   /*----------------------------------------------------------------*/
   /* Set Host-specific values                                       */
   /*----------------------------------------------------------------*/
   /* filhdr - size of file header information                       */
   /* pghdrd - size of page header if page is data-only              */
   /* pghdrh - size of page overhead if page contains header records */
   /* rechdr - size of record overhead (0=> 1 bit per record)        */
   /* varhdr - size of variable (overhead+variable structure size)   */
   /*----------------------------------------------------------------*/
   /* The six cases are mutually exclusive, so they form one chain.  */
   /* If none matches, filhdr stays missing and is reported below.   */

   /* 1. non-ALPHA, compressed, filefmt is anything. */
   if (host ne "ALPHA" and compress eq "YES") then do;
      filhdr = 468; pghdrh = 28; pghdrd = 28; varhdr = 124; rechdr = 12;
   end;
   /* 2. non-ALPHA, not compressed, filefmt=606. */
   else if (host ne "ALPHA" and compress eq "NO" and filefmt eq "606") then do;
      filhdr = 304; pghdrh = 28; pghdrd = 28; varhdr = 124; rechdr = 12;
   end;
   /* 3. non-ALPHA, not compressed, filefmt=607. */
   else if (host ne "ALPHA" and compress eq "NO" and filefmt eq "607") then do;
      filhdr = 304; pghdrh = 28; pghdrd = 16; varhdr = 124; rechdr = 0;
   end;
   /* 4. ALPHA, compressed, filefmt is anything. */
   else if (host eq "ALPHA" and compress eq "YES") then do;
      filhdr = 784; pghdrh = 48; pghdrd = 48; varhdr = 168; rechdr = 24;
   end;
   /* 5. ALPHA, not compressed, filefmt=606. */
   else if (host eq "ALPHA" and compress eq "NO" and filefmt eq "606") then do;
      filhdr = 464; pghdrh = 48; pghdrd = 48; varhdr = 168; rechdr = 24;
   end;
   /* 6. ALPHA, not compressed, filefmt=607. */
   else if (host eq "ALPHA" and compress eq "NO" and filefmt eq "607") then do;
      filhdr = 464; pghdrh = 48; pghdrd = 24; varhdr = 168; rechdr = 0;
   end;

   /* No case matched: every later value would be missing, so stop.  */
   if (filhdr eq .) then do;
      put "ERROR: no file layout for compress=" compress "and filefmt=" filefmt;
      put "ERROR: compress must be YES or NO; filefmt must be 606 or 607 when compress is NO.";
      stop;
   end;

   /* A header page must hold at least one variable record, or the   */
   /* header-page loop below could never make progress.              */
   if ( (pgsize - pghdrh) lt varhdr ) then do;
      put "ERROR: page size " pgsize "is too small to hold one variable record.";
      stop;
   end;

   /*----------------------------------------------------------------*/
   /* Use input values (-I) to calculate and set out values (-O)     */
   /* related to pages which contain only data (ie, no header recs). */
   /* cmplen  = Estimated obs length for compressed file.          -I*/
   /* compress= Compressed from PROC CONTENTS, YES or NO.          -I*/
   /* ddelbyt = number of delete bytes in data-only page.          -O*/
   /* ddtspac = number of data bytes in data-only page.            -O*/
   /* dpgspac = number of non-hdr bytes in data-only page.         -O*/
   /* donespc = number of bytes used for one data rec(cprs w segtbl)-O*/
   /* dunused = number of unused bytes in data-only page.          -O*/
   /* obsdp   = number of obs in data-only page.                   -O*/
   /* obslen  = Observation Length from PROC CONTENTS.             -I*/
   /* pghdrd  = size of page header if page is data-only.          -I*/
   /* pgsize  = Data Set Page Size from PROC CONTENTS.             -I*/
   /* rechdr  = size of record overhead (0=> 1 bit per record)     -I*/
   /*----------------------------------------------------------------*/
   dpgspac = pgsize - pghdrd;

   /* Only a compressed file stores cmplen bytes per record. An      */
   /* uncompressed file stores obslen bytes even when rechdr is not  */
   /* zero (filefmt 606), so the choice is made on compress.         */
   if ( compress eq "YES" ) then donespc = rechdr + cmplen;
   else                          donespc = rechdr + obslen;

   /* donespc is used as a divisor below. */
   if ( donespc le 0 ) then do;
      put "ERROR: record size must be positive; computed " donespc;
      stop;
   end;

   obsdp   = int ( dpgspac / donespc );
   ddtspac = obsdp * donespc;
   dunused = dpgspac - ddtspac;

   if ( rechdr eq 0 ) then do;
      /* One delete bit per obs, rounded up to whole bytes. Drop obs */
      /* from the page until the delete bytes fit in the free space. */
      ddelbyt = int ( (obsdp+7) / 8 ) ;
      do while ( ddelbyt gt dunused );
         obsdp   = obsdp - 1;
         ddtspac = ddtspac - donespc;
         ddelbyt = int ( (obsdp+7) / 8 ) ;
         dunused = dpgspac - ddtspac;
      end;
      dunused = dunused - ddelbyt;
   end;
   else ddelbyt = 0;

   /* obsdp is used as a divisor when counting data-only pages. */
   if ( obsdp lt 1 ) then do;
      put "ERROR: one observation does not fit in a page of " pgsize "bytes.";
      stop;
   end;

   fullhdr = 0;

   /*----------------------------------------------------------------*/
   /* Use input values (-I) to calculate and set out values (-O)     */
   /* related to initial header pages and mixed header/data page.    */
   /* cpgvuse = byte count used for var namestr recs in cur pg     -O*/
   /* cpgvspc = byte count avail for var-namestr recs in cur hdrpg -O*/
   /* cvarcnt = count of var recs on current page                  -O*/
   /* donespc = number of bytes used for one data rec(cprs w segtbl)-I*/
   /* donpspc = bytes used for one data rec, plus 1 del-byte if lean-O*/
   /* filhdr  = size of file header information, w/o page hdr.     -I*/
   /* fullhdr = num of header-only pages at start.                 -O*/
   /* pghdrh  = size of page overhead if page contains hdr recs    -I*/
   /* pgsize  = Data Set Page Size from PROC CONTENTS.             -I*/
   /* rvarcnt = count of var recs remaining for mixed page         -O*/
   /* varcnt  = Variable count from PROC CONTENTS.                 -I*/
   /* varhdr  = number of bytes for one variable.                  -I*/
   /*----------------------------------------------------------------*/
   if ( rechdr eq 0 ) then donpspc = obslen + 1;
   else                    donpspc = donespc;

   rvarcnt = varcnt;
   cpgvspc = pgsize - filhdr - pghdrh;
   cpgvuse = varhdr * varcnt;

   /* If hdr recs and one data rec won't fit on pg, move to next pg. */
   /* cvarcnt is capped at rvarcnt so the remaining count never goes */
   /* negative when all var recs fit but one data rec does not.      */
   do while ( rvarcnt gt 0 and (cpgvuse + donpspc) gt cpgvspc );
      cvarcnt = min ( rvarcnt, int ( cpgvspc / varhdr ) );
      rvarcnt = rvarcnt - cvarcnt;
      fullhdr = fullhdr + 1;
      cpgvuse = varhdr * rvarcnt;
      cpgvspc = pgsize - pghdrh;
   end;
   /* Either no var recs remain (cpgvuse is 0), or the current page  */
   /* has room for the remaining var recs and one data record.       */

   /*----------------------------------------------------------------*/
   /* Use input values (-I) to calculate and set out values (-O)     */
   /* related to transition from header to mixed to data-only pages. */
   /* fullhdr = num of header-only pages at start.                 -I*/
   /* cpgvspc = byte count avail for var-namestr recs in cur hdrpg -I*/
   /* cpgvuse = byte count used for var-namestr recs in cur pg     -I*/
   /* dmdbyt  = number of delete bytes in mixed data page.         -O*/
   /* dmdspac = number of data bytes in mixed data page.           -O*/
   /* donespc = number of bytes used for one data rec(cprs w segtbl)-I*/
   /* dmunuse = number of unused bytes in mixed data page.         -O*/
   /* fdatpag = page num of first page containing data-only.       -O*/
   /* mdatavl = byte count available for data in mixed data page.  -O*/
   /* obsmdp  = number of obs in mixed page, 0 if no mixed page.   -O*/
   /* rechdr  = size of record overhead (0=> 1 bit per record)     -I*/
   /*----------------------------------------------------------------*/
   dmdbyt = 0;

   /* No var recs left over: there is no mixed page. */
   if (cpgvuse eq 0) then do;
      fdatpag = fullhdr + 1;
      obsmdp  = 0;
      dmdspac = 0;
      dmunuse = 0;
      mdatavl = 0;
   end;
   /* There is a mixed page, ie header and data records */
   else do;
      fdatpag = fullhdr + 2;
      mdatavl = cpgvspc - cpgvuse;
      obsmdp  = int ( mdatavl / donespc );
      dmdspac = obsmdp * donespc;
      dmunuse = mdatavl - dmdspac;
      if ( rechdr eq 0 ) then do;
         /* Same delete-byte adjustment as for a data-only page. */
         dmdbyt = int ( (obsmdp+7) / 8 );
         do while ( dmdbyt gt dmunuse );
            obsmdp  = obsmdp - 1;
            dmdspac = dmdspac - donespc;
            dmdbyt  = int ( (obsmdp+7) / 8 );
            dmunuse = mdatavl - dmdspac;
         end;
         dmunuse = dmunuse - dmdbyt;
      end;
   end;

   /*----------------------------------------------------------------*/
   /* Use input values (-I) to calculate and set out values (-O)     */
   /* to be printed for data file size.                              */
   /* bytes   = total number of pages converted to bytes           -O*/
   /* cfulldp = number of data-only pages.                         -O*/
   /* fdatpag = page num of first page containing data-only.       -I*/
   /* laddobs = number of future ADDs that can be in last page.    -O*/
   /* obsdp   = number of obs in data-only page.                   -I*/
   /* obsmdp  = number of obs in mixed page, 0 if no mixed page.   -I*/
   /* obsnmdp = number of obs beyond mixed page (in data-only pgs).-O*/
   /* pgsize  = Data Set Page Size from PROC CONTENTS.             -I*/
   /* reccnt  = total number of records (ie, observations)         -I*/
   /* totpgs  = total number of data file pages                    -O*/
   /*----------------------------------------------------------------*/
   obsnmdp = reccnt - obsmdp;
   /* Adding obsdp-1 before dividing rounds up to whole pages. */
   cfulldp = int ( (obsnmdp + obsdp -1) / obsdp );
   totpgs  = fdatpag + cfulldp - 1;
   bytes   = totpgs * pgsize;
   laddobs = (cfulldp * obsdp) - obsnmdp;

   /*----------------------------------------------------------------*/
   /* All values have been calculated. Create the report.            */
   /* bytes   = total number of pages converted to bytes             */
   /* dunused = number of unused bytes in data-only page.            */
   /* filefmt = File Format from PROC CONTENTS, 606 or 607.          */
   /* laddobs = number of future ADDs that can be in last page.      */
   /* obsdp   = number of obs in data-only page.                     */
   /* obslen  = Observation Length from PROC CONTENTS.               */
   /* obsmdp  = number of obs in mixed page, 0 if no mixed page.     */
   /* pgsize  = Data Set Page Size from PROC CONTENTS.               */
   /* reccnt  = total number of records (ie, observations)           */
   /* totpgs  = total number of data file pages                      */
   /* varcnt  = Variable count from PROC CONTENTS.                   */
   /*----------------------------------------------------------------*/
   put "================================================================";
   put " ";
   put "Data File Characteristics:";
   put " Page Size (bytes) = " pgsize;
   put " Variable Count = " varcnt;
   put " Observation Length (bytes) = " obslen;
   put " Observation Count = " reccnt;
   put " File Format = " filefmt;
   put " Compressed = " compress;
   if (compress eq "YES") then do;
      put " Compressed Obs Len (bytes) = " cmplen;
      put "Note: Reported values are exact if each obs is specified length.";
   end;
   put " ";
   put "Estimated storage requirements:";
   put " total number of file pages = " totpgs 8. " or " bytes comma14. " bytes";
   put " ";
   put "Note: the estimate does not include storage for the host header page.";
   if (obsmdp ne 0) then
      put "Note: First data is in mixed hdr/data page, containing " obsmdp 8. " obs";
   else
      put "Note: First data is in data-only page, not in mixed hdr/data page.";
   put "Note: Each data-only page contains slots for " obsdp 8. " observations";
   put "Note: Each data-only page includes " dunused 8. " bytes wasted space";
   put "Note: the last page includes space for " laddobs 8. " future ADDs";
   put " ";
   put "Estimation of data file size complete.";
   put " ";
   put "================================================================";
run;