From 697151b38e150ad6f7a424b53407c4804f808a43 Mon Sep 17 00:00:00 2001 From: David Phillips Date: Thu, 11 Mar 2010 10:08:58 -0800 Subject: [PATCH] Initial import of SSB --- BUGS | 987 +++++++++++++++++++++++++++++++++++++++++ CHANGES | 33 ++ HISTORY | 535 ++++++++++++++++++++++ PORTING.NOTES | 220 ++++++++++ README | 88 ++++ TPCH_README | 425 ++++++++++++++++++ bcd2.c | 237 ++++++++++ bcd2.h | 11 + bm_utils.c | 589 +++++++++++++++++++++++++ build.c | 800 +++++++++++++++++++++++++++++++++ config.h | 179 ++++++++ dists.dss | 817 ++++++++++++++++++++++++++++++++++ driver.c | 1144 ++++++++++++++++++++++++++++++++++++++++++++++++ dss.ddl | 70 +++ dss.h | 610 ++++++++++++++++++++++++++ dss.ri | 100 +++++ dsstypes.h | 312 +++++++++++++ history.html | 586 +++++++++++++++++++++++++ load_stub.c | 281 ++++++++++++ makefile | 127 ++++++ makefile.suite | 127 ++++++ makefile_win | 85 ++++ permute.c | 175 ++++++++ permute.h | 47 ++ print.c | 1006 ++++++++++++++++++++++++++++++++++++++++++ qgen.c | 469 ++++++++++++++++++++ rnd.c | 262 +++++++++++ rnd.h | 80 ++++ shared.h | 140 ++++++ speed_seed.c | 325 ++++++++++++++ text.c | 313 +++++++++++++ tpcd.h | 103 +++++ varsub.c | 314 +++++++++++++ 33 files changed, 11597 insertions(+) create mode 100644 BUGS create mode 100644 CHANGES create mode 100644 HISTORY create mode 100644 PORTING.NOTES create mode 100644 README create mode 100644 TPCH_README create mode 100644 bcd2.c create mode 100644 bcd2.h create mode 100644 bm_utils.c create mode 100644 build.c create mode 100644 config.h create mode 100644 dists.dss create mode 100644 driver.c create mode 100644 dss.ddl create mode 100644 dss.h create mode 100644 dss.ri create mode 100644 dsstypes.h create mode 100644 history.html create mode 100644 load_stub.c create mode 100644 makefile create mode 100644 makefile.suite create mode 100644 makefile_win create mode 100644 permute.c create mode 100644 permute.h create mode 100644 print.c create mode 100644 qgen.c create mode 100644 rnd.c 
create mode 100644 rnd.h create mode 100644 shared.h create mode 100644 speed_seed.c create mode 100644 text.c create mode 100644 tpcd.h create mode 100644 varsub.c diff --git a/BUGS b/BUGS new file mode 100644 index 0000000..1f1b2ab --- /dev/null +++ b/BUGS @@ -0,0 +1,987 @@ +# @(#) BUGS 2.1.8.20@(#) +# The following is a list of the various DBGEN/QGEN bugs that have been +# and are being fixed. Each entry is of the form: +# +# Problem #xx: STATUS -- MR ID and OPEN/closed +# followed by a detailed explanation +# TYPE: -- classification of the bug or issue +# SPEC FIX: -- details of any change to the spec +# DBGEN FIX: -- details of any change needed to QGEN/DBGEN +# ANSWER SETS: -- any effect on answer sets +# WORKAROUND: -- temporary fix, if available +# HELP NEEDED: -- any work/assistance required +# AUDITORS NOTIFIED: -- date auditors were notified, if appropriate +# OPENED AGAINST: -- date and effected versions +# CLOSED IN: -- date and fixed version +# +# OPEN BUGS +# ========== +# Problem #33: Parallel load doesn't work under NT +# +# OPEN Feature Requests +# ================= +# Problem #9: would like to include answer set formatting in query templates +# Problem #37: need way to validate DBGEN without large storage requriement +# Problem #58: Need way to track changes from one release to the next +# +# OPEN Documentation Errors +# ================= +# None +#--------------------------------------------------------------------- +#Complete Bug List +#================== +Problem #1: closed +Summary: Q10 returns no rows + Since orders can only be returned (l_returnflag = 'R') after they + have been received, and can't be received in the future, the + number of permissible orders for query 10 tails off early in + 1995. If you are lucky enough to get a parameter substitution + after February '95 (allowed in 2.12.3), things can go "quickly". 
+SEVERITY: +SPEC FIX: replace 2.12.3 (1) with "DATE is the first day in a + randomly selected month between the first month of 1993 and the + last month of 1994" +DBGEN FIX: change permissible substitution range for query 10, + parameter 1 +ANSWER SETS: not affected. +WORKAROUND: use a different seed for qgen parameter substitution +HELP NEEDED: +AUDITORS NOTIFIED: +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 (dbgen and qgen) + +Problem #2: closed +Summary: parallelism in load to gen differing data sets +the parallel load code was based on extensible data sets; since + each "extension" made an assumption of scale factor, the data + could end up clustered. Further, since the RNG is + self-modifying, different numbers of extensions led to different + final data sets. +SEVERITY: +SPEC FIX: none. +DBGEN FIX: remove -E(xtensible) option and implement pure parallel +load with a known scale factor; rebuild seed files +ANSWER SETS: not affected. (parallelism not implemented for SF <= 1) +WORKAROUND: don't use the parallel load (-C) option to DBGEN +HELP NEEDED: testers needed. +AUDITORS NOTIFIED: yes. +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #3: closed +Summary: some arithmetic tends to overflow at large SF +retailprice tends to SF/10 as SF increases. 
this can lead to + data corruption in extendedprice and aggregate calculations +SEVERITY: +SPEC FIX: will need rework of 1.3 wrt retailprice calculation +DBGEN FIX: modification to second term of rpb_routine() calculation +to limit contribution of second term to the maximum seen at + SF=.1 +ANSWER SETS: not affected +WORKAROUND: code retail/extended price calculations as long long; +build smaller data sets +HELP NEEDED: +AUDITORS NOTIFIED: +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #4: closed +Summary: dbgen not ported to NT +SEVERITY: +SPEC FIX: none +DBGEN FIX: need to roll in changes supplied by IBM +ANSWER SETS: not affected +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.1.0 + +Problem #5: closed +Summary: QGEN seed init inconsistent +A prior fix assured that parameter values were query order + independent when a seed was provided on the command line. need + to make this true when no seed is provided +SEVERITY: +SPEC FIX: none +DBGEN FIX: rework seed init loop in qgen.c +ANSWER SETS: not affected +WORKAROUND: supply seeds on command line +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #6: closed +Summary: command line options with abutting arguments mishandled +SEVERITY: +SPEC FIX: none +DBGEN FIX: minor fix to getopt routine in bm_utils.c +ANSWER SETS: not affected +WORKAROUND: separate options and arguments with a space +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #7: closed +Summary: '-O f' asking for new file names twice +SEVERITY: +SPEC FIX: none +DBGEN FIX: rework of set_files() in driver.c +ANSWER SETS: not affected +WORKAROUND: none +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #8: closed +Summary: Seed generation taking too long +SEVERITY: +SPEC FIX: N/A +DBGEN FIX: implement "skip and trudge" as discussed +ANSWER SETS: not affected +WORKAROUND: none +HELP NEEDED: 
+AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #9: OPEN +Summary: would like to include answer set formatting in query templates +SEVERITY: feature request +SPEC FIX: none +DBGEN FIX: additional flag in qgen() +ANSWER SETS: not effected +WORKAROUND: N/A +HELP NEEDED: asked for reproduction info 25 Oct 95 +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: + +Problem #10: closed +Summary: need to re-introduce ability to do incremental, flat file builds +SEVERITY: feature request +SPEC FIX: none +DBGEN FIX: add -S(tep) option to build one of many partial data sets +ANSWER SETS: not effected +WORKAROUND: N/A +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0 +CLOSED IN: 1.0.1 + +Problem #11: closed +Summary: Row count for first delete at 10/100 is incorrect +SEVERITY: Error +SPEC FIX: None +DBGEN FIX: +ANSWER SETS: No Effect +WORKAROUND: hand edit of first delete file +HELP NEEDED: +AUDITORS NOTIFIED: No +OPENED AGAINST: 1.0.1 +CLOSED IN: 2.0.0 (not sure of precise release) +CLOSED BY: jms@gradientsystems.com + +Problem #12: closed +Summary: Bad default rowcount generated for query 17 +SEVERITY: Error +SPEC FIX: None +DBGEN FIX: corrected rowcnt[] entries to be 1-based +ANSWER SETS: N/A +WORKAROUND: hand edit query or add explicit row count to template +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.0 +CLOSED IN: 1.1.0 + +Problem #13: closed +Summary: Bad expansion of SET_OUTPUT for Teradata +SEVERITY: Error +SPEC FIX: N/A +DBGEN FIX: new macro in tpcd.h +ANSWER SETS: N/A +WORKAROUND: Hand edit query or hardcode output directive in templates +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 1.1.0 + +Problem #14: closed +Summary: Badly formed range deletes +SEVERITY: Error +SPEC FIX: N/A +DBGEN FIX: TBD +ANSWER SETS: N/A +WORKAROUND: hand edit delete files +HELP NEEDED: asked for reproduction info 25 Oct 95 +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 2.0.0 (not sure of precise 
release) +CLOSED BY: jms@gradientsystems.com + +Problem #15: closed +Summary: in a multi-stage load, parent tables are not properly named +when parent and child are build simultaneously +SEVERITY: Error +SPEC FIX: N/A +DBGEN FIX: reworked tdef[].name in pr_X_Y routines for master/detail +tables +ANSWER SETS: N/A +WORKAROUND: Build master/detail tables separately +HELP NEEDED: +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 1.1.0 + +Problem #16: closed +Summary: update generation at large scale factors produced the wrong number +of rows due to overflow of 32-bit integer +SEVERITY: BUG +SPEC FIX: N/A +DBGEN FIX: corrected order of operations in row count calcuation in +driver.c +ANSWER SETS: N/A +WORKAROUND: use 64 bit integers +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 1.1.0 + +Problem #17: closed +Summary: comment fields may be truncated when using columnar output, due to +rounding/truncation in the length calculation +SEVERITY: BUG +SPEC FIX: N/A +DBGEN FIX: add ceil() calls around all PR_VSTR() calls in print.c +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +CLOSED IN: 1.1.0 + +Problem #18: closed +Summary: the output format for identifier fields in columnar output is +unneccessarily large, and is inconsistant +SEVERITY: minor +SPEC FIX: N/A +DBGEN FIX: revised PR_BCD2 macro +ANSWER SETS: N/A +WORKAROUND: avoid columnar output, or rework macro +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0 +OPENED BY: jenn@torolab2.vnet.ibm.com +CLOSED IN: 1.1.0A +CLOSED BY: jms@informix.com + +Problem #19: closed +Summary: the case statement used to decipher substitution points in the +query template allowed extraneous :'s to re-initialize the +parameter substitution +SEVERITY: bug +SPEC FIX: N/A +DBGEN FIX: rework flag switch in qgen.c to explicitly call out numerics +ANSWER SETS: N/A +WORKAROUND: be sure that there are no "unknown" flags in the template +HELP 
NEEDED: none +AUDITORS NOTIFIED: yes +OPENED AGAINST: 1.0.1 +OPENED BY: jenn@torolab2.vnet.ibm.com +CLOSED IN: 1.1.0A +CLOSED BY: jms@informix.com + +Problem #20: closed +Summary: parameter substitution values were not effected by small changes +in seed values +SEVERITY: bug +SPEC FIX: N/A +DBGEN FIX: add UnifInt() calls to RNG init in qgen.c +ANSWER SETS: N/A +WORKAROUND: be sure seed values provide sufficient randomness in EQT +HELP NEEDED: none +AUDITORS NOTIFIED: yes +OPENED AGAINST: 1.1.0 +OPENED BY: alain_crolotte@elsegundoca.attgis.com +CLOSED IN: 1.1.0B +CLOSED BY: jms@informix.com + +Problem #21: closed +Summary: parameter logging doesn't properly handle the variable length of +the substitution list +SEVERITY: bug +SPEC FIX: N/A +DBGEN FIX: assure null termination of param list and bound the output +loop that logs parameter usage +ANSWER SETS: N/A +WORKAROUND: none +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0B +OPENED BY: +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #22: closed +Summary: parameter output for Q11 can overflow default formatting at very +large volumes +SEVERITY: bug +SPEC FIX: N/A +DBGEN FIX: expand format string to %11.10f +ANSWER SETS: N/A +WORKAROUND: hand code queries for large volumes +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0B +OPENED BY: francois@ip.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #23: closed +Summary: typos in variant 14c +SEVERITY: +SPEC FIX: N/A +DBGEN FIX: corrected query template +ANSWER SETS: N/A +WORKAROUND: none +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0B +OPENED BY: francois@ip.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #24: closed +Summary: macro PR_DATE was hard-coded to print t->alpha even though a +target was passed in as a parameter +SEVERITY: minor +SPEC FIX: N/A +SOURCE FIX: re-worked macro to properly use its arguments +ANSWER SETS: N/A +WORKAROUND: none +HELP NEEDED: none +AUDITORS 
NOTIFIED: no +OPENED AGAINST: 1.1.0A +OPENED BY: Robert.Lane@eng.sun.com +CLOSED IN: dbgen 1.1.0B +CLOSED BY: jms@informix.com + +Problem #25: closed +Summary: typos in variant 10a +SEVERITY: +SPEC FIX: N/A +DBGEN FIX: corrected query template +ANSWER SETS: N/A +WORKAROUND: none +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0B +OPENED BY: francois@ip.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #26: closed +Summary: the version numbers for QGEN and DBGEN do not match +SEVERITY: minor +SPEC FIX: N/A +SOURCE FIX: unified version numbers starting with 1.1.0C +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: no +OPENED AGAINST: 1.1.0B (or 1.1.0C, depending) +OPENED BY: Robert.Lane@eng.sun.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #27: closed +Summary: correcting typos in 7, 9, 13 +SEVERITY: minor +SPEC FIX: N/A +SOURCE FIX: fixed them +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0C (pre-release) +OPENED BY: tblank@vnet.ibm.com +CLOSED IN: 1.1.0C +CLOSED BY: jms@informix.com + +Problem #28: closed +Summary: Seed generation fails with SF > 1000 due to 32 bit integer +arithmetic used to verify "divisible-ness" of data set +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: TBD +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0C +OPENED BY: alain_colotte@elsegundoca.ncr.com +CLOSED IN: 1.3.0 +CLOSED BY: jms@gradientsystems.com + +Problem #29: closed +Summary: Compile time errors on Solaris 2.5.1 and SunOS +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: Solaris fixed by renaming lineitem field from extended to + eprice; SunOS problem documented in Porting.Notes +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0D +OPENED BY: jms@informix.com +CLOSED IN: 1.2.0 +CLOSED BY: jms@informix.com + +Problem #30: closed +Summary: Cryptic comments in dists.dss +SEVERITY: 
flaw +SPEC FIX: N/A +SOURCE FIX: Cleaned up the comments in the file +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.0 +OPENED BY: francois@sizing.com +CLOSED IN: 1.2.3 ALPHA 1 +CLOSED BY: jms@informix.com + +Problem #31: closed +Summary: Inconsistant handling of fopen() failures +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: introduced OPEN_CHECK macro (defined in dss.h) +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.0 +OPENED BY: schiefer@ca.ibm.com +CLOSED IN: 1.3.0 +CLOSED BY: jms@gradientsystems.com + +Problem #32: closed +Summary: Path separators were hard-coded +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: introduced PATH_SEP in config.h +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.0 +OPENED BY: +CLOSED IN: 1.3.0 +CLOSED BY: jms@gradientsystems.com + +Problem #33: OPEN +Summary: Parallel load doesn't work under NT +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: +ANSWER SETS: N/A +WORKAROUND: use -S option to build each step independently +HELP NEEDED: none +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.1.0 +OPENED BY: +CLOSED IN: +CLOSED BY: + +Problem #34: closed +Summary: P_NAME not properly populated +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: Corrected color selection logic in agg_str() +ANSWER SETS: NFI for 1.x since it effect answer sets +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.3 +OPENED BY: schiefer@ca.ibm.com +CLOSED IN: 2.0.0 +CLOSED BY: jms@gradientsystems.com + +Problem #35: closed +Summary: mk_sparse() returning bad orderkeys +SEVERITY: bug +SPEC FIX: N/A +SOURCE FIX: corrected logic in mk_sparse() and bcd2_bin() +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.3.0 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 1.3.1 +CLOSED BY: jms@gradientsystems.com + +Problem #36: closed +Summary: a_rnd() doesn't mask properly, uses small 
'alphabet' +SEVERITY: bug +SPEC FIX: Corrected 4.2.2.6 to reflect 64 character set +SOURCE FIX: changed mask in a_rnd() from 067 to 077 +ANSWER SETS: NFI for 1.x since answers would be effected +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.3 +OPENED BY: pek@elsegundoca.ncr.com +CLOSED IN: 2.0.0 +CLOSED BY: jms@gradientsystems.com + +Problem #37: OPEN +Summary: need way to validate DBGEN without large storage requriement +SEVERITY: Feature Request +SPEC FIX: N/A +SOURCE FIX: Provide vrf_xxx routine to generate checksums +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.2.3 +OPENED BY: jms@gradientsystems.com +CLOSED IN: +CLOSED BY: + +Problem #38: closed +Summary: need to be able to generate specific update set +SEVERITY: Feature Request +SPEC FIX: N/A +SOURCE FIX: Update update generation to use -S option +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.3.1 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 2.0.0 (not certain of fix version) +CLOSED BY: jms@gradientsystems.com + +Problem #39: closed +Summary: README for dbgen is out of date +SEVERITY: Documentation error +SPEC FIX: N/A +SOURCE FIX: Rewrite of README +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0.6b +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 2.0.0 +CLOSED BY: jms@gradientsystems.com + +Problem #40: closed +Summary: O_CUSTKEY is generated out of range at 10GB +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: Correction of CUST_MORTALITY calculation +ANSWER SETS: Unknown +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0.7 +OPENED BY: wayne.smith@intel.com +CLOSED IN: 2.0.0.8 +CLOSED BY: jms@gradientsystems.com + +Problem #41: closed +Summary: V2 appears slower than V1 +SEVERITY: Bug +SPEC FIX: +SOURCE FIX: Used NthElement() in row_stop() +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS 
NOTIFIED: N/A +OPENED AGAINST: 2.0.0.8 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 2.01a +CLOSED BY: jms@gradientsystems.com + +Problem #42: closed +Summary: Dual declaration of articles causes C++ compilation error +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: Duplicate declaration removed +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0 +OPENED BY: jpm@informix.com +CLOSED IN: 2.0.0a +CLOSED BY: jms@gradientsystems.com + +Problem #43: closed +Summary: Subselect wild card not consistant with spec +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: Query templates corrected +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0 +OPENED BY: jpm@informix.com +CLOSED IN: 2.0.0a +CLOSED BY: jms@gradientsystems.com + +Problem #44: closed +Summary: small money values incorrect +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: reworked PR_xxx macros +ANSWER SETS: new answer included for Q22 +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 2.0.0 +OPENED BY: ac4@elsegundoca.ncr.com +CLOSED IN: 1.0.1 +CLOSED BY: jms@gradientsystems.com + +Problem #45: closed +Summary: L_ORDERKEY/O_ORDERKEY incorrect +SEVERITY: Bug +SPEC FIX: N/A +SOURCE FIX: corrected pointer arithmetic in print.c +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: jpm@informix.com +CLOSED IN: 1.0.1a +CLOSED BY: jms@gradientsystems.com + +Problem #46: closed +Summary: L_ORDERKEY/O_ORDERKEY incorrect +SEVERITY: Dup (see #45) +SPEC FIX: N/A +SOURCE FIX: N/A +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 1.0.1a +CLOSED BY: jms@gradientsystems.com + +Problem #47: closed +Summary: QGEN parameter substitution not random +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: corrected varsub RANDOM usage to reflect seed file removal +ANSWER SETS: N/A +WORKAROUND: N/A 
+HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: wayne.smith@intel.com +CLOSED IN: 1.0.1a +CLOSED BY: jms@gradientsystems.com + +Problem #48: closed +Summary: QGEN parameter substitution not random for Q21 +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: corrected varsub to only reference nations2 distribution +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1a +OPENED BY: wayne.smith@intel.com +CLOSED IN: 1.0.1b +CLOSED BY: jms@gradientsystems.com + +Problem #49: closed +Summary: Extraneous trailing separator in delete files +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: special-cased the handling of deletes using PR_KEY +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1b +OPENED BY: wayne.smith@intel.com +CLOSED IN: 1.0.1c +CLOSED BY: jms@gradientsystems.com + +Problem #50: closed +Summary: qgen not generating valid parameter log files for defaults +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: corrected params/default reference +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: cta@elsegundoca.ncr.com +CLOSED IN: 1.0.1d +CLOSED BY: jms@gradientsystems.com + +Problem #51: closed +Summary: inconistent/invariant substitutions in Q16, Q17, Q19 +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: corrected "brand" selection to make order irrelevent +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 1.0.1 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 1.0.1d +CLOSED BY: jms@gradientsystems.com + +Problem #52: closed +Summary: qgen seeds make parameter substitutions position dependant + The current scheme uses an individual RNG stream for each query, and seeds + all streams identically. Accordingly, two queries that use the same domain + for the same parameter will always have the same value (e.g., q9 and q20). 
+SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: seed the individual streams with the sequence of random + numbers produced by the global seed value +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 1.0.1a/1.1.0a (990727) +CLOSED BY: jms@gradientsystems.com +CHECKED BY: qa52 + +Problem #53: closed +Summary: number of lineitems in update files no longer varies + The RNG is not being set at the start of update generation; accordingly + the original data (including rowcounts) is being "regenerated" +SEVERITY: +SPEC FIX: N/A +SOURCE FIX: +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: jennc@ca.ibm.com +CLOSED IN: 990810 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: qa53 + +Problem #54: closed +Summary: segmented update files fail when rows per file is small + A round off error could cause the wrong number of rows to be output to a + given update file +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: correction to driver.c and print.c to use division and modulo + to produce comparably sized files regardless of divisor +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: v-larryk@microsoft.com +CLOSED IN: 1.0.1a/1.1.0a (990727) +CLOSED BY: jms@gradientsystems.com (using code from larry) +CHECKED BY: + +Problem #55: closed +Summary: -S generates bad data when used with updates + The RNG is not being properly set +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: added the appropriate offset to the RNG, and simplified the + update generation code +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: jms@gradientsystems.com +CLOSED IN: 990816 +CLOSED BY: jms@gradientsystems.com (using code from larry) +CHECKED BY: qa55 + +Problem #56: closed +Summary: Need way to specify dists.dss location on the command line 
+SEVERITY: FEATURE +SPEC FIX: N/A +SOURCE FIX: added -b switch to driver.c and qgen.c +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: clevine@microsoft.com +CLOSED IN: 990830 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #57: closed +Summary: Need way to remove all DBGEN output unless there is an error +SEVERITY: FEATURE +SPEC FIX: N/A +SOURCE FIX: added -q switch to driver.c and changed verbose if's +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: clevine@microsoft.com +CLOSED IN: 990830 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00058: OPEN +Summary: Need way to track changes from one release to the next +SEVERITY: FEATURE +SPEC FIX: N/A +SOURCE FIX: reintroduce and automate the CHANGES file. Require MRs for + all source code changes +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: None +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990708 +OPENED BY: mpoess@us.oracle.com +CLOSED IN: +CLOSED BY: +CHECKED BY: + +Problem #00059: closed +Summary: extra comma in Q2 template +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: Template corrected +ANSWER SETS: N/A +WORKAROUND: None. +HELP NEEDED: None. 
+AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990830 +OPENED BY: jpm@informix.com +CLOSED ON: 990908 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + + +Problem #00060: closed +Summary: segmented inserts/deletes creating an extra file +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: Adding in missed change from original roll-in +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990830 +OPENED BY: larryk@microsoft.com +CLOSED ON: 990111 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00061: closed +Summary: 64-bit support under DigUnix leads to math errors +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: Calculation of dRange in rnd.c now uses double cast +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 990830 +OPENED BY: nramesh@us.oracle.com +CLOSED ON: 000131 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00062: closed +Summary: bad update rollover after 1000 refreshes + This test uses tpcH scale 0.01. We've encountered + a situation in which dbgen doesn't generate + the correct data for delete files delete.1000 and + above. In particular, file delete.1000 contains + keys to be deleted that have never been loaded. + Because of this problem, keys that should have been + deleted never are, causing duplicate unique values + to appear in the incremental loads after we cycle + from the 4000th incremental update back around starting + again with the 1st one. 
+SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: N/A +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 19991101 +OPENED BY: Roger.McNicol@sybase.com +CLOSED ON: 20000509 +CLOSED BY: jms +CHECKED BY: N/A + +Problem #00063: closed +Summary: update copyright notice + N/A +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: N/A +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: N/A +OPENED BY: jms@gradientsystems.com +CLOSED ON: 20000131 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00064: closed +Summary: permute() introduce 0 selection in [1..50] for q16 + N/A +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: rework permute() to be 1-based +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 20000413 +OPENED BY: lorna@permetrics.com +CLOSED ON: 20000414 +CLOSED BY: jms@gradientsystems.com +CHECKED BY: N/A + +Problem #00065: OPEN +Summary: permute correction caused dataset changes + initial fix for #64 caused qa failures due to data set changes. 
New fix + is limited to query parameter substitution changes and has passed qa +SEVERITY: BUG +SPEC FIX: N/A +SOURCE FIX: N/A +ANSWER SETS: N/A +WORKAROUND: N/A +HELP NEEDED: N/A +AUDITORS NOTIFIED: N/A +OPENED AGAINST: 20000511 +OPENED BY: jms +CLOSED ON: N/A +CLOSED BY: N/A +CHECKED BY: N/A diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..e7d1247 --- /dev/null +++ b/CHANGES @@ -0,0 +1,33 @@ +# @(#)CHANGES 2.1.8.18 +08 Dec 1998 Release 2.0.0 +15 Mar 1998 Release 2.0.0 pre-release +06 Feb 1998 Release 1.3.1 +15 Dec 1996 Release 1.2.0 +08 Aug 1996 Release 1.1.0D +01 May 1996 Release 1.1.0C +29 Jan 1996 Release 1.1.0B +23 Jan 1996 Release 1.1.0A +19 Dec 1995 Release 1.1.0 +11 Sep 1995 Release 1.0.1 +13 Mar 1995 Release 1.0 + + +Changes between 990830 and 991011 +File Bug ID +---- ------ +s.2.sql 00059 Removing extra comma +s.dss.h 00061 +s.config.h 00061 +s.driver.c 00060 adding missed change from Larry +s.makefile 00058 +s.rnd.c 00061 +s.HISTORY 00061 +s.history.html 00061 +s.mr.sh 00058 miscelaneous corrections +s.bug.template 00058 removing extraneous spaces +s.bug.template changed titles + + +Changes between 199910 and 000511 +File Bug ID +---- ------ diff --git a/HISTORY b/HISTORY new file mode 100644 index 0000000..8258af8 --- /dev/null +++ b/HISTORY @@ -0,0 +1,535 @@ +# @(#)HISTORY 2.1.8.3 +Changes as of 10/11/99 + -- versions: TPCH 1.2.0a, TPCR 1.1.0a + -- Correction to segmented updates that was causing extra file to be + generated + -- Porting changes for DigUnix +Changes as of 08/28/99 + -- versions: TPCH 1.2.0, TPCR 1.1.0 + -- reduced parameter substitution range for Q18 + -- added new option to specify location of dists file (-b) + -- added DBGEN option to suppress all output (-q) +Changes as of 08/16/99 + -- versions: TPCH 1.1.0a, TPCR 1.0.1e + -- prevent "reuse" of original data in update files + -- correction to lint target in makefile.suite + -- removal of vestigal l_partkey predicate from 21.sql + -- reorder lineitem/order join in 
q5 + -- removal of table aliases from 2.sql + -- randomize seeding of qgen RNG to close bug 52 + -- correct possible round off error in segmented update files + -- corrected soft copy answer set for Q22 + -- corrected precision of answer set for Q19 +Changes as of 07/08/99 + -- versions: TPCH 1.1.0, TPCR 1.0.1 + -- WORKLOAD must be set to either TPCH or TPCR in the makefile + -- unneeded reference to part table removed from q21 template +Changes as of 06/04/99 + -- version 1.0.1d + -- Restarted version numbering to match specification revisions for + TPC-H and TPC-R + -- Corrected answer set for Q13 + -- Corrected parameter substitutions for Q16, Q17, Q19, Q20, Q21, Q22 + -- Corrected RNG initialization in qgen.c + -- added adhoc.c adhoc.h to code base to support randomized data sets; + currently disabled + -- replaced calls to UnifInt() row_stop with call to NthElement() + -- Corrected a problem that caused small negative money values to print as + a positive value + -- Simplification of PR_xxx macros + -- QGEN building correct parameter logs again + +****************** +* NOTE NOTE NOTE * +****************** +Below this line the file refers to TPC-D which was retired in favor of +TPC-H and TPC-R. Since the new specifications are numbered from 1.0.0 +the program version was reset. 
+****************** +* NOTE NOTE NOTE * +****************** + +Changes as of 01/05/99 + -- version 2.0.1 + -- added 1999 to the copyright notice + -- corrected C++ compilation problem + -- sub-select phrasing corrected in Q4, Q21, Q22 + -- added support for segmenting update files (contributed by Larry Kemp, HP) +Changes as of 12/08/98 + -- version 2.0.0 + -- removed permute.h from clean target in makefile +Changes as of 11/17/98 + -- version 2.0.0 Alpha 8 + -- corrected o_custkey overrun bug + -- removed upper bound on -C command option + -- added static permute.h to distribution to match the specification +Changes as of 10/23/98 + -- version 2.0.0 Alpha 7 + -- removed references to DSS_SEED and SEED_TAG + -- minor query template cleanup + -- V2 answer sets added + -- correction to hd_sparse for SF > 300 + -- added static declaration to row types in gen_tbl to fix update problem + -- permuted params to Q22 +Changes as of 5/19/98 + -- version 2.0.0 Alpha6b + -- removed trailing apostrophe from dists.dss nouns for Tandem loader + -- corrected mk_sparse() problem with alpha6 + -- added 64b support for NCR/Metaware + -- corrected revision problem with 2.0.0.6 +Changes as of 5/7/98 + -- version 2.0.0 Alpha6 + -- corrected generation of parent/child tables in parallel + -- renamed ORDER table to ORDERS table + -- revision of DBGEN synced with revision of 2.0 specification + -- portability changes to process termination provided by John Matzka + -- portability changes for Watcom C provided by Andrew Eisenberg + -- indentation of specifications/templates now matches + -- queries now include a consistant header format +Changes as of 4/28/98 + -- version 2.0.0 Alpha5 + -- NO RELEASE OF ALPHA 5 ; skipped to sync spec/DBGEN revision levels +Changes as of 4/6/98 + -- version 2.0.0 Alpha4 + -- corrected parallel table generation + -- minor corrections to query templates + -- portability changes for HP +Changes as of 3/24/98 + -- version 2.0.0 Alpha3 + -- include substitution 
parameters for Q22 + -- correct substitution parameters for Q16 under AIX + -- include permute.h until unix/NT makefile fix + -- correct orderkey generation +Changes as of 3/20/98 + -- version 2.0.0 Alpha2 + -- correct runtime malloc error from bad INIT_HUGE macro + -- improve pseudo text distribution in comments + -- fix problem with parallelism of data gen + -- re-enable generation of parent/child tables + -- remove recombinaton code for parallel flat files +Changes as of 3/11/98 + -- version 2.0.0 Alpha1 + -- removed the TIME table + -- removed the need for seed files + -- made 1GB the validation database size + -- add pseudo text support in comments + -- correct character selection in a_rnd() + -- correct population of P_NAME + -- removed unclaimed variants + -- added new queries 18-22, replaced Q13 +Changes as of 2/6/98 + -- version 1.3.1 + -- Revised 64 bit support to clean up bcd2_bin()and mk_sparse() + -- Add 64b support for NT +Changes as of 12/31/97 + -- version 1.3.0 + -- support for seed generation > 1TB (data gen still to be tested) + -- rework of 64b support + -- added bcd support for subtraction, comparison, modulo + -- added 1998 to the copyright notice + -- clarified comments in dists.dss + -- corrected substitution problem in Q11 + -- standardized fopen() error messages with OPEN_CHECK() + -- introduced PATH_SEP in config.h to allow changes in path separators +Changes as of 12/15/96 + -- version 1.2.0 + -- corrected typos in queries 8a, 8c, 8d, 11a, 12F and 14F, 17a + -- added variant 15c + -- defined MAX_SCALE and MIN_SCALE; issued error messages for SF > 1000 + since implementation is incomplete + -- seed file generation can now be resumed with dbgen -R ... 
+ -- corrected slight compile bug under Solaris 2.5.1 + -- documented compile problems under SunOS +Changes as of 8/1/96 + -- version 1.1.0D + -- included new variants for queries 8 and 15 + -- re-introduced answer sets in the source tree +Changes as of 5/1/96 + -- version 1.1.0C + -- unified version numbering of DBGEN and QGEN + -- updated BUGS list + -- removed FAQ from soft appendix; web site will keep the current + version of the FAQ + -- added 1996 to the copyright notice + -- corrected bug in PR_DATE macro; NO CHANGE TO DATA SET + -- properly initialize param values for cleaner logging + -- adjusted output format of Q11 partam to allow scaling to 1TB + -- corrected typos in variant 14c + -- corrected data type for YEAR in variant 8c + -- corrected typos in variant 10a + -- added variant 8d +Changes as of 1/23/96 + -- qgen version 1.1.0B + -- include support for ANSI semantics + -- improved patch for seed sensetivity +Changes as of 1/23/96 + -- updated BUGS list + -- dbgen version 1.1.0A + -- patch to limit BCD2 fields to 12 characters for columnar output + -- qgen version 1.1.0A + -- patch to fix the "unknown flag" problem + -- patch to fix the seed sensetivity problem +Changes as of 12/19/95 + -- updated BUGS list + -- dbgen version 1.1.0 + -- upped default value of MAX_CHILDREN to 1000 + -- corrected naming of detail tables in incremental load + -- corrected range delete output + -- forced delete files to truncate existing files + -- removed fixed size tables from seed generation + -- corrected overflow problem with large scale seed generation + -- allow date generation as MM-DD-YY based on config.h #define + -- correct truncation problem with columnar output in PR_VSTR() + -- added support for Windows NT + -- added PLATFORM macro to makefile, removed platform defines from + config.h + -- removed MAX_CHILDREN define from config.h (set to 1000 in dss.h) + -- qgen version 1.1.0 + -- correct SET_OUTPUT macro to TDAT + -- use %ld in output for q17; portability 
+ -- add support for SQLSERVER database dialect + -- add support for SYBASE database dialect + -- adjust parameter ranges for Q1, Q3, Q6 + -- add -T/-t option to usage summary + -- added support for Windows NT +Changes as of 09/01/95 + -- qgen version 1.0.1 + -- formalized version numbering + -- -p now generates correct query permutations + -- added separate verion number for qgen + -- corrected Q3 substitution problem + -- updated permissible range for Q10 + -- corrected rowcount_dflt and the MAX row indicator (-1) + -- expanded param logging to include all possible parameters + -- allowed qgen's -d option to be used at all scale factors + -- made parameter substitution permutation-independent + -- added qgen suppport for END_TRAN (-E) and DFLT_NUM (-N) + -- correct handling of :n directive + -- added more complete explanation of QGEN to README + -- rename of random to rndm, for portability + -- dbgen version 1.0.1 + -- formalized version numbering + -- inclusion of SF=1 seed file + -- correct typo in usage() update example + -- patch to driver.c to allow correct updates + -- documentation change to README to clarify seed/stage/update + intereaction + -- corrected minor glitch in "open failed" error msg in print.c + -- added missing line continuation to makefile.suite + -- seed files are now based on scale factor and number of generators + -- seed files now hold seeds for one "step" of a given build + -- clean up of parallel load routines + -- inclusion of faster seed generation routines from Susanne Englert + -- removed the -E(xisting) option + -- assure proper scaling of O_CUSTKEY + -- corrected default update percentage + -- proper handling of child tables with '-O f' + -- removed seed files from the distribution + -- modified rpb_routine() to limit contribution of partkey in + retailprice + -- added '-S(tep)' option to allow multi-stage loads + -- roll in of 32 bit speed_seed routines from Dick Shelton + -- miscelaneous typo corrections in the documentation + 
-- cleanup of usage output +Changes as of 05/08/95 + -- version 1.0 + -- add Teradata defines to tpcd.h for QGEN + -- add :c to query templates for database CONNECT syntax + -- add examples of DBGEN and QGEN usage to README + -- add -T option to qgen to allow time able usage + -- query template names only requre .sql suffix, rest is arbitrary +Changes as of 03/13/95 + -- version 9.1 + -- surround DBNAME with ifndef in config.h + -- remove -DDBNAME from makefile.suite + -- sync varchar handling with 9.1 draft +Changes as of 02/21/95 + -- version 9.0a + -- fixed bug in qgen that incorrectly included rnd.h + -- included revised DDL with changes for char/varchar and l_quantity + -- updated DBGEN help message to include new single table options for + order/lineitem and part/partsupp + -- included handling for multi-set seed files TPCDSEED.xxx + -- generated seeds up through 400GB; headed to 1TB! + -- ANSI lint cleanup; more needed + -- UF2 now defaults to key lists; use "-O r" to generate key ranges + also note, this routine this routine does NOT use the BCD2_* + routines. As a result, it WILL fail if the keys being deleted + exceed 32 bits. 
Since this would require ~660 update iterations, + this seems an acceptable oversight +Changes as of 01/19/95 + -- version 9.0 + -- allowed command line seeding of RNG for QGEN + -- order and number of params in QGEN now matches + presentation in spec + -- fixed bug in time table format of O_ORDERDATE + -- changed l_QUANTITY to FLOAT in dss.ddl + -- reworked QGEN options to be more useful + -- allowed creation of sparse keys beyond 32 bits (for 1TB) + -- removed unused '#ifdef' and associated code + -- allowed independent generation of master/detail tables + (eg, order/lineitem) +Changes as of 12/06/94 + -- version 8.6 + -- fixed renaming of flat files for child tables + -- various documentation fixes + -- added naming convention section to Porting.Notes + -- added -DIBM flag to config.h + -- synced up QGEN with draft 8.1 +Changes as of 10/25/94 + -- version 8.5a + -- corrected bug in columnar output of pr_supp + -- added pr_drange to generate a list of order keys to be + deleted instead of generating SQL + -- added '-O d' to generate range delete as SQL + -- updated default values for QGEN to sync with spec 8.1 + -- corrected MK_SPARSE to reflect groups of 8 + -- corrected a bug in o_orderstatus + -- regenerated seed files for SF in [1,10] + -- ANSI cleanup (primarily function declarations) +Changes as of 10/11/94 + -- version 8.5 + -- remove deletes/inserts to other than order/lineitem + -- increased cardinality for part.type part.container + -- '-r' argument is now integer; percentage in basis points + -- initial roll-in of new update scheme + -- added BBB comments to supplier table +Changes as of 9/27/94 + -- version 8.4 + -- all money calculations now use integer math. This should + bring everyone's data sets into exact aggreement. +Changes as of 9/21/94 + -- version 8.3b + -- fixed handling of MAX_STREAM + -- added floor function to RPRICE bridge + -- misc lint cleanup (type fixes, new prototypes, etc.) 
+ -- MONEY format becomes lf for DOS + -- further cleanup of PR_VSTR and its length argument + -- change to parameter generation for Q6 to allow for float + discount +Changes as of 9/15/94 + -- version 8.3a + -- isolated MONEY format for Unisys (Lf) using DOS + -- make sure all arguments to MAKE_MONEY were double's + -- rolled in NEW_PTEXT to allow Berni to experiment +Changes as of 9/12/94 + -- version 8.3 + -- added -T n and -T r to usage to match getopt() and README + -- changed PR_MONEY to remove leading blanks + -- included revised DDL from Berni + -- included some MVS portability fixes in re malloc.h + -- cleaned up error messages in qgen and made #define ofp usage + universal + -- additional DOS portability changes + -- added {c,a}len to provide specific length for columnar + output of varchar + -- added PR_VSTR to handle varchar printing under MVS + -- fixed bit masking in a_rnd and cleaned up prototype match + with V_STR + -- PR_MONEY now used %Lf + -- added revised pseudo text under NEW_PTEXT ifdef for + experiments +Changes as of 9/09/94 + -- version 8.2 + -- l_discount and l_tax are now fractional (per teleconference) + -- money calculations moved to scaled integer math to clean up + answer sets + -- changed PR_FLT() to PR_MONEY to clarify usage + -- portability changes for SYBASE: dbname --> db_name + STATUS --> DBGEN_STATUS + -- added nations2 to dists.dss to handle qgen needs for now + -- reintroduced #ifndef DOS + -- reintroduced U2200 define to control kill_load() + -- broke out nation and region separately in -T option + -- updated dss.ddl based on mail from Berni +Changes as of 8/31/94 + -- version 8.1 + -- scaling for clerks needed to be 1000 (was 100) + -- added qgen parameter for scale + -- changed qgen parameter from s)tream to p)ermutation + -- synced qgen paramter values with 8.0 spec + -- corrected duplications in dists.dss +Changes as of 8/24/94 + -- version 8.0 + -- added sparse keys to lineitem/order + -- added varchar generation for 
comments/addresses + -- added variable lineitems/orders + -- removed ifdef for normalized code_tables + -- included code for parameter generation and template->EQT + routines + -- updated README and Porting.Notes to reflect QGEN + -- included DDL and RI examples from Berni +Changes as of 6/15/94 + -- version 7.0b (numbers now match spec revsion) + -- rework of code tables to properly map nation/region; when + compiled with -DCODE_TABLES distributions are taken from + code.dss and two additional fields are generated for + customers and suppliers, [cs]_ncode and [cs]_rcode, + immediately following [cs]_region + -- replaced ifdef's around DEAD_DATA with opposites. DEAD_DATA + is now the default + -- worked through code to see that it conformed to 7.0 + specification + -- adjusted scale factors/rowcounts for 1 GB == sf1 + -- brought help message in line with current code + -- fixed order per customer at 10 + -- make suppkey scalable in lineitem/partsupp +Changes as of 4/25/94 + -- version 1.5 + -- added the customers with no orders; Compile with -DDEAD_DATA + to activate the change. + -- added the code table for nation and region; + Compile with -DCODE_TABLES to activate the change. +Changes as of 3/17/94 + -- version 1.41 + -- completed implementation of JULIAN_DAY after talks with Berni + -- misc cleanup in usage/README files + -- removed all tabs and capped line length at 75 + -- added -n option to allowing naming of inline-loaded database +Changes as of 3/16/94 + -- version 1.4 + -- prottyped julian day/month for query re-write work. 
Compile + with -DJULIAN_DAY to enable + -- removed gen_times() from driver.c + -- added VMS ifdef to config.h to clean up fork/signal issues + -- added ICL ifdef to config.h to clean up getopt() issues + -- changed header file references to config.h from machine.h +Changes as of 3/2/94 + -- version 1.31 + -- corrected format of C_NAME to match S_NAME and O_CLERK + -- re-allowed fractional scale factors < 1 (updates not + contiguous) + -- added DSS_CONFIG environemnt variable + -- reworked read_dist() to look for DSS_DIST in DSS_CONFIG + -- updated the README file +Changes as of 2/16/94 + -- version 1.3 + -- added command line options for parallel load and data set + expansion + -- changed dists.dss delimiter to | for portability + -- limited scale factors to integer values + -- added command line option for seed file generation + -- added all seed files to distribution for SFs 1 - 10 + -- moved machine.h to config.h and added MAX_CHILDREN define + -- added 'f' flag to options to allow renaming of output files + -- added generation of SQL delete statements to match updates + (Note: updates are still single-threaded; -C is cleared + by -U) + -- corrected field sizing in dsstypes.h typedefs to match v 6.4 + -- update percentage default set to 1% +Changes as of 12/3/93 + -- version 1.2 + -- added command line option to adjust update percentage + -- fixed update gneration for proper primary key ordering + -- renamed UUSR/PRC to RUSSIA/CHINA in dists.dss + -- cleaned up phone number generation to be consistant regard- + less of order of evaluation + -- adjusted size of lineitem comment to bring data in line with + 100 MB == SF=1 +Changes as of 10/15/93 + -- added command line option for update data creation + -- miscelaneous porting and cleanup changes + -- reworked table generation to allow reuse for updates + -- added comment field to tdefs structure + -- added load_state and store_state to sync data gen and + update gen +Changes as of 7/26/93 + -- combined loader and 
header stubs in load_stubs.c + -- separated Revision History (this file) from README + -- simplified makefile + -- removed redundancies from colors distribution + -- added getopt() for portability + -- created Porting.Notes + -- adjusted scaling rules + -- added help option to the command line +Changes as of 2/26/93 + -- combined all typedefs in one header: dsstypes.h + -- combined flat file generation in print.ec + -- combined typedef population in build.ec + -- added -P to control rowcnt scaling (P for percentage) + -- added -D option for Direct data generation and added + appropriate hooks in tdefs[] structure + -- added -F option for flat file generation + -- reused -T option (use -P 0.1 to build test size database) + now accepts suboptions c,o,p,s for single table builds. + -- dropped -M option (scaling is now by rowcount) + -- added -O option for optional controls. Currently defined: + -O t -- generate optional time table a join fields in + order/lineitem + -O h -- generate headers for flat file output + -O m -- generate fixed column-length output + -- removed dynamic memory allocation, redundant calls to + UnifInt, etc to improve performance +Changes as of 1/12/92 + -- julian() changed to handle orders->orderdate correctly + -- rflag distributions corrected in dists.dss + -- sea, gold removed from color distribution to clean up substring + problems + -- part->number and supplier-> adjusted for 1-based indexing + -- time->day changed to be day of month, not day of year + -- t.week changed to be week in year, not day of week +Changes as of 11/18/92 + -- checked line length and tab for transmission + -- another chapter in the portability wars. added #include + "machine.h" to dss.h (which is included by everyone else). Any + machine particular porting changes should go here. 
+ -- fixed fixed-field formats to prevent double printing + -- expanded PR_FLT formats to %010.2 +Changes as of 10/21/92 + -- added fixed format and column header handling; users of headers + will have to define the header functions to be called in + int (*tdefs.header)() +Changes as of 10/09/92: + -- added ansi prototypes and recompiled with gcc -ansi. users may + need to change the CC definition in the makefile and the contents + of CFLAGS to reflect their particular ansi compiler. + -- replaced all int references with long + -- replaced all float references with double + -- found and fixed odate/julian problem TS mentioned in 10/09 phone + call + +Changes as of 9/09/92: + -- Park/Miller random number generator included + -- clerk scaling changed to 100 * scale + -- parts.name always built from 5 selections from colors set + -- test scaling changed to ~60MB (TEST_SCALING == 10) + -- logarithmic scaling removed + -- mfgcost removed and retail/supplier cost bounds adjusted + -- agg_str memory leak fixed + -- independent RNG streams on a per column basis + +This is the revised data generator for DSS. + +The rewrite tried to accomplish four things: (1) identify and isolate +all the implicit assumptions about limits, bounds, ranges, distribu- +tions, etc.; (2) standardize the way any given table was generated/ +printed to ease understanding and maintenance; (3) bring the generator +in line with the current work of the committee and the excellent spec +that Indira put together; (4) provide an easy way to adjust distribu- +tions, string contents and to facilitate experimentation to get a +better idea of the impact of data population changes. 
+ +The files included are: + +driver.c ------- main and the calling routines for the generators +dist.c ------- should really be named dss_util.c; misc routines +customer.c ------- generation and print routines for customer table +orders.c ------- "" "" order table +parts.c ------- "" "" parts/partsupp +suppliers.c ------- "" "" suppliers table +time.c ------- "" "" time table +customer.h ------- associate header files; contain structure + definitions +dss.h dss.h holds the large number of assumptions and +orders.h values that have been used as IFDEFs. +parts.h +suppliers.h +time.h +dists.dss ------- string selections and weights; used to build + distributions + +Running make will create an executable (using the compiler flags in +CFLAGS, the ld flags in LDFLAGS and the libraries in LIBS [-O, -s, +and -lm by default]) which will create flat files suitable for dbload. +t + diff --git a/PORTING.NOTES b/PORTING.NOTES new file mode 100644 index 0000000..8054913 --- /dev/null +++ b/PORTING.NOTES @@ -0,0 +1,220 @@ +# @(#)PORTING.NOTES 2.1.8.1 + +Table of Contents +================== +1. General Program Structure +2. Naming Conventions and Variable Usage +3. Porting Procedures +4. Compilation Options +5. Customizing QGEN +6. Further Enhancements +7. Known Porting Problems +8. Reporting Problems + +1. General Program Structure + +The code provided with TPC-H and TPC-R benchmarks includes a database +population generator (DBGEN) and a query template translator(QGEN). It +is written in ANSI-C, and is meant to be easily portable to a broad variety +of platforms. The program is composed of five source files and some +support and header files. The main modules are: + + build.c: each table in the database schema is represented by a + routine mk_XXXX, which populates a structure + representing one row in table XXXX. 
+ See Also: dsstypes.h, bm_utils.c, rnd.* + print.c: each table in the database schema is represented by a + routine pr_XXXX, which prints the contents of a + structure representing one row in table XXXX. + See Also: dsstypes.h, dss.h + driver.c: this module contains the main control functions for + DBGEN, including command line parsing, distribution + management, database scaling and the calls to mk_XXXX + and pr_XXXX for each table generated. + qgen.c: this module contains the main control functions for + QGEN, including query template parsing. + varsub.c: each query template includes one or more parameter + substitution points; this routine handles the + parameter generation for the TPC-H/TPC-R benchmark. + +The support utilities provide a generalized set of functions for data +generation and include: + + bm_utils.c: data type generators, string management and + portability routines. + + rnd.*: a general purpose random number generator used + throughout the code. + + dss.h: + shared.h: a set of '#defines' for limits, formats and fixed + values + dsstypes.h: structure definitions for each table definition + +2. Naming Conventions and Variable Usage + +Since DBGEN will be maintained by a large number of people, it is +particularly important to observe the coding, variable naming and usage +conventions detailed here. + + #define + -------- + All #define directives are found in header files (*.h). In general, + the header files segregate variables and macros as follows: + rnd.h -- anything exclusively referenced by rnd.c + dss.h -- general defines for the benchmark, including *all* + extern declarations (see below). + shared.h -- defines related to the tuple definitions in + dsstypes.h. Isolated to ease automatic processing needed by many + direct load routines (see below). + dsstypes.h -- structure definitions and typedef directives to + detail the contents of each table's tuples. 
+ config.h -- any porting and configuration related defines should + go here, to localize the changes necessary to move the suite + from one machine to another. + tpcd.h -- defines related to QGEN, rather than DBGEN + + extern + ------ + DBGEN and QGEN make extensive use of extern declarations. This could + probably stand to be changed at some point, but has made the rapid + turnaround of prototypes easier. In order to be sure that each + declaration was matched by exactly one definition per executable, + they are all declared as EXTERN, a macro dependent on DECLARER. In + any module that defines DECLARER, all variables declared EXTERN will + be defined as globals. DECLARER should be declared only in modules + containing a main() routine. + + Naming Conventions + ------------------ + defines + o All defines use upper case + o All defines use a table prefix, if appropriate: + O_* relates to orders table + L_* relates to lineitem table + P_* relates to part table + PS_* relates to partsupplier table + C_* relates to customer table + S_* relates to supplier table + N_* relates to nation table + R_* relates to region table + T_* relates to time table + o All defines have a usage suffix, if appropriate: + *_TAG environment variable name + *_DFLT environment variable default + *_MAX upper bound + *_MIN lower bound + *_LEN average length + *_SD random number seed (see rnd.*) + *_FMT printf format string + *_SCL divisor (for scaled arithmetic) + *_SIZE tuple length + +3. Porting Procedures + +The code provided should be easily portable to any machine providing an +ANSI C compiler. + -- Copy makefile.suite to makefile + -- Edit the makefile to match the name of your C compiler + and to include appropriate compilation options in the CFLAGS + definition + -- make. + +Special care should be taken in modifying any of the monetary calcu- +lations in DBGEN. These have proven to be particularly sensitive to +portability problems. 
If you decide to create the routines for inline +data load (see below), be sure to compare the resulting data to that +generated by a flat file data generation to be sure that all numeric +conversions have been correct. + +If the compile generates errors, refer to "Compilation Options", below. +The problem you are encountering may already have been addressed in the +code. + +If the compile is successful, but QGEN is not generating the appropriate +query syntax for your environment, refer to "Customizing QGEN", below. + +For other problems, refer to "Reporting Problems" at the end of this +document. + +4. Compilation Options + +config.h and makefile.suite contain a number of compile time options intended +to make the process of porting the code provided with TPC-H/TPC-R as easy as +possible on a broad range of platforms. Most ports should consist of reviewing +the possible settings described in config.h and modifying the makefile +to employ them appropriately. + +5. Customizing QGEN + +QGEN relies on a number of vendor-specific conventions to generate +appropriate query syntax. These are controlled by #defines in tpcd.h, +and enabled by a #define in config.h. If you find that the syntax +generated by QGEN is not sufficient for your environment you will need +to modify these two files. It is strongly recommended that you not change +the general organization of the files. + +Currently defined options are: + +VTAG -- marks a variable substitution point [:] +QDIR_TAG -- environment variable which points to query templates + [DSS_QUERY] +GEN_QUERY_PLAN -- syntax to generate a query plan ["Set Explain On;"] +START_TRAN -- syntax to begin a transaction ["Begin Work;"] +END_TRAN -- syntax to end a transaction ["Commit Work;"] +SET_OUTPUT -- syntax to redirect query output ["Output to"] +SET_ROWCOUNT -- syntax to set the number of rows returned + ["{return %d rows}"] +SET_DBASE -- syntax to connect to a database + +6. 
Further Enhancements + +load_stub.c provides entry points for two likely enhancements. + +The ld_XXXX routines make it possible to load the +database directly from DBGEN without first writing the database +population out to the filesystem. This may prove particularly useful +when loading larger database populations. Be particularly careful about +monetary amounts. To assure portability, all monetary calculations are +done using long integers (which hold money amounts as a number of +pennies). These will need to be scaled to dollars and cents (by dividing +by 100), before the values are presented to the DBMS. + +The hd_XXXX routines allow header information to be written before the +creation of the flat files. This should allow systems which require +formatting information in database load files to use DBGEN with only +a small amount of custom code. + +qgen.c defines the translation table for query templates in the +routine qsub(). + +varsub.c defines the parameter substitutions in the routine varsub(). + +If you are porting DBGEN to a machine that supports a native word +size larger than 32 bits, you may wish to modify the default values for +BITS_PER_LONG and MAX_LONG. These values are used in the generation of +the sparse primary keys in the order and lineitem tables. The code has +been structured to run on any machine supporting a 32 bit long, but +may be slightly more efficient on machines that are able to make use of +a larger native type. + +7. Known Porting Problems + +The current codeline will not compile under SunOS 4.1. Solaris 2.4 and later +are supported, and anyone wishing to use DBGEN on a Sun platform is +encouraged to use one of these OS releases. + + +8. 
Reporting Problems + +The code provided with TPC-H/TPC-R has been written to be easily portable, +and has been tested on a wide variety of platforms. If you have any +trouble porting the code to your platform, please help us to correct +the problem in a later release by sending the following information +to the TPC D subcommittee: + + Computer Make and Model + Compiler Type and Revision Number + Brief Description of the problem + Suggested modification to correct the problem + diff --git a/README b/README new file mode 100644 index 0000000..547195d --- /dev/null +++ b/README @@ -0,0 +1,88 @@ +Note: In our research paper we use the SSB instead of SSBM +Version of 2/28/10: +Cardinality of supplier fixed to follow benchmark spec: now 2000*SF + (previously was 10000*SF, in error): line 226, driver.c +Type of time value changed from long to time_t (now 64 bits on Windows): + line 688, build.c +Building in Visual Studio 2008: + Use Win32 console project, not using precompiled headers, + in Properties>C/C++>CommandLine, additional options: + /D "SSBM" /D "DBNAME" /D "DB2" (for DB2) +Building using makefile_win: set for DB2 build: + nmake -f makefile_win + (Change DATABASE symbol for other database) + +SSBM dbgen readme: + +SSBM is based on TPC-H dbgen source. The coding style and architecture +follow the TPCH dbgen. The original TPCH dbgen code stays untouched and +all new code related to SSBM dbgen follows the "#ifdef SSBM" statements. + +For original detailed TPC-H documentation, please refer to the TPCH_README +document under the same directory. Here we just list a few things that +are specific to SSBM. + + +1. How is SSBM DBGEN built? + +Same idea as TPCH dbgen setup, which requires user to create an +appropriate makefile, using makefile.suite as a basis. Make sure to +use "SSBM" for the workload variable. + +Type "make" to compile and to generate the SSBM dbgen executable. +Please refer to Porting.Notes for more details and for +suggested compile time options. 
+ +Note: If you want to generate the data files to a different directory, you should +copy the dbgen executable as well as the dists.dss file to that directory. + +2. How to generate SSBM data files? +To generate the dimension tables: + +(customer.tbl) +dbgen -s 1 -T c + +(part.tbl) +dbgen -s 1 -T p + +(supplier.tbl) +dbgen -s 1 -T s + +(date.tbl) +dbgen -s 1 -T d + +(fact table lineorder.tbl) +dbgen -s 1 -T l + +(for all SSBM tables) +dbgen -s 1 -T a + +To generate the refresh (insert/delete) data set: +(create delete.[1-4] and lineorder.tbl.u[1-4] with refreshing fact 0.05%) +dbgen -s 1 -r 5 -U 4 + + where "-r 5" specifies refreshing fact n/10000 + "-U 4" specifies 4 segments for deletes and inserts + +At this moment there is no QGEN for SSBM. So +the command line options related to those features won't apply. + +3. What are the changes upon TPC-H dbgen + +changes made upon original TPC-H dbgen + +1. removed snowflake tables such as nation and region (done) +2. removed the partsupply table (done) +3. removed the order table (done) +4. renamed the fact table as Lineorder and added/removed many fields +( done) +5. added the date dimension table (done) +6. adding and removing fields in dimension tables (done) +7. have data cross reference for supplycost, revenue in lineorder (done) +8. apply the refreshing only to lineorder table (done) + +The command line options remain the same as TPC-H dbgen (The -T options +are changed to reflect the different set of tables) + +===================== End of README ======================================== + diff --git a/TPCH_README b/TPCH_README new file mode 100644 index 0000000..9c8225f --- /dev/null +++ b/TPCH_README @@ -0,0 +1,425 @@ +# @(#)README 2.1.8.1 + +Table of Contents +=================== + 0. What is this document? + 1. What is DBGEN? + 2. What will DBGEN create? + 3. How is DBGEN built? + 4. Command Line Options for DBGEN + 5. Building Large Data Sets with DBGEN + 6. DBGEN limitations and compliant usage + 7. 
Sample DBGEN executions + 8. What is QGEN? + 9. What will QGEN create? +10. How is QGEN built? +11. Command Line Options for QGEN +12. Query Template Syntax +13. Sample QGEN executions and Query Templates +14. Environment variable +15. Version Numbering in DBGEN and QGEN + +0. What is this document? + +This is the general README file for DBGEN and QGEN, the data- +base population and executable query text generation programs +used in the TPC-H and TPC-R benchmarks. It covers the proper use +of DBGEN and QGEN. For information on porting the utility to your +particular platform see Porting.Notes. + +1. What is DBGEN? + +DBGEN is a database population program for use with the TPC-H and +TPC-R benchmarks. It is written in ANSI 'C' for portability, and has +been successfully ported to over a dozen different systems. While the +TPC-H and TPC-R specifications allow an implementor to use any utility +to populate the benchmark database, the resultant population must exactly +match the output of DBGEN. The source code has been provided to make the +process of building a compliant database population as simple as possible. + +2. What will DBGEN create? + +Without any command line options, DBGEN will generate 8 separate ascii +files. Each file will contain pipe-delimited load data for one of the +tables defined in the TPC-H and TPC-R database schemas. The default tables +will contain the load data required for a scale factor 1 database. By +default the file will be created in the current directory and be +named .tbl. As an example, customer.tbl will contain the +load data for the customer table. + +When invoked with the '-U' flag, DBGEN will create the data sets to be +used in the update functions and the SQL syntax required to delete the +data sets. The update files will be created in the same directory as +the load data files and will be named "u_
.set". The delete +syntax will be written to "delete.set". For instance, the data set to +be used in the third query set to update the lineitem table will be +named "u_lineitem.tbl.3", and the SQL to remove those rows will be +found in "delete.3". The size of the update files can be controlled +with the '-r' flag. + +3. How is DBGEN built? + +Create an appropriate makefile, using makefile.suite as a basis, +and type make. Refer to Porting.Notes for more details and for +suggested compile time options. + +4. Command Line Options for DBGEN + +DBGEN's output is controlled by a combination of command line options +and environment variables. Command line options are assumed to be single +letter flags preceded by a minus sign. They may be followed by an +optional argument. + +option argument default action +------ -------- ------- ------ +-h Display a usage summary + +-f none Force. Existing data files will be + overwritten. + +-F none yes Flat file output. + +-D none Direct database load. ld_XXXX() routines + must be defined in load_stub.c + +-s 1 Scale of the database population. Scale + 1.0 represents ~1 GB of data + +-T
Generate the data for a particular table + ONLY. Arguments: p -- part/partsupp, + c -- customer, s -- supplier, + o -- orders/lineitem, n -- nation, r -- region, + l -- code (same as n and r), + O -- orders, L -- lineitem, P -- part, + S -- partsupp + +-O d Generate SQL for delete function + instead of key ranges + +-O f Allow over-ride of default output file + names + +-O h Generate headers in flat ascii files. + hd_XXX routines must be defined in + load_stub.c + +-O m Flat files generate fixed length records + +-O r Generate key ranges for the UF2 update + function + +-O v Verify data set without generating it. + +-r <percentage> 10 Scale each update file to the given + percentage (expressed in basis points) + of the data set + +-v none Verbose. Progress messages are + displayed as data is generated. + +-n <name> Use database <name> for in-line load + +-C <n> Use <n> separate processes to + generate data + +-S <n> Generate the <n>th part of a multi-part load + or update set + +-U <n> Create a specified number of data sets + in flat files for the update/delete + functions + +-i <n> Split the inserted rows in a refresh pair + between <n> files + +-d <n> Split the deleted rows in a refresh pair + between <n> files + +5. DBGEN limitations and compliant usage + +DBGEN is meant to be a robust population generator for use with the +TPC-H and TPC-R benchmarks. It is hoped that DBGEN will make it easier +to experiment with and become proficient in the execution of TPC decision +support benchmarks. As a result, it includes a number of command line +options which are not, strictly speaking, necessary to generate a compliant +data set for a TPC-D run. In addition, some command line options will accept +arguments which result in the generation of NON-COMPLIANT data sets. Options +which should be used with care include: + +-s -- scale factor. TPC-H/TPC-R runs are only compliant when run against SF's + of 1, 10, 30, 100, 300, 1000 .... +-r -- refresh percentage. TPC-H/TPC-R runs are only compliant when run with + -r 10, the default.
+ +6. Sample DBGEN executions + +DBGEN has been built to allow as much flexibility as possible, but is +fundamentally intended to generate two things: a database population +against which the queries in TPC-H and TPC-R can be run, and the updates +that are used during the update functions in TPC-H and TPC-R. Here are +some sample uses of DBGEN. + + 1. To generate the database population for the qualification database + dbgen -s 1 + 2. To generate the lineitem table only, for a scale factor 10 database, + and over-write any existing flat files: + dbgen -s 10 -f -T L + 4. To generate a 100GB data set in 1GB pieces, generate only the part and + partsupplier tables, and include some progress reports along the way: + dbgen -s 100 -S 1 -C 100 -T p -v (to generate the first 1GB file) + dbgen -s 100 -S 2 -C 100 -T p -v (to generate the second 1GB file) + (and so on, incrementing the argument to -S each time) + 5. To generate the update files needed for a 4 stream run of the throughput + test at 100 GB, using an existing set of seed files from an 8 process + load: + dbgen -s 100 -U 4 -C 8 + + +7. What is QGEN? + +QGEN is a query generation program for use with the TPC-H and TPC-R benchmarks. +It is written in ANSI 'C' for portability, and has been successfully +ported to over a dozen different systems. While the benchmark specifications +allow an implementor to use any utility to create the benchmark query +sets, QGEN has been provided to make the process of building +a benchmark implementation as simple as possible. + +8. What will QGEN create? + +QGEN is a filter, triggered by :'s. It does line-at-a-time reads of its +input (more on that later), scanning for :foo, where foo determines the +substitution that occurs.
Including: + +:<int> replace with the appropriate value for parameter <int> +:b replace with START_TRAN (from tpcd.h) +:c replace with SET_DBASE (from tpcd.h) +:n <int> replace with SET_ROWCOUNT(<int>) (from tpcd.h) +:o replace with SET_OUTPUT (from tpcd.h) +:q replace with query number +:s replace with stream number +:x replace with GEN_QUERY_PLAN (from tpcd.h) + +Qgen takes an assortment of command line options, controlling which of these +options should be active during the translation from template to EQT, and a +list of query "names". It then translates the template found in +$DSS_QUERY/<name>.sql and puts the result on stdout. + +Here is a sample query template: + +{ Sccsid: @(#)1.sql 9.1.1.1 1/25/95 10:51:56 } +:n 0 +:o +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from lineitem +where l_shipdate <= date '1998-12-01' - interval :1 day +group by l_returnflag, l_linestatus +order by l_returnflag, l_linestatus; + +And here is what is generated: +$ qgen -d 1 + +{return 0 rows} + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from lineitem +where l_shipdate <= date('1998-12-01') - interval (90) day to day +group by l_returnflag, l_linestatus +order by l_returnflag, l_linestatus; + +See "Query Template Syntax" below for more detail on converting your preferred query +phrasing for use with QGEN. + +9. How is QGEN built?
+ +QGEN is built by the same makefile that creates DBGEN. If the makefile +is successfully creating DBGEN, no further compilation modifications +should be necessary. You may need to modify some of the options which +allow QGEN to integrate with your preferred query tool. Refer to +Porting.Notes for more detail. + +10. Command Line Options for QGEN + +Like DBGEN, QGEN is controlled by a combination of command line options +and environment variables (See "Environment Variables", below for more +detail). Command line options are assumed to be single +letter flags preceded by a minus sign. They may be followed by an +optional argument. + +option argument default action +------ -------- ------- ------ +-c none Retain comments in translation of template to + EQT + +-d none Default. Use the parameter substitutions + required for query validation + +-h Display a usage summary + +-i <file> Use contents of <file> to init a query stream + +-l <file> Save query parameters to <file> + +-n <name> Use database <name> for queries + +-N Always use default rowcount, and ignore :n directives + +-o <path> Save query n's output in <path>/n.<stream> + Uses -p option, and uses :o tag + +-p <stream> Use the query permutation defined for + stream <stream>. If this option is + omitted, EQT will be generated for the + queries named on the command line. + +-r <seed> Seed the random number generator with <seed> + +-s <n> Set scale to <n> for parameter + substitutions. + +-t <file> Use contents of <file> to complete a query + stream + +-T none Use time table format for date substitution + +-v none Verbose. Progress messages are + displayed as data is generated. + +-x none Generate a query plan as part of query + execution. + +11. Query Template Syntax + +QGEN is a simple ASCII text filter, meant to translate generalized +query syntax ("query template") into the executable query text (EQT) re- +quired by the benchmarks. It provides a number of shorthands and syntactic +extensions that allow the automatic generation of query parameters and some +control over the operation of the benchmark implementation.
+ +QGEN first strips all comments from the query template, recognizing both +{comment} and --comment styles. Next it traverses the query template +one line at a time, locating required substitution points, called +parameter tags. The values substituted for a given tag are summarized +below. QGEN does not support nested substitutions. That is, if +the text substituted for a tag itself contains a valid tag the second tag +will not be expanded. + +Tag Converted To Based on +=== ============ ======== +:c database <dbname>;(1) -n <dbname> from the command line +:x set explain on;(1) -x from the command line +:<int> parameter <int> +:s stream number +:o output to outpath/qnum.stream;(1) + -o from command line, -s from + command line +:b BEGIN WORK;(1) -a from command line +:e COMMIT WORK(1) -a from command line +:q query number +:n <n> sets rowcount to be returned + to <n>, unless -N appears on the command line + +Notes: + (1) This is Informix-specific syntax. Refer to Porting.Notes for + tailoring the generated text to your database environment. + +12. Sample QGEN executions and Query Templates + +QGEN translates generic query templates into valid SQL. In addition, it +allows conditional inclusion of the commands necessary to connect to a +database, produce diagnostic output, etc. Here are some samples of QGEN +usage, and the way that command line parameters and the query templates +interact to produce valid SQL. + + Template, in $DSS_QUERY/1.sql: + :c + :o + select count(*) from foo; + :x + select count(*) from lineitem + where l_orderdate < ':1'; + + 1. "qgen 1", would produce: + select count(*) from foo; + select count(*) from lineitem + where l_orderdate < '1997-01-01'; + Assuming that 1 January 1997 was a valid substitution for parameter 1. + + 2. "qgen -d -c dss1 1", would produce: + database dss1; + select count(*) from foo; + select count(*) from lineitem + where l_orderdate < '1995-07-18'; + Assuming that 18 July 1995 was the default substitution for parameter 1, + and using Informix syntax. + + 3.
"qgen -d -c dss1 -x -o somepath 1, would produce: + database dss1; + output to "somepath/1.0" + select count(*) from foo; + set explain on; + select count(*) from lineitem + where l_orderdate < '1995-07-18'; + Assuming that 18 July 1995 was the default substitution for parameter 1, + and using Informix syntax. + + +13. Environment Variables + +Enviroment variables are used to control features of DBGEN and QGEN +which are unlikely to change from one execution to another. + +Variable Default Action +------- ------- ------ +DSS_PATH . Directory in which to build flat files +DSS_CONFIG . Directory in which to find configuration files +DSS_DIST dists.dss Name of distribution definition file +DSS_QUERY . Directory in which to find query templates + +14. Version Numbering in DBGEN and QGEN + +DBGEN and QGEN use a common version numbering algorithm. Each executable +is stamped with a version number which is displayed in the usage messages +available with the '-h' option. A version number is of the form: + + V.R.P.M + | | | | + | | | | + | | | | + | | | -- modification: alphabetic, incremented for any trivial changes + | | | to the source (e.g, porting ifdef's) + | | ---- patch level: numeric, incremented for any minor bug fix + | | (e.g, qgen parameter range) + | ------- release: numeric, incremented for each minor revision of the + | specification + |-------- version: numeric, incremented for each major revision of the + specification + +An implementation of TPC-H or TPC-R is valid only if it conforms to the +following version usage rules: + + -- The Version of DBGEN and QGEN must match the integer portion of the + current specification revision + +The current revisions are: + DBGEN: 1.0.1 + QGEN: 1.0.1 diff --git a/bcd2.c b/bcd2.c new file mode 100644 index 0000000..30038ba --- /dev/null +++ b/bcd2.c @@ -0,0 +1,237 @@ +/* @(#)bcd2.c 2.1.8.1 */ +/* + * bcd.c: conversion routines for multi-byte arithmetic + * + * defined routines: + * bin_bcd2(long binary, long *low_res, 
/*
 * bcd2.c: conversion routines for multi-byte arithmetic
 *
 * A value of up to 14 decimal digits is carried in two longs of packed
 * BCD, four bits per digit and DIGITS_PER_LONG digits per word: the "low"
 * word holds decimal digits 0..6 (digit 0 in the least significant
 * nibble) and the "high" word holds digits 7..13.
 *
 * defined routines:
 *	bin_bcd2(long binary, long *low_res, long *high_res)
 *	bcd2_bin(long *dest, long bcd)
 *	bcd2_add(long *bcd_low, long *bcd_high, long addend)
 *	bcd2_sub(long *bcd_low, long *bcd_high, long subend)
 *	bcd2_mul(long *bcd_low, long *bcd_high, long multiplier)
 *	bcd2_div(long *bcd_low, long *bcd_high, long divisor)
 *	long bcd2_mod(long *bcd_low, long *bcd_high, long modulo)
 *	long bcd2_cmp(long *bcd_low, long *bcd_high, long compare)
 */
#include <stdio.h>

/*
 * Interface published by bcd2.h (Sccsid: @(#)bcd2.h 2.1.8.1); the
 * prototypes are repeated here so the definitions below are checked
 * against them.
 */
int bin_bcd2(long binary, long *low_res, long *high_res);
int bcd2_bin(long *dest, long bcd);
int bcd2_add(long *bcd_low, long *bcd_high, long addend);
int bcd2_sub(long *bcd_low, long *bcd_high, long subend);
int bcd2_mul(long *bcd_low, long *bcd_high, long multiplier);
int bcd2_div(long *bcd_low, long *bcd_high, long divisor);
long bcd2_mod(long *bcd_low, long *bcd_high, long modulo);
long bcd2_cmp(long *bcd_low, long *bcd_high, long compare);

#define DIGITS_PER_LONG	7			/* BCD digits packed in one long */
#define BCD_DIGITS	(2 * DIGITS_PER_LONG)	/* digits in a low/high pair */
#define WORD_DIVISOR	10000000		/* 10 ^ DIGITS_PER_LONG */

/*
 * Read decimal digit `num` (0 = least significant) of the pair low/high.
 * Fully parenthesised so it can be used inside any larger expression.
 */
#define GET_DIGIT(num, low, high) \
	(((num) >= DIGITS_PER_LONG) \
		? (((high) >> (4 * ((num) - DIGITS_PER_LONG))) & 0xFL) \
		: (((low) >> (4 * (num))) & 0xFL))

/*
 * Store `value` (0..9) as decimal digit `num` of the pair *low / *high.
 * Wrapped in do/while(0) so that `if (cond) SET_DIGIT(...);` is a single
 * statement and cannot capture a following `else`.
 */
#define SET_DIGIT(value, num, low, high) \
	do { \
		if ((num) >= DIGITS_PER_LONG) \
			*(high) = (*(high) & (0xFFFFFFFL ^ \
				(0xFL << (4 * ((num) - DIGITS_PER_LONG))))) \
				| ((long)(value) << (4 * ((num) - DIGITS_PER_LONG))); \
		else \
			*(low) = (*(low) & (0xFFFFFFFL ^ (0xFL << (4 * (num))))) \
				| ((long)(value) << (4 * (num))); \
	} while (0)

/*
 * bin_bcd2: pack a non-negative binary value into a low/high BCD pair.
 *
 * Returns 0 on success. Returns -1 (results zeroed) for a negative
 * value, and -1 with the low 14 digits stored when the value needs more
 * than BCD_DIGITS digits. The digits are produced arithmetically rather
 * than through a fixed-size text buffer, so no input can overrun memory.
 */
int
bin_bcd2(long binary, long *low_res, long *high_res)
{
	int digit;

	*low_res = *high_res = 0;
	if (binary < 0)
		return(-1);
	for (digit = 0; digit < BCD_DIGITS && binary != 0; digit++)
	{
		SET_DIGIT(binary % 10, digit, low_res, high_res);
		binary /= 10;
	}
	return((binary == 0) ? 0 : -1);
}

/*
 * bcd2_bin: convert ONE packed word (DIGITS_PER_LONG digits) to binary.
 * *dest is overwritten, not accumulated into. Always returns 0.
 */
int
bcd2_bin(long *dest, long bcd)
{
	int digit;

	*dest = 0;
	for (digit = DIGITS_PER_LONG - 1; digit >= 0; digit--)
		*dest = *dest * 10 + ((bcd >> (4 * digit)) & 0xFL);
	return(0);
}

/*
 * bcd2_add: add `addend` to the pair in place.
 * Returns the carry out of the most significant digit (0 or 1).
 */
int
bcd2_add(long *bcd_low, long *bcd_high, long addend)
{
	long tmp_lo, tmp_hi, carry, res;
	int digit;

	bin_bcd2(addend, &tmp_lo, &tmp_hi);
	carry = 0;
	for (digit = 0; digit < BCD_DIGITS; digit++)
	{
		res = GET_DIGIT(digit, *bcd_low, *bcd_high);
		res += GET_DIGIT(digit, tmp_lo, tmp_hi);
		res += carry;
		carry = res / 10;
		res %= 10;
		SET_DIGIT(res, digit, bcd_low, bcd_high);
	}
	return((int)carry);
}

/*
 * bcd2_sub: subtract `subend` from the pair in place.
 * Returns the borrow out of the most significant digit: 0 normally, 1
 * when subend was larger than the stored value (the result then wraps
 * modulo 10^14).
 */
int
bcd2_sub(long *bcd_low, long *bcd_high, long subend)
{
	long tmp_lo, tmp_hi, borrow, res;
	int digit;

	bin_bcd2(subend, &tmp_lo, &tmp_hi);
	borrow = 0;
	for (digit = 0; digit < BCD_DIGITS; digit++)
	{
		res = GET_DIGIT(digit, *bcd_low, *bcd_high);
		res -= GET_DIGIT(digit, tmp_lo, tmp_hi);
		res -= borrow;
		if (res < 0)
		{
			res += 10;
			borrow = 1;
		}
		else
		{
			/* a satisfied borrow must not leak into later digits */
			borrow = 0;
		}
		SET_DIGIT(res, digit, bcd_low, bcd_high);
	}
	return((int)borrow);
}

/*
 * bcd2_mul: multiply the pair in place by `multiplier` (schoolbook
 * digit-by-digit product). Partial products that would land at or above
 * digit BCD_DIGITS are discarded. Returns the carry left after the final
 * inner pass.
 */
int
bcd2_mul(long *bcd_low, long *bcd_high, long multiplier)
{
	long tmp_lo, tmp_hi, carry, m_lo, m_hi, m1, m2;
	int udigit, ldigit, res;

	tmp_lo = *bcd_low;
	tmp_hi = *bcd_high;
	bin_bcd2(multiplier, &m_lo, &m_hi);
	*bcd_low = 0;
	*bcd_high = 0;
	carry = 0;
	for (ldigit = 0; ldigit < BCD_DIGITS; ldigit++)
	{
		m1 = GET_DIGIT(ldigit, m_lo, m_hi);
		carry = 0;
		for (udigit = 0; udigit < BCD_DIGITS; udigit++)
		{
			m2 = GET_DIGIT(udigit, tmp_lo, tmp_hi);
			res = (int)(m1 * m2);
			res += (int)carry;
			if (udigit + ldigit < BCD_DIGITS)
			{
				/* fold in what earlier passes left in this digit */
				carry = GET_DIGIT(udigit + ldigit, *bcd_low, *bcd_high);
				res += (int)carry;
			}
			carry = res / 10;
			res %= 10;
			if (udigit + ldigit < BCD_DIGITS)
				SET_DIGIT(res, udigit + ldigit, bcd_low, bcd_high);
		}
	}
	return((int)carry);
}

/*
 * bcd2_div: divide the pair in place by `divisor` (long division, most
 * significant digit first). Returns the remainder, or -1 with the pair
 * untouched when divisor is not positive.
 */
int
bcd2_div(long *bcd_low, long *bcd_high, long divisor)
{
	long tmp_lo, tmp_hi, carry, d1, res, digit;

	if (divisor <= 0)
		return(-1);

	carry = 0;
	tmp_lo = *bcd_low;
	tmp_hi = *bcd_high;
	*bcd_low = *bcd_high = 0;
	for (digit = BCD_DIGITS - 1; digit >= 0; digit--)
	{
		d1 = GET_DIGIT(digit, tmp_lo, tmp_hi);
		d1 += 10 * carry;
		res = d1 / divisor;
		carry = d1 % divisor;
		SET_DIGIT(res, digit, bcd_low, bcd_high);
	}
	return((int)carry);
}

/*
 * bcd2_mod: reduce a copy of the pair by repeated subtraction of
 * `modulo` and return the low word of what is left; the caller's pair is
 * not modified.
 *
 * NOTE(review): the loop test compares the BCD-encoded low word with the
 * binary `modulo`, uses `>` rather than `>=`, and the word returned is
 * still BCD-encoded. The logic is left exactly as it was -- confirm what
 * callers expect before changing it.
 */
long
bcd2_mod(long *bcd_low, long *bcd_high, long modulo)
{
	long tmp_low, tmp_high;

	tmp_low = *bcd_low;
	tmp_high = *bcd_high;
	while (tmp_high || tmp_low > modulo)
		bcd2_sub(&tmp_low, &tmp_high, modulo);
	return(tmp_low);
}

/*
 * bcd2_cmp: compare the pair against the binary value `comp`.
 * Returns 1 when the high word converts to more than 214; otherwise
 * returns (binary value of the low word) - comp, so the sign gives the
 * ordering. High words of 1..214 do not take part in the comparison.
 *
 * NOTE(review): 214 equals 2147483647 / WORD_DIVISOR, so this presumably
 * means "larger than any 32-bit long" -- confirm.
 */
long
bcd2_cmp(long *low1, long *high1, long comp)
{
	long temp = 0;

	bcd2_bin(&temp, *high1);
	if (temp > 214)
		return(1);
	bcd2_bin(&temp, *low1);
	return(temp - comp);
}

#ifdef TEST_BCD
#include <limits.h>
#ifndef MAXINT
#define MAXINT	INT_MAX
#endif

int
main(void)
{
	long bin, low_bcd, high_bcd;
	int i;

	bin = MAXINT;
	printf("%ld\n", bin);
	bin_bcd2(bin, &low_bcd, &high_bcd);
	printf("%ld %ld\n", high_bcd, low_bcd);
	bin = 0;
	bcd2_bin(&bin, high_bcd);
	bcd2_bin(&bin, low_bcd);
	printf( "%ld\n", bin);
	for (i = 9; i >= 0; i--)
		printf("%dth digit in %ld is %ld\n",
			i, bin, (long)GET_DIGIT(i, low_bcd, high_bcd));
	bcd2_add(&low_bcd, &high_bcd, MAXINT);
	bin = 0;
	bcd2_bin(&bin, high_bcd);
	high_bcd = bin;
	bin = 0;
	bcd2_bin(&bin, low_bcd);
	low_bcd = bin;
	printf( "%ld%07ld\n", high_bcd, low_bcd);
	bin_bcd2(14, &low_bcd, &high_bcd);
	bcd2_mul(&low_bcd, &high_bcd, 23L);
	bin = 0;
	bcd2_bin(&bin, high_bcd);
	bcd2_bin(&bin, low_bcd);
	printf( "%ld\n", bin);
	bcd2_div(&low_bcd, &high_bcd, 10L);
	bin = 0;
	bcd2_bin(&bin, high_bcd);
	bcd2_bin(&bin, low_bcd);
	printf( "%ld\n", bin);
	return(0);
}
#endif /* TEST */
Current functions: + * env_config -- set config vars with optional environment override + * yes_no -- ask simple yes/no question and return boolean result + * a_rnd(min, max) -- random alphanumeric within length range + * pick_str(size, set) -- select a string from the set of size + * read_dist(file, name, distribution *) -- read named dist from file + * tbl_open(path, mode) -- std fopen with lifenoise + * julian(date) -- julian date correction + * rowcnt(tbl) -- proper scaling of given table + * e_str(set, min, max) -- build an embedded str + * agg_str() -- build a string from the named set + * dsscasecmp() -- version of strcasecmp() + * dssncasecmp() -- version of strncasecmp() + * getopt() + * set_state() -- initialize the RNG + */ + +/*this has to be put on top...*/ +#ifdef LINUX +/* turn on GNU extensions, incl O_DIRECT */ +/* O_LARGEFILE is defined in fcntl.h*/ +#define _GNU_SOURCE +#endif + +#include "dss.h" +#include +#include +#include +#include + +#ifdef HP +#include +#endif /* HP */ +#include +#include +#ifndef _POSIX_SOURCE +#include +#endif /* POSIX_SOURCE */ + +#include + +#ifdef IBM +#include +#endif /* IBM */ +#include +#include +/* Lines added by Chuck McDevitt for WIN32 support */ +#if (defined(WIN32)||defined(DOS)) +#ifndef _POSIX_ +#include +#ifndef S_ISREG + +#define S_ISREG(m) ( ((m) & _S_IFMT) == _S_IFREG ) +#define S_ISFIFO(m) ( ((m) & _S_IFMT) == _S_IFIFO ) + +#endif +#endif +#ifndef stat +#define stat _stat +#endif +#ifndef fdopen +#define fdopen _fdopen +#endif +#ifndef open +#define open _open +#endif +#ifndef O_RDONLY +#define O_RDONLY _O_RDONLY +#endif +#ifndef O_WRONLY +#define O_WRONLY _O_WRONLY +#endif +#ifndef O_CREAT +#define O_CREAT _O_CREAT +#endif +#endif +/* End of lines added by Chuck McDevitt for WIN32 support */ +#include "dsstypes.h" + + +static char alpha_num[65] = +"0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ,"; + +#if defined(__STDC__) || defined(__cplusplus) +#define PROTO(s) s +#else +#define 
PROTO(s) () +#endif + +char *getenv PROTO((const char *name)); +void usage(); +long *permute_dist(distribution *d, long stream); +extern long Seed[]; + +/* + * env_config: look for a environmental variable setting and return its + * value; otherwise return the default supplied + */ +char * +env_config(char *var, char *dflt) +{ + static char *evar; + + if ((evar = getenv(var)) != NULL) + return (evar); + else + return (dflt); +} + +/* + * return the answer to a yes/no question as a boolean + */ +long +yes_no(char *prompt) +{ + char reply[128]; + +#ifdef WIN32 +/* Disable warning about conditional expression is constant */ +#pragma warning(disable:4127) +#endif + + while (1) + { +#ifdef WIN32 +#pragma warning(default:4127) +#endif + printf("%s [Y/N]: ", prompt); + gets(reply); + switch (*reply) + { + case 'y': + case 'Y': + return (1); + case 'n': + case 'N': + return (0); + default: + printf("Please answer 'yes' or 'no'.\n"); + } + } +} + +/* + * generate a random string with length randomly selected in [min, max] + * and using the characters in alphanum (currently includes a space + * and comma) + */ +int +a_rnd(int min, int max, int column, char *dest) +{ + long i, + len, + char_int; + + RANDOM(len, min, max, column); + for (i = 0; i < len; i++) + { + if (i % 5 == 0) + RANDOM(char_int, 0, MAX_LONG, column); + *(dest + i) = alpha_num[char_int & 077]; + char_int >>= 6; + } + *(dest + len) = '\0'; + return (len); +} + +/* + * embed a randomly selected member of distribution d in alpha-numeric + * noise of a length rendomly selected between min and max at a random + * position + */ +void +e_str(distribution *d, int min, int max, int stream, char *dest) +{ + char strtmp[MAXAGG_LEN + 1]; + long loc; + int len; + + a_rnd(min, max, stream, dest); + pick_str(d, stream, strtmp); + len = strlen(strtmp); + RANDOM(loc, 0, (strlen(dest) - 1 - len), stream); + strncpy(dest + loc, strtmp, len); + + return; +} + + +/* + * return the string associate with the LSB of a uniformly 
selected + * long in [1, max] where max is determined by the distribution + * being queried + */ +int +pick_str(distribution *s, int c, char *target) +{ + long i = 0; + long j; + + RANDOM(j, 1, s->list[s->count - 1].weight, c); + while (s->list[i].weight < j) + i++; + strcpy(target, s->list[i].text); + return(i); +} + +/* + * unjulian (long date) -- return(date - STARTDATE) + */ +long +unjulian(long date) +{ + int i; + long res = 0; + + for (i = STARTDATE / 1000; i < date / 1000; i++) + res += 365 + LEAP(i); + res += date % 1000 - 1; + + return(res); +} + +long +julian(long date) +{ + long offset; + long result; + long yr; + long yend; + + offset = date - STARTDATE; + result = STARTDATE; + +#ifdef WIN32 +/* Disable warning about conditional expression is constant */ +#pragma warning(disable:4127) +#endif + + while (1) + { +#ifdef WIN32 +#pragma warning(default:4127) +#endif + yr = result / 1000; + yend = yr * 1000 + 365 + LEAP(yr); + if (result + offset > yend) /* overflow into next year */ + { + offset -= yend - result + 1; + result += 1000; + continue; + } + else + break; + } + return (result + offset); +} + +/* +* load a distribution from a flat file into the target structure; +* should be rewritten to allow multiple dists in a file +*/ +void +read_dist(char *path, char *name, distribution *target) +{ +FILE *fp; +char line[256], + token[256], + *c; +long weight, + count = 0, + name_set = 0; + + if (d_path == NULL) + { + sprintf(line, "%s%c%s", + env_config(CONFIG_TAG, CONFIG_DFLT), PATH_SEP, path); + fp = fopen(line, "r"); + OPEN_CHECK(fp, line); + } + else + { + fp = fopen(d_path, "r"); + OPEN_CHECK(fp, d_path); + } + while (fgets(line, sizeof(line), fp) != NULL) + { + if ((c = strchr(line, '\n')) != NULL) + *c = '\0'; + if ((c = strchr(line, '#')) != NULL) + *c = '\0'; + if (*line == '\0') + continue; + + if (!name_set) + { + if (dsscasecmp(strtok(line, "\n\t "), "BEGIN")) + continue; + if (dsscasecmp(strtok(NULL, "\n\t "), name)) + continue; + name_set = 1; + 
continue; + } + else + { + if (!dssncasecmp(line, "END", 3)) + { + fclose(fp); + return; + } + } + + if (sscanf(line, "%[^|]|%ld", token, &weight) != 2) + continue; + + if (!dsscasecmp(token, "count")) + { + target->count = weight; + target->list = + (set_member *) + malloc((size_t)(weight * sizeof(set_member))); + MALLOC_CHECK(target->list); + target->max = 0; + continue; + } + target->list[count].text = + (char *) malloc((size_t)(strlen(token) + 1)); + MALLOC_CHECK(target->list[count].text); + strcpy(target->list[count].text, token); + target->max += weight; + target->list[count].weight = target->max; + + count += 1; + } /* while fgets() */ + + if (count != target->count) + { + fprintf(stderr, "Read error on dist '%s'\n", name); + fclose(fp); + exit(1); + } + target->permute = (long *)NULL; + fclose(fp); + return; +} + +/* + * standard file open with life noise + */ + +FILE * +tbl_open(int tbl, char *mode) +{ + char prompt[256]; + char fullpath[256]; + FILE *f; + struct stat fstats; + int retcode; + + + if (*tdefs[tbl].name == PATH_SEP) + strcpy(fullpath, tdefs[tbl].name); + else + sprintf(fullpath, "%s%c%s", + env_config(PATH_TAG, PATH_DFLT), PATH_SEP, tdefs[tbl].name); + + retcode = stat(fullpath, &fstats); + if (retcode && (errno != ENOENT)) + { + fprintf(stderr, "stat(%s) failed.\n", fullpath); + exit(-1); + } + if (S_ISREG(fstats.st_mode) && !force && *mode != 'r' ) + { + sprintf(prompt, "Do you want to overwrite %s ?", fullpath); + if (!yes_no(prompt)) + exit(0); + } + + if (S_ISFIFO(fstats.st_mode)) + { + retcode = + open(fullpath, ((*mode == 'r')?O_RDONLY:O_WRONLY)|O_CREAT); + f = fdopen(retcode, mode); + } + else{ + +#ifdef LINUX + /* allow large files on Linux */ + /*use open to first to get the in fd and apply regular fdopen*/ + + /*cheng: Betty mentioned about write mode problem here, added 066*/ + retcode = + open(fullpath, ((*mode == 'r')?O_RDONLY:O_WRONLY)|O_CREAT|O_LARGEFILE,0644); + f = fdopen(retcode, mode); +#else + f = fopen(fullpath, mode); 
+#endif + + } + OPEN_CHECK(f, fullpath); + if (header && columnar && tdefs[tbl].header != NULL) + tdefs[tbl].header(f); + + return (f); +} + + +/* + * agg_str(set, count) build an aggregated string from count unique + * selections taken from set + */ +void +agg_str(distribution *set, long count, long col, char *dest) +{ + distribution *d; + int i; + + + d = set; + *dest = '\0'; + for (i=0; i < count; i++) + { + strcat(dest, DIST_MEMBER(set,*permute_dist(d, col))); + + strcat(dest, " "); + d = (distribution *)NULL; + } + *(dest + strlen(dest) - 1) = '\0'; + return; +} + + +long +dssncasecmp(char *s1, char *s2, int n) +{ + for (; n > 0; ++s1, ++s2, --n) + if (tolower(*s1) != tolower(*s2)) + return ((tolower(*s1) < tolower(*s2)) ? -1 : 1); + else if (*s1 == '\0') + return (0); + return (0); +} + +long +dsscasecmp(char *s1, char *s2) +{ + for (; tolower(*s1) == tolower(*s2); ++s1, ++s2) + if (*s1 == '\0') + return (0); + return ((tolower(*s1) < tolower(*s2)) ? -1 : 1); +} + +#ifndef STDLIB_HAS_GETOPT +int optind = 0; +int opterr = 0; +char *optarg = NULL; + +int +getopt(int ac, char **av, char *opt) +{ + static char *nextchar = NULL; + char *cp; + char hold; + + if (optarg == NULL) + { + optarg = (char *)malloc(BUFSIZ); + MALLOC_CHECK(optarg); + } + + if (!nextchar || *nextchar == '\0') + { + optind++; + if (optind == ac) + return(-1); + nextchar = av[optind]; + if (*nextchar != '-') + return(-1); + nextchar +=1; + } + + if (nextchar && *nextchar == '-') /* -- termination */ + { + optind++; + return(-1); + } + else /* found an option */ + { + cp = strchr(opt, *nextchar); + nextchar += 1; + if (cp == NULL) /* not defined for this run */ + return('?'); + if (*(cp + 1) == ':') /* option takes an argument */ + { + if (*nextchar) + { + hold = *cp; + cp = optarg; + while (*nextchar) + *cp++ = *nextchar++; + *cp = '\0'; + *cp = hold; + } + else /* white space separated, use next arg */ + { + if (++optind == ac) + return('?'); + strcpy(optarg, av[optind]); + } + nextchar = 
NULL; + } + return(*cp); + } +} +#endif /* STDLIB_HAS_GETOPT */ + +char ** +mk_ascdate(void) +{ + char **m; + dss_time_t t; + int i; + + m = (char**) malloc((size_t)(TOTDATE * sizeof (char *))); + MALLOC_CHECK(m); + for (i = 0; i < TOTDATE; i++) + { + m[i] = (char *)malloc(DATE_LEN * sizeof(char)); + MALLOC_CHECK(m[i]); + mk_time((long)(i + 1), &t); + strcpy(m[i], t.alpha); + } + + return(m); +} + +/* + * set_state() -- initialize the RNG so that + * appropriate data sets can be generated. + * For each table that is to be generated, calculate the number of rows/child, and send that to the + * seed generation routine in speed_seed.c. Note: assumes that tables are completely independent. + * Returns the number of rows to be generated by the named step. + */ +long +set_state(int table, long sf, long procs, long step, long *extra_rows) +{ + int i; + long rowcount, remainder, result; + + if (sf == 0 || step == 0) + return(0); + + rowcount = tdefs[table].base / procs; + if ((sf / procs) > (int)MAX_32B_SCALE) + INTERNAL_ERROR("SCALE OVERFLOW. 
RE-RUN WITH MORE CHILDREN."); + rowcount *= sf; + remainder = (tdefs[table].base % procs) * sf; + rowcount += remainder / procs; + result = rowcount; + for (i=0; i < step - 1; i++) + { + if (table == LINE) /* special case for shared seeds */ + tdefs[table].gen_seed(1, rowcount); + else + tdefs[table].gen_seed(0, rowcount); + /* need to set seeds of child in case there's a dependency */ + /* NOTE: this assumes that the parent and child have the same base row count */ + if (tdefs[table].child != NONE) + tdefs[tdefs[table].child].gen_seed(0,rowcount); + } + *extra_rows = remainder % procs; + if (step > procs) /* moving to the end to generate updates */ + tdefs[table].gen_seed(*extra_rows); + + return(result); +} + + + + + + + + diff --git a/build.c b/build.c new file mode 100644 index 0000000..fd6cc87 --- /dev/null +++ b/build.c @@ -0,0 +1,800 @@ +/* @(#)build.c 2.1.8.1 */ +/* Sccsid: @(#)build.c 9.1.1.17 11/15/95 12:52:28 */ +/* stuff related to the customer table */ +#include +#include +#ifdef SSBM +#include +#endif +#ifndef VMS +#include +#endif +#if defined(SUN) +#include +#endif + +#if defined(LINUX) +#include +#endif + +#include + +#include "dss.h" +#include "dsstypes.h" +#include "bcd2.h" +#ifdef ADHOC +#include "adhoc.h" +extern adhoc_t adhocs[]; +#endif /* ADHOC */ + +#define LEAP_ADJ(yr, mnth) \ +((LEAP(yr) && (mnth) >= 2) ? 
1 : 0) +#define JDAY_BASE 8035 /* start from 1/1/70 a la unix */ +#define JMNTH_BASE (-70 * 12) /* start from 1/1/70 a la unix */ +#define JDAY(date) ((date) - STARTDATE + JDAY_BASE + 1) +#define PART_SUPP_BRIDGE(tgt, p, s) \ + { \ + long tot_scnt = tdefs[SUPP].base * scale; \ + tgt = (p + s * (tot_scnt / SUPP_PER_PART + \ + (long) ((p - 1) / tot_scnt))) % tot_scnt + 1; \ + } +#define RPRICE_BRIDGE(tgt, p) tgt = rpb_routine(p) +#define V_STR(avg, sd, tgt) a_rnd((int)(avg * V_STR_LOW), \ +(int)(avg * V_STR_HGH), sd, tgt) +#define TEXT(avg, sd, tgt) \ +dbg_text(tgt, (int)(avg * V_STR_LOW),(int)(avg * V_STR_HGH), sd) +static void gen_phone PROTO((long ind, char *target, long seed)); + +#ifdef SSBM +static void gen_category PROTO((char *target, long seed)); +int gen_city PROTO((char *cityName, char *nationName)); +int gen_season PROTO((char * dest,int month,int day)); +int is_last_day_in_month PROTO((int year,int month,int day)); +int gen_holiday_fl PROTO((char * dest, int month, int day)); +int gen_city PROTO((char *cityName, char *nationName)); +int gen_color PROTO((char * source, char * dest)); +#endif + + +long +rpb_routine(long p) + { + long price; + price = 90000; + price += (p/10) % 20001; /* limit contribution to $200 */ + price += (p % 1000) * 100; + + return(price); + } + +static void +gen_phone(long ind, char *target, long seed) + { + long acode, + exchg, + number; + + RANDOM(acode, 100, 999, seed); + RANDOM(exchg, 100, 999, seed); + RANDOM(number, 1000, 9999, seed); + sprintf(target, "%02d", 10 + (ind % NATIONS_MAX)); + sprintf(target + 3, "%03d", acode); + sprintf(target + 7, "%03d", exchg); + sprintf(target + 11, "%04d", number); + target[2] = target[6] = target[10] = '-'; + return; +} + +static void +gen_category(char *target, long seed){ + long num1,num2; + RANDOM(num1,1,5,seed); + RANDOM(num2,1,5,seed); + strcpy(target,"MFGR"); + sprintf(target + 4, "%01d", num1); + sprintf(target + 5, "%01d", num2); + return; +} + +#ifdef SSBM +long mk_cust(long 
n_cust, customer_t *c) +{ + long i; + c->custkey = n_cust; + sprintf(c->name, C_NAME_FMT, C_NAME_TAG, n_cust); + c->alen = V_STR(C_ADDR_LEN, C_ADDR_SD, c->address); + RANDOM(i, 0, nations.count-1, C_NTRG_SD); + strcpy(c->nation_name,nations.list[i].text); + strcpy(c->region_name,regions.list[nations.list[i].weight].text); + gen_city(c->city,c->nation_name); + gen_phone(i, c->phone, (long)C_PHNE_SD); + pick_str(&c_mseg_set, C_MSEG_SD, c->mktsegment); + return (0); + } + +#else +long +mk_cust(long n_cust, customer_t *c) + { + long i; + + c->custkey = n_cust; + sprintf(c->name, C_NAME_FMT, C_NAME_TAG, n_cust); + c->alen = V_STR(C_ADDR_LEN, C_ADDR_SD, c->address); + RANDOM(i, 0, (nations.count - 1), C_NTRG_SD); + c->nation_code = i; + gen_phone(i, c->phone, (long)C_PHNE_SD); + RANDOM(c->acctbal, C_ABAL_MIN, C_ABAL_MAX, C_ABAL_SD); + pick_str(&c_mseg_set, C_MSEG_SD, c->mktsegment); + c->clen = TEXT(C_CMNT_LEN, C_CMNT_SD, c->comment); + + return (0); + } +#endif + + /* + * generate the numbered order and its associated lineitems +*/ +void +mk_sparse (long i, DSS_HUGE *ok, long seq) + { +#ifndef SUPPORT_64BITS + if (scale < MAX_32B_SCALE) +#endif + ez_sparse(i, ok, seq); +#ifndef SUPPORT_64BITS + else + hd_sparse(i, ok, seq); +#endif + return; + } + + /* + * the "simple" version of mk_sparse, used on systems with 64b support + * and on all systems at SF <= 300G where 32b support is sufficient +*/ +void +ez_sparse(long i, DSS_HUGE *ok, long seq) + { + long low_bits; + + LONG2HUGE(i, ok); + low_bits = (long)(i & ((1 << SPARSE_KEEP) - 1)); + *ok = *ok >> SPARSE_KEEP; + *ok = *ok << SPARSE_BITS; + *ok += seq; + *ok = *ok << SPARSE_KEEP; + *ok += low_bits; + + + return; + } + +#ifndef SUPPORT_64BITS +void +hd_sparse(long i, DSS_HUGE *ok, long seq) + { + long low_mask, seq_mask; + static int init = 0; + static DSS_HUGE *base, *res; + + if (init == 0) + { + INIT_HUGE(base); + INIT_HUGE(res); + init = 1; + } + + low_mask = (1 << SPARSE_KEEP) - 1; + seq_mask = (1 << SPARSE_BITS) - 
1; + bin_bcd2(i, base, base + 1); + HUGE_SET (base, res); + HUGE_DIV (res, 1 << SPARSE_KEEP); + HUGE_MUL (res, 1 << SPARSE_BITS); + HUGE_ADD (res, seq, res); + HUGE_MUL (res, 1 << SPARSE_KEEP); + HUGE_ADD (res, *base & low_mask, res); + bcd2_bin (&low_mask, *res); + bcd2_bin (&seq_mask, *(res + 1)); + *ok = low_mask; + *(ok + 1) = seq_mask; + return; + } +#endif + +#ifdef SSBM +long +mk_order(long index, order_t *o, long upd_num) + { + long lcnt; + long rprice; + long ocnt; + long tmp_date; + long c_date; + long clk_num; + long supp_num; + static char **asc_date = NULL; + char tmp_str[2]; + char **mk_ascdate PROTO((void)); + int delta = 1; + + if (asc_date == NULL) + asc_date = mk_ascdate(); + + RANDOM(tmp_date, O_ODATE_MIN, O_ODATE_MAX, O_ODATE_SD); + strcpy(o->odate, asc_date[tmp_date - STARTDATE]); + + mk_sparse (index, o->okey, + (upd_num == 0) ? 0 : 1 + upd_num / (10000 / refresh)); + RANDOM(o->custkey, O_CKEY_MIN, O_CKEY_MAX, O_CKEY_SD); + while (o->custkey % CUST_MORTALITY == 0) + { + o->custkey += delta; + o->custkey = MIN(o->custkey, O_CKEY_MAX); + delta *= -1; + } + pick_str(&o_priority_set, O_PRIO_SD, o->opriority); + RANDOM(clk_num, 1, MAX((scale * O_CLRK_SCL), O_CLRK_SCL), O_CLRK_SD); + o->spriority = 0; + + o->totalprice = 0; + ocnt = 0; + + RANDOM(o->lines, O_LCNT_MIN, O_LCNT_MAX, O_LCNT_SD); + for (lcnt = 0; lcnt < o->lines; lcnt++) + { + + HUGE_SET(o->okey, o->lineorders[lcnt].okey); + o->lineorders[lcnt].linenumber = lcnt + 1; + o->lineorders[lcnt].custkey = o->custkey; + RANDOM(o->lineorders[lcnt].partkey, L_PKEY_MIN, L_PKEY_MAX, L_PKEY_SD); + RANDOM(o->lineorders[lcnt].suppkey, L_SKEY_MIN, L_SKEY_MAX, L_SKEY_SD); + + RANDOM(o->lineorders[lcnt].quantity, L_QTY_MIN, L_QTY_MAX, L_QTY_SD); + RANDOM(o->lineorders[lcnt].discount, L_DCNT_MIN, L_DCNT_MAX, L_DCNT_SD); + RANDOM(o->lineorders[lcnt].tax, L_TAX_MIN, L_TAX_MAX, L_TAX_SD); + + strcpy(o->lineorders[lcnt].orderdate,o->odate); + + strcpy(o->lineorders[lcnt].opriority,o->opriority); + 
o->lineorders[lcnt].ship_priority = o->spriority; + + RANDOM(c_date, L_CDTE_MIN, L_CDTE_MAX, L_CDTE_SD); + c_date += tmp_date; + strcpy(o->lineorders[lcnt].commit_date, asc_date[c_date - STARTDATE]); + + pick_str(&l_smode_set, L_SMODE_SD, o->lineorders[lcnt].shipmode); + + RPRICE_BRIDGE( rprice, o->lineorders[lcnt].partkey); + o->lineorders[lcnt].extended_price = rprice * o->lineorders[lcnt].quantity; + o->lineorders[lcnt].revenue = o->lineorders[lcnt].extended_price * ((long)100-o->lineorders[lcnt].discount)/(long)PENNIES; + + //round off problem with linux if use 0.6 + o->lineorders[lcnt].supp_cost = 6 * rprice /10; + + o->totalprice += + ((o->lineorders[lcnt].extended_price * + ((long)100 - o->lineorders[lcnt].discount)) / (long)PENNIES ) * + ((long)100 + o->lineorders[lcnt].tax) + / (long)PENNIES; + } + + for (lcnt = 0; lcnt < o->lines; lcnt++) + { + o->lineorders[lcnt].order_totalprice = o->totalprice; + } + return (0); + } +#else +long +mk_order(long index, order_t *o, long upd_num) + { + long lcnt; + long rprice; + long ocnt; + long tmp_date; + long s_date; + long r_date; + long c_date; + long clk_num; + long supp_num; + static char **asc_date = NULL; + char tmp_str[2]; + char **mk_ascdate PROTO((void)); + int delta = 1; + + if (asc_date == NULL) + asc_date = mk_ascdate(); + mk_sparse (index, o->okey, + (upd_num == 0) ? 
0 : 1 + upd_num / (10000 / refresh)); + RANDOM(o->custkey, O_CKEY_MIN, O_CKEY_MAX, O_CKEY_SD); + while (o->custkey % CUST_MORTALITY == 0) + { + o->custkey += delta; + o->custkey = MIN(o->custkey, O_CKEY_MAX); + delta *= -1; + } + + + RANDOM(tmp_date, O_ODATE_MIN, O_ODATE_MAX, O_ODATE_SD); + strcpy(o->odate, asc_date[tmp_date - STARTDATE]); + + pick_str(&o_priority_set, O_PRIO_SD, o->opriority); + RANDOM(clk_num, 1, MAX((scale * O_CLRK_SCL), O_CLRK_SCL), O_CLRK_SD); + sprintf(o->clerk, O_CLRK_FMT, + O_CLRK_TAG, + clk_num); + o->clen = TEXT(O_CMNT_LEN, O_CMNT_SD, o->comment); +#ifdef DEBUG + if (o->clen > O_CMNT_MAX) fprintf(stderr, "comment error: O%d\n", index); +#endif /* DEBUG */ + o->spriority = 0; + + o->totalprice = 0; + o->orderstatus = 'O'; + ocnt = 0; + + RANDOM(o->lines, O_LCNT_MIN, O_LCNT_MAX, O_LCNT_SD); + for (lcnt = 0; lcnt < o->lines; lcnt++) + { + HUGE_SET(o->okey, o->l[lcnt].okey); + o->l[lcnt].lcnt = lcnt + 1; + RANDOM(o->l[lcnt].quantity, L_QTY_MIN, L_QTY_MAX, L_QTY_SD); + RANDOM(o->l[lcnt].discount, L_DCNT_MIN, L_DCNT_MAX, L_DCNT_SD); + RANDOM(o->l[lcnt].tax, L_TAX_MIN, L_TAX_MAX, L_TAX_SD); + pick_str(&l_instruct_set, L_SHIP_SD, o->l[lcnt].shipinstruct); + pick_str(&l_smode_set, L_SMODE_SD, o->l[lcnt].shipmode); + o->l[lcnt].clen = TEXT(L_CMNT_LEN, L_CMNT_SD, o->l[lcnt].comment); + RANDOM(o->l[lcnt].partkey, L_PKEY_MIN, L_PKEY_MAX, L_PKEY_SD); + RPRICE_BRIDGE( rprice, o->l[lcnt].partkey); + RANDOM(supp_num, 0, 3, L_SKEY_SD); + PART_SUPP_BRIDGE( o->l[lcnt].suppkey, o->l[lcnt].partkey, supp_num); + o->l[lcnt].eprice = rprice * o->l[lcnt].quantity; + + o->totalprice += + ((o->l[lcnt].eprice * + ((long)100 - o->l[lcnt].discount)) / (long)PENNIES ) * + ((long)100 + o->l[lcnt].tax) + / (long)PENNIES; + + RANDOM(s_date, L_SDTE_MIN, L_SDTE_MAX, L_SDTE_SD); + s_date += tmp_date; + RANDOM(c_date, L_CDTE_MIN, L_CDTE_MAX, L_CDTE_SD); + c_date += tmp_date; + RANDOM(r_date, L_RDTE_MIN, L_RDTE_MAX, L_RDTE_SD); + r_date += s_date; + + + strcpy(o->l[lcnt].sdate, 
asc_date[s_date - STARTDATE]); + strcpy(o->l[lcnt].cdate, asc_date[c_date - STARTDATE]); + strcpy(o->l[lcnt].rdate, asc_date[r_date - STARTDATE]); + + + if (julian(r_date) <= CURRENTDATE) + { + pick_str(&l_rflag_set, L_RFLG_SD, tmp_str); + o->l[lcnt].rflag[0] = *tmp_str; + } + else + o->l[lcnt].rflag[0] = 'N'; + + if (julian(s_date) <= CURRENTDATE) + { + ocnt++; + o->l[lcnt].lstatus[0] = 'F'; + } + else + o->l[lcnt].lstatus[0] = 'O'; + } + + if (ocnt > 0) + o->orderstatus = 'P'; + if (ocnt == o->lines) + o->orderstatus = 'F'; + + return (0); +} +#endif + +#ifdef SSBM +long mk_part(long index, part_t *p) +{ + long mfgr,cat,brnd; + + p->partkey = index; + + agg_str(&colors, (long)P_NAME_SCL, (long)P_NAME_SD, p->name); + + /*extract color from substring of p->name*/ + p->clen =gen_color(p->name,p->color); + + + RANDOM(mfgr, P_MFG_MIN, P_MFG_MAX, P_MFG_SD); + sprintf(p->mfgr, "%s%d", "MFGR#", mfgr); + + RANDOM(cat, P_CAT_MIN, P_CAT_MAX, P_CAT_SD); + sprintf(p->category, "%s%d", p->mfgr,cat); + + + RANDOM(brnd, P_BRND_MIN, P_BRND_MAX, P_BRND_SD); + sprintf(p->brand,"%s%d",p->category,brnd); + + p->tlen = pick_str(&p_types_set, P_TYPE_SD, p->type); + p->tlen = strlen(p_types_set.list[p->tlen].text); + RANDOM(p->size, P_SIZE_MIN, P_SIZE_MAX, P_SIZE_SD); + + pick_str(&p_cntr_set, P_CNTR_SD, p->container); + + + return (0); +} +#else +long +mk_part(long index, part_t *p) + { + long temp; + long snum; + long brnd; + + p->partkey = index; + agg_str(&colors, (long)P_NAME_SCL, (long)P_NAME_SD, p->name); + RANDOM(temp, P_MFG_MIN, P_MFG_MAX, P_MFG_SD); + sprintf(p->mfgr, P_MFG_FMT, P_MFG_TAG, temp); + RANDOM(brnd, P_BRND_MIN, P_BRND_MAX, P_BRND_SD); + sprintf(p->brand, P_BRND_FMT, + P_BRND_TAG, + (temp * 10 + brnd)); + p->tlen = pick_str(&p_types_set, P_TYPE_SD, p->type); + p->tlen = strlen(p_types_set.list[p->tlen].text); + RANDOM(p->size, P_SIZE_MIN, P_SIZE_MAX, P_SIZE_SD); + pick_str(&p_cntr_set, P_CNTR_SD, p->container); + RPRICE_BRIDGE( p->retailprice, index); + p->clen = 
TEXT(P_CMNT_LEN, P_CMNT_SD, p->comment); + + for (snum = 0; snum < SUPP_PER_PART; snum++) + { + p->s[snum].partkey = p->partkey; + PART_SUPP_BRIDGE( p->s[snum].suppkey, index, snum); + RANDOM(p->s[snum].qty, PS_QTY_MIN, PS_QTY_MAX, PS_QTY_SD); + RANDOM(p->s[snum].scost, PS_SCST_MIN, PS_SCST_MAX, PS_SCST_SD); + p->s[snum].clen = TEXT(PS_CMNT_LEN, PS_CMNT_SD, p->s[snum].comment); + } + return (0); + } +#endif + + +#ifdef SSBM +long +mk_supp(long index, supplier_t *s) +{ + long i, + bad_press, + noise, + offset, + type; + s->suppkey = index; + sprintf(s->name, S_NAME_FMT, S_NAME_TAG, index); + s->alen = V_STR(S_ADDR_LEN, S_ADDR_SD, s->address); + RANDOM(i, 0, nations.count-1, S_NTRG_SD); + strcpy(s->nation_name,nations.list[i].text); + strcpy(s->region_name,regions.list[nations.list[i].weight].text); + gen_city(s->city,s->nation_name); + gen_phone(i, s->phone, (long)C_PHNE_SD); + return (0); +} +#else +long +mk_supp(long index, supplier_t *s) + { + long i, + bad_press, + noise, + offset, + type; + + s->suppkey = index; + sprintf(s->name, S_NAME_FMT, S_NAME_TAG, index); + s->alen = V_STR(S_ADDR_LEN, S_ADDR_SD, s->address); + RANDOM(i, 0, nations.count - 1, S_NTRG_SD); + s->nation_code= i; + gen_phone(i, s->phone, S_PHNE_SD); + RANDOM(s->acctbal, S_ABAL_MIN, S_ABAL_MAX, S_ABAL_SD); + + s->clen = TEXT(S_CMNT_LEN, S_CMNT_SD, s->comment); + /* these calls should really move inside the if stmt below, + * but this will simplify seedless parallel load + */ + RANDOM(bad_press, 1, 10000, BBB_CMNT_SD); + RANDOM(type, 0, 100, BBB_TYPE_SD); + RANDOM(noise, 0, (s->clen - BBB_CMNT_LEN), BBB_JNK_SD); + RANDOM(offset, 0, (s->clen - (BBB_CMNT_LEN + noise)), + BBB_OFFSET_SD); + if (bad_press <= S_CMNT_BBB) + { + type = (type < BBB_DEADBEATS) ?0:1; + memcpy(s->comment + offset, BBB_BASE, BBB_BASE_LEN); + if (type == 0) + memcpy(s->comment + BBB_BASE_LEN + offset + noise, + BBB_COMPLAIN, BBB_TYPE_LEN); + else + memcpy(s->comment + BBB_BASE_LEN + offset + noise, + BBB_COMMEND, 
BBB_TYPE_LEN); + } + + return (0); + } +#endif + +struct + { + char *mdes; + long days; + long dcnt; + } +months[] = + + { + {NULL, 0, 0}, + {"JAN", 31, 31}, + {"FEB", 28, 59}, + {"MAR", 31, 90}, + {"APR", 30, 120}, + {"MAY", 31, 151}, + {"JUN", 30, 181}, + {"JUL", 31, 212}, + {"AUG", 31, 243}, + {"SEP", 30, 273}, + {"OCT", 31, 304}, + {"NOV", 30, 334}, + {"DEC", 31, 365} + }; + +long +mk_time(long index, dss_time_t *t) + { + long m = 0; + long y; + long d; + + t->timekey = index + JDAY_BASE; + y = julian(index + STARTDATE - 1) / 1000; + d = julian(index + STARTDATE - 1) % 1000; + while (d > months[m].dcnt + LEAP_ADJ(y, m)) + m++; + PR_DATE(t->alpha, y, m, + d - months[m - 1].dcnt - ((LEAP(y) && m > 2) ? 1 : 0)); + t->year = 1900 + y; + t->month = m + 12 * y + JMNTH_BASE; + t->week = (d + T_START_DAY - 1) / 7 + 1; + t->day = d - months[m - 1].dcnt - LEAP_ADJ(y, m-1); + + return (0); + } + + int + mk_nation(long index, code_t *c) + { + c->code = index - 1; + c->text = nations.list[index - 1].text; + c->join = nations.list[index - 1].weight; + c->clen = TEXT(N_CMNT_LEN, N_CMNT_SD, c->comment); + return(0); + } + + int + mk_region(long index, code_t *c) + { + + c->code = index - 1; + c->text = regions.list[index - 1].text; + c->join = 0; /* for completeness */ + c->clen = TEXT(R_CMNT_LEN, R_CMNT_SD, c->comment); + return(0); + } + + +#ifdef SSBM + /*bug!*/ +int gen_city(char *cityName, char *nationName){ + int i=0; + long randomPick; + int clen = strlen(cityName); + int nlen = strlen(nationName); + + strncpy(cityName,nationName,CITY_FIX-1); + + if(nlen < CITY_FIX-1){ + for(i = nlen ; i< CITY_FIX-1;i++) + cityName[i] = ' '; + } + RANDOM(randomPick, 0, 9, 98); + + sprintf(cityName+CITY_FIX-1,"%d",randomPick); + cityName[CITY_FIX] = '\0'; + return 0; +} + + +/* +P_NAME is as long as 55 bytes in TPC-H, which is un¬reasonably large. +We reduce it to 22 by limiting to a concatena¬tion of two colors (see [TPC-H], pg 94). 
#ifdef SSBM
/*
 * P_NAME is as long as 55 bytes in TPC-H, which is unreasonably large.
 * We reduce it to 22 by limiting it to a concatenation of two colors
 * (see [TPC-H], pg 94).
 * We also add a new column named P_COLOR that could be used in queries
 * where currently a color must be chosen by substring from P_NAME.
 *
 * gen_color() -- split the first word off a part name.
 *
 *   source  in/out: on return holds what followed the first blank
 *           (empty when there was no blank)
 *   dest    out: the first word, NUL terminated
 *
 * Returns strlen(dest).
 */
int gen_color(char * source, char * dest){
    int i = 0,j=0;

    /* stop at the terminator too, so a one-word name cannot overrun */
    while(source[i] != ' ' && source[i] != '\0'){
        dest[i]=source[i];
        i++;
    }
    dest[i]='\0';

    if(source[i] == ' ')
        i++;                    /* step over the separating blank */
    while(source[i] != '\0'){
        source[j] = source[i];
        j++;
        i++;
    }
    source[j] = '\0';

    return (int)strlen(dest);
}



/*Following functions are related to date table generation*/
int days_in_a_month[12]={31,28,31,30,31,30,31,31,30,31,30,31};
int days_in_a_month_l[12]={31,29,31,30,31,30,31,31,30,31,30,31};
/* name, start_day, start_month, end_day, end_month -- order inferred
 * from how gen_season() reads the fields; confirm against the typedef */
season seasons[]={
    {"Christmas",1,11,31,12},
    {"Summer",1,5,31,8},
    {"Winter",1,1,31,3},
    {"Spring",1,4,30,4},
    {"Fall",1,9,31,10}
};
holiday holidays[]={
    {"Christmas",12,24},
    {"New Years Day", 1,1},
    {"holiday1", 2,20},
    {"Easter Day",4,20},
    {"holiday2", 5,20},
    {"holiday3",7,20},
    {"holiday4",8,20},
    {"holiday5",9,20},
    {"holiday6",10,20},
    {"holiday7",11,20}
};

/* spelling of August and October corrected (was "Augest", "Octorber") */
char * month_names[]={"January","February","March","April",
    "May","June","July","August",
    "September","October","November","December"};

char * weekday_names[]={"Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"};

/*make the date table, it takes the continuous index , and add index*60*60*24 to
 *numeric representation 1/1/1992 01:01:01,
 *then convert the final numeric date time to tm structure, and thus extract other field
 *for date_t structure
 *
 * index is 1-based (index 1 is D_STARTDATE itself).  Always returns 0.
 * The conversion uses localtime(), so the calendar fields follow the
 * process time zone. */
long
mk_date(long index,date_t *d)
{
    long espan = (index-1)*60*60*24;

    time_t numDateTime = D_STARTDATE + espan;

    struct tm *localTime = localtime(&numDateTime);

    /*make Sunday be the first day of a week: tm_wday is 0 for Sunday,
     *so Sunday becomes 1 and Saturday 7.  The original formula,
     *(tm_wday+1)%7+1, labelled every day with the name of the next one. */
    d->daynuminweek=(long)localTime->tm_wday+1;
    d->monthnuminyear=(long)localTime->tm_mon+1;
    strncpy(d->dayofweek, weekday_names[d->daynuminweek-1],D_DAYWEEK_LEN+1);
    strncpy(d->month,month_names[d->monthnuminyear-1],D_MONTH_LEN+1);
    d->year=(long)localTime->tm_year + 1900;
    d->daynuminmonth=(long)localTime->tm_mday;
    d->yearmonthnum=d->year * 100 + d->monthnuminyear;

    sprintf(d->yearmonth,"%.3s%d",d->month,d->year);
    sprintf(d->date,"%s %d, %d",d->month,d->daynuminmonth,d->year);

    d->datekey = d->year*10000+d->monthnuminyear*100+ d->daynuminmonth;

    d->daynuminyear=(int)localTime->tm_yday+1;
    /* NOTE(review): day 7 already lands in week 2 with this formula;
     * left as is because query predicates filter on this column. */
    d->weeknuminyear = d->daynuminyear/7 + 1;

    if(d->daynuminweek ==7){
        d->lastdayinweekfl[0]='1';
    }
    else{
        d->lastdayinweekfl[0]='0';
    }
    d->lastdayinweekfl[1]='\0';

    /* '1' on the last day of the month (the original had the two
     * values swapped) */
    if(is_last_day_in_month(d->year,d->monthnuminyear,d->daynuminmonth)==1){
        d->lastdayinmonthfl[0]= '1';
    }else{
        d->lastdayinmonthfl[0]= '0';
    }
    d->lastdayinmonthfl[1]='\0';

    if(d->daynuminweek!=1 && d->daynuminweek!=7){
        d->weekdayfl[0]='1';
    }
    else{
        d->weekdayfl[0]='0';
    }

    d->weekdayfl[1]='\0';

    gen_season(d->sellingseason,d->monthnuminyear,d->daynuminmonth);
    d->slen = strlen(d->sellingseason);
    gen_holiday_fl(d->holidayfl,d->monthnuminyear,d->daynuminmonth);
    return (0);
}

/*
 * gen_holiday_fl() -- write "1" to dest when (month, day) matches an
 * entry of holidays[], "0" otherwise.  Always returns 0.
 */
int gen_holiday_fl(char * dest, int month, int day){
    int i;
    for(i = 0; i< NUM_HOLIDAYS; i++){
        if(holidays[i].month == month && holidays[i].day == day){
            strcpy(dest,"1");
            return 0;
        }
    }
    strcpy(dest,"0");
    return 0;
}


/*
 * is_last_day_in_month() -- 1 when day equals the length of month
 * (1-based) in year, using the leap-year table when LEAP(year) is
 * true; 0 otherwise.
 */
int
is_last_day_in_month(int year,int month,int day){
    int * days;
    if(LEAP(year))
        days = days_in_a_month_l;
    else
        days = days_in_a_month;
    if(day == days[month-1]) return 1;
    return 0;
}

/*
 * gen_season() -- copy into dest the name of the first seasons[] entry
 * whose month range and day range both contain (month, day); dest is
 * set to "" when none matches.  Always returns 0.
 */
int gen_season(char * dest,int month,int day)
{
    int i;
    /* bound taken from the table itself */
    int nseasons = (int)(sizeof(seasons) / sizeof(seasons[0]));

    for(i =0;i<nseasons;i++){
        season *seas = &seasons[i];

        if(month>=seas->start_month && month<=seas->end_month &&
           day >= seas->start_day && day <= seas->end_day){
            strcpy(dest, seas->name);
            return 0;
        }
    }
    strcpy(dest,"");

    return 0;
}

#endif
/*
 * Sccsid: @(#)config.h 2.1.8.2
 *
 * this file allows the compilation of DBGEN to be tailored to specific
 * architectures and operating systems. Some options are grouped
 * together to allow easier compilation on a given vendor's hardware.
 *
 * The following #defines will affect the code:
 * TPCH -- make will create TPCH (set in makefile)
 * TPCR -- make will create TPCR (set in makefile)
 * KILL(pid) -- how to terminate a process in a parallel load
 * SPAWN -- name of system call to clone an existing process
 * SET_HANDLER(proc) -- name of routine to handle signals in parallel load
 * WAIT(res, pid) -- how to await the termination of a child
 * SEPARATOR -- character used to separate fields in flat files
 * DBNAME -- default name of database to be loaded
 * STDLIB_HAS_GETOPT -- to prevent conflicts with global getopt()
 * MDY_DATE -- generate dates as MM-DD-YY
 * WIN32 -- support for WindowsNT
 * SUPPORT_64BITS -- compiler defines a 64 bit datatype
 * DSS_HUGE -- 64 bit data type
 * HUGE_FORMAT -- printf string for 64 bit data type
 * HUGE_COUNT -- number of objects in DSS_HUGE
 * EOL_HANDLING -- flat files don't need final column separator
 *
 * OS defines
 * ==========
 * ATT -- getopt() handling
 * DIGITAL -- changes for DigUnix 64-bit support
 * DOS -- disable all multi-user functionality/dependency
 * HP -- posix source inclusion differences
 * IBM -- posix source inclusion differences
 * ICL -- getopt() handling
 * MVS -- special handling of varchar format
 * SGI -- getopt() handling
 * SUN -- getopt() handling
 * LINUX -- getopt() handling
 * TANDEM -- EOL handling
 * U2200 -- death of parent kills children automatically
 * VMS -- signal/fork handling differences
 *
 * Database defines
 * ================
 * DB2 -- use DB2 dialect in QGEN
 * INFORMIX -- use Informix dialect in QGEN
 * SQLSERVER -- use SQLSERVER dialect in QGEN
 * SYBASE -- use Sybase dialect in QGEN
 * TDAT -- use Teradata dialect in QGEN
 */

#ifdef DOS
#define DSS_PROC 1
#define PATH_SEP '\\'
#else


#ifdef ATT
#define STDLIB_HAS_GETOPT
#ifdef SQLSERVER
#define WIN32
#else
/* the 64 bit defines are for the Metaware compiler */
#define SUPPORT_64BITS
#define DSS_HUGE long long
#define HUGE_COUNT 1
/* NOTE(review): "%LLd" is not a standard printf conversion ("%lld" is);
 * left alone pending confirmation against the Metaware runtime */
#define HUGE_FORMAT "%LLd"
#endif /* SQLSERVER or MP/RAS */
#endif /* ATT */

#ifdef DIGITAL
#define DOUBLE_CAST (double)(int)
#endif

#ifdef HP
#define _INCLUDE_POSIX_SOURCE
#define STDLIB_HAS_GETOPT
#endif /* HP */

#ifdef IBM
#define _POSIX_SOURCE
/*
 * if the C compiler is 3.1 or later, then uncomment the
 * lines for 64 bit seed generation
 */
/* #define SUPPORT_64BITS*/
/* #define DSS_HUGE long long*/
/* #define HUGE_COUNT 1 */
#define STDLIB_HAS_GETOPT
#endif /* IBM */

#ifdef ICL
#define STDLIB_HAS_GETOPT
#endif /* ICL */

#ifdef SUN
#define STDLIB_HAS_GETOPT
#endif /* SUN */

#ifdef LINUX
#define STDLIB_HAS_GETOPT
#endif /* LINUX */

#ifdef SGI
#define STDLIB_HAS_GETOPT
#define SUPPORT_64BITS
#define DSS_HUGE __uint64_t
#define HUGE_COUNT 1
#endif /* SGI */

#ifdef TANDEM
#define EOL_HANDLING
#endif /* TANDEM */

#ifdef VMS
#define SPAWN vfork
/* kill() takes the process first and the signal second */
#define KILL(pid) kill(pid, SIGQUIT)
#define SET_HANDLER(proc) signal(SIGQUIT, proc)
#define WAIT(res, pid) wait(res)
#define SIGS_DEFINED
#endif /* VMS */

#if (defined(WIN32)&&!defined(_POSIX_))
#define pid_t int
#define SET_HANDLER(proc) signal(SIGINT, proc)
#define KILL(pid) \
    TerminateProcess(OpenProcess(PROCESS_TERMINATE,FALSE,pid),3)
#if (defined (__WATCOMC__))
#define SPAWN() spawnv(P_NOWAIT, spawn_args[0], spawn_args)
#define WAIT(res, pid) cwait(res, pid, WAIT_CHILD)
#else
#define SPAWN() _spawnv(_P_NOWAIT, spawn_args[0], spawn_args)
#define WAIT(res, pid) _cwait(res, pid, _WAIT_CHILD)
#define getpid _getpid
#endif /* WATCOMC */
#define SIGS_DEFINED
#define PATH_SEP '\\'
#ifndef TEST_32B
#define SUPPORT_64BITS
#define DSS_HUGE __int64
#define HUGE_COUNT 1
#define HUGE_FORMAT "%I64d"
#endif /* TEST_32B */
/* need to define process termination codes to match UNIX */
/* these are copied from Linux/GNU and need to be verified as part of a rework of */
/* process handling under NT (29 Apr 98) */
#define WIFEXITED(s) (((s) & 0xFF) == 0)
/* uses the macro argument; the original referred to a variable named status */
#define WIFSIGNALED(s) (((unsigned int)((s)-1) & 0xFFFF) < 0xFF)
#define WIFSTOPPED(s) (((s) & 0xff) == 0x7f)
#define WTERMSIG(s) ((s) & 0x7f)
#define WSTOPSIG(s) (((s) & 0xff00) >> 8)
#endif /* WIN32 */

#ifndef SIGS_DEFINED
/* kill() takes the process first and the signal second */
#define KILL(pid) kill(pid, SIGUSR1)
#define SET_HANDLER(proc) signal(SIGUSR1, proc)
#define SPAWN fork
#define WAIT(res, pid) wait(res)
#endif /* DEFAULT */

#define DSS_PROC getpid()
#endif /* DOS */

#ifndef DBNAME
#define DBNAME "dss"
#endif /* DBNAME */

#ifndef PATH_SEP
#define PATH_SEP '/'
#endif /* PATH_SEP */

/* no 64 bit type configured: a huge value is HUGE_COUNT longs */
#ifndef DSS_HUGE
#define DSS_HUGE long
#define HUGE_COUNT 2
#endif

#ifndef DOUBLE_CAST
#define DOUBLE_CAST (double)
#endif /* DOUBLE_CAST */
+#----------------------------------------------------------------------- +# currently defined distributions and their use: +# NAME FIELD/NOTES +# ======== ============== +# category parts.category +# container parts.container +# instruct shipping instructions +# msegmnt market segment +# names parts.name +# nations must be ordered along with regions +# nations2 stand alone nations set for use with qgen +# o_prio order priority +# regions must be ordered along with nations +# rflag lineitems.returnflag +# types parts.type +# colors embedded string creation; CANNOT BE USED FOR pick_str(), agg_str() perturbs order +# articles comment generation +# nouns +# verbs +# adverbs +# auxillaries +# prepositions +# terminators +# grammar sentence formation +# np +# vp +### +# category +### +BEGIN category +COUNT|5 +FURNITURE|1 +STORAGE EQUIP|1 +TOOLS|1 +MACHINE TOOLS|1 +OTHER|1 +END category +### +# container +### +begin p_cntr +count|40 +SM CASE|1 +SM BOX|1 +SM BAG|1 +SM JAR|1 +SM PACK|1 +SM PKG|1 +SM CAN|1 +SM DRUM|1 +LG CASE|1 +LG BOX|1 +LG BAG|1 +LG JAR|1 +LG PACK|1 +LG PKG|1 +LG CAN|1 +LG DRUM|1 +MED CASE|1 +MED BOX|1 +MED BAG|1 +MED JAR|1 +MED PACK|1 +MED PKG|1 +MED CAN|1 +MED DRUM|1 +JUMBO CASE|1 +JUMBO BOX|1 +JUMBO BAG|1 +JUMBO JAR|1 +JUMBO PACK|1 +JUMBO PKG|1 +JUMBO CAN|1 +JUMBO DRUM|1 +WRAP CASE|1 +WRAP BOX|1 +WRAP BAG|1 +WRAP JAR|1 +WRAP PACK|1 +WRAP PKG|1 +WRAP CAN|1 +WRAP DRUM|1 +end p_cntr +### +# instruct +### +begin instruct +count|4 +DELIVER IN PERSON|1 +COLLECT COD|1 +TAKE BACK RETURN|1 +NONE|1 +end instruct +### +# msegmnt +### +begin msegmnt +count|5 +AUTOMOBILE|1 +BUILDING|1 +FURNITURE|1 +HOUSEHOLD|1 +MACHINERY|1 +end msegmnt +### +# names +### +begin p_names +COUNT|4 +CLEANER|1 +SOAP|1 +DETERGENT|1 +EXTRA|1 +end p_names +### +# nations +# NOTE: this is a special case; the weights here are adjustments to +# map correctly into the regions table, and are *NOT* cummulative +# values to mimic a distribution +### +begin nations +count|25 +ALGERIA|0 
+ARGENTINA|1 +BRAZIL|0 +CANADA|0 +EGYPT|3 +ETHIOPIA|-4 +FRANCE|3 +GERMANY|0 +INDIA|-1 +INDONESIA|0 +IRAN|2 +IRAQ|0 +JAPAN|-2 +JORDAN|2 +KENYA|-4 +MOROCCO|0 +MOZAMBIQUE|0 +PERU|1 +CHINA|1 +ROMANIA|1 +SAUDI ARABIA|1 +VIETNAM|-2 +RUSSIA|1 +UNITED KINGDOM|0 +UNITED STATES|-2 +end nations +### +# nations2 +### +begin nations2 +count|25 +ALGERIA|1 +ARGENTINA|1 +BRAZIL|1 +CANADA|1 +EGYPT|1 +ETHIOPIA|1 +FRANCE|1 +GERMANY|1 +INDIA|1 +INDONESIA|1 +IRAN|1 +IRAQ|1 +JAPAN|1 +JORDAN|1 +KENYA|1 +MOROCCO|1 +MOZAMBIQUE|1 +PERU|1 +CHINA|1 +ROMANIA|1 +SAUDI ARABIA|1 +VIETNAM|1 +RUSSIA|1 +UNITED KINGDOM|1 +UNITED STATES|1 +end nations2 +### +# regions +### +begin regions +count|5 +AFRICA|1 +AMERICA|1 +ASIA|1 +EUROPE|1 +MIDDLE EAST|1 +end regions +### +# o_prio +### +begin o_oprio +count|5 +1-URGENT|1 +2-HIGH|1 +3-MEDIUM|1 +4-NOT SPECIFIED|1 +5-LOW|1 +end o_oprio +### +# rflag +### +begin rflag +count|2 +R|1 +A|1 +end rflag +### +# smode +### +begin smode +count|7 +REG AIR|1 +AIR|1 +RAIL|1 +TRUCK|1 +MAIL|1 +FOB|1 +SHIP|1 +end smode +### +# types +### +begin p_types +COUNT|150 +STANDARD ANODIZED TIN|1 +STANDARD ANODIZED NICKEL|1 +STANDARD ANODIZED BRASS|1 +STANDARD ANODIZED STEEL|1 +STANDARD ANODIZED COPPER|1 +STANDARD BURNISHED TIN|1 +STANDARD BURNISHED NICKEL|1 +STANDARD BURNISHED BRASS|1 +STANDARD BURNISHED STEEL|1 +STANDARD BURNISHED COPPER|1 +STANDARD PLATED TIN|1 +STANDARD PLATED NICKEL|1 +STANDARD PLATED BRASS|1 +STANDARD PLATED STEEL|1 +STANDARD PLATED COPPER|1 +STANDARD POLISHED TIN|1 +STANDARD POLISHED NICKEL|1 +STANDARD POLISHED BRASS|1 +STANDARD POLISHED STEEL|1 +STANDARD POLISHED COPPER|1 +STANDARD BRUSHED TIN|1 +STANDARD BRUSHED NICKEL|1 +STANDARD BRUSHED BRASS|1 +STANDARD BRUSHED STEEL|1 +STANDARD BRUSHED COPPER|1 +SMALL ANODIZED TIN|1 +SMALL ANODIZED NICKEL|1 +SMALL ANODIZED BRASS|1 +SMALL ANODIZED STEEL|1 +SMALL ANODIZED COPPER|1 +SMALL BURNISHED TIN|1 +SMALL BURNISHED NICKEL|1 +SMALL BURNISHED BRASS|1 +SMALL BURNISHED STEEL|1 +SMALL BURNISHED COPPER|1 +SMALL PLATED 
TIN|1 +SMALL PLATED NICKEL|1 +SMALL PLATED BRASS|1 +SMALL PLATED STEEL|1 +SMALL PLATED COPPER|1 +SMALL POLISHED TIN|1 +SMALL POLISHED NICKEL|1 +SMALL POLISHED BRASS|1 +SMALL POLISHED STEEL|1 +SMALL POLISHED COPPER|1 +SMALL BRUSHED TIN|1 +SMALL BRUSHED NICKEL|1 +SMALL BRUSHED BRASS|1 +SMALL BRUSHED STEEL|1 +SMALL BRUSHED COPPER|1 +MEDIUM ANODIZED TIN|1 +MEDIUM ANODIZED NICKEL|1 +MEDIUM ANODIZED BRASS|1 +MEDIUM ANODIZED STEEL|1 +MEDIUM ANODIZED COPPER|1 +MEDIUM BURNISHED TIN|1 +MEDIUM BURNISHED NICKEL|1 +MEDIUM BURNISHED BRASS|1 +MEDIUM BURNISHED STEEL|1 +MEDIUM BURNISHED COPPER|1 +MEDIUM PLATED TIN|1 +MEDIUM PLATED NICKEL|1 +MEDIUM PLATED BRASS|1 +MEDIUM PLATED STEEL|1 +MEDIUM PLATED COPPER|1 +MEDIUM POLISHED TIN|1 +MEDIUM POLISHED NICKEL|1 +MEDIUM POLISHED BRASS|1 +MEDIUM POLISHED STEEL|1 +MEDIUM POLISHED COPPER|1 +MEDIUM BRUSHED TIN|1 +MEDIUM BRUSHED NICKEL|1 +MEDIUM BRUSHED BRASS|1 +MEDIUM BRUSHED STEEL|1 +MEDIUM BRUSHED COPPER|1 +LARGE ANODIZED TIN|1 +LARGE ANODIZED NICKEL|1 +LARGE ANODIZED BRASS|1 +LARGE ANODIZED STEEL|1 +LARGE ANODIZED COPPER|1 +LARGE BURNISHED TIN|1 +LARGE BURNISHED NICKEL|1 +LARGE BURNISHED BRASS|1 +LARGE BURNISHED STEEL|1 +LARGE BURNISHED COPPER|1 +LARGE PLATED TIN|1 +LARGE PLATED NICKEL|1 +LARGE PLATED BRASS|1 +LARGE PLATED STEEL|1 +LARGE PLATED COPPER|1 +LARGE POLISHED TIN|1 +LARGE POLISHED NICKEL|1 +LARGE POLISHED BRASS|1 +LARGE POLISHED STEEL|1 +LARGE POLISHED COPPER|1 +LARGE BRUSHED TIN|1 +LARGE BRUSHED NICKEL|1 +LARGE BRUSHED BRASS|1 +LARGE BRUSHED STEEL|1 +LARGE BRUSHED COPPER|1 +ECONOMY ANODIZED TIN|1 +ECONOMY ANODIZED NICKEL|1 +ECONOMY ANODIZED BRASS|1 +ECONOMY ANODIZED STEEL|1 +ECONOMY ANODIZED COPPER|1 +ECONOMY BURNISHED TIN|1 +ECONOMY BURNISHED NICKEL|1 +ECONOMY BURNISHED BRASS|1 +ECONOMY BURNISHED STEEL|1 +ECONOMY BURNISHED COPPER|1 +ECONOMY PLATED TIN|1 +ECONOMY PLATED NICKEL|1 +ECONOMY PLATED BRASS|1 +ECONOMY PLATED STEEL|1 +ECONOMY PLATED COPPER|1 +ECONOMY POLISHED TIN|1 +ECONOMY POLISHED NICKEL|1 +ECONOMY POLISHED BRASS|1 
+ECONOMY POLISHED STEEL|1 +ECONOMY POLISHED COPPER|1 +ECONOMY BRUSHED TIN|1 +ECONOMY BRUSHED NICKEL|1 +ECONOMY BRUSHED BRASS|1 +ECONOMY BRUSHED STEEL|1 +ECONOMY BRUSHED COPPER|1 +PROMO ANODIZED TIN|1 +PROMO ANODIZED NICKEL|1 +PROMO ANODIZED BRASS|1 +PROMO ANODIZED STEEL|1 +PROMO ANODIZED COPPER|1 +PROMO BURNISHED TIN|1 +PROMO BURNISHED NICKEL|1 +PROMO BURNISHED BRASS|1 +PROMO BURNISHED STEEL|1 +PROMO BURNISHED COPPER|1 +PROMO PLATED TIN|1 +PROMO PLATED NICKEL|1 +PROMO PLATED BRASS|1 +PROMO PLATED STEEL|1 +PROMO PLATED COPPER|1 +PROMO POLISHED TIN|1 +PROMO POLISHED NICKEL|1 +PROMO POLISHED BRASS|1 +PROMO POLISHED STEEL|1 +PROMO POLISHED COPPER|1 +PROMO BRUSHED TIN|1 +PROMO BRUSHED NICKEL|1 +PROMO BRUSHED BRASS|1 +PROMO BRUSHED STEEL|1 +PROMO BRUSHED COPPER|1 +end p_types +### +# colors +# NOTE: This distribution CANNOT be used by pick_str(), since agg_str() perturbs its order +### +begin colors +COUNT|92 +almond|1 +antique|1 +aquamarine|1 +azure|1 +beige|1 +bisque|1 +black|1 +blanched|1 +blue|1 +blush|1 +brown|1 +burlywood|1 +burnished|1 +chartreuse|1 +chiffon|1 +chocolate|1 +coral|1 +cornflower|1 +cornsilk|1 +cream|1 +cyan|1 +dark|1 +deep|1 +dim|1 +dodger|1 +drab|1 +firebrick|1 +floral|1 +forest|1 +frosted|1 +gainsboro|1 +ghost|1 +goldenrod|1 +green|1 +grey|1 +honeydew|1 +hot|1 +indian|1 +ivory|1 +khaki|1 +lace|1 +lavender|1 +lawn|1 +lemon|1 +light|1 +lime|1 +linen|1 +magenta|1 +maroon|1 +medium|1 +metallic|1 +midnight|1 +mint|1 +misty|1 +moccasin|1 +navajo|1 +navy|1 +olive|1 +orange|1 +orchid|1 +pale|1 +papaya|1 +peach|1 +peru|1 +pink|1 +plum|1 +powder|1 +puff|1 +purple|1 +red|1 +rose|1 +rosy|1 +royal|1 +saddle|1 +salmon|1 +sandy|1 +seashell|1 +sienna|1 +sky|1 +slate|1 +smoke|1 +snow|1 +spring|1 +steel|1 +tan|1 +thistle|1 +tomato|1 +turquoise|1 +violet|1 +wheat|1 +white|1 +yellow|1 +end colors +################ +################ +## psuedo text distributions +################ +################ +### +# nouns +### +BEGIN nouns +COUNT|45 +packages|40 +requests|40 
+accounts|40 +deposits|40 +foxes|20 +ideas|20 +theodolites|20 +pinto beans|20 +instructions|20 +dependencies|10 +excuses|10 +platelets|10 +asymptotes|10 +courts|5 +dolphins|5 +multipliers|1 +sauternes|1 +warthogs|1 +frets|1 +dinos|1 +attainments|1 +somas|1 +Tiresias|1 +patterns|1 +forges|1 +braids|1 +frays|1 +warhorses|1 +dugouts|1 +notornis|1 +epitaphs|1 +pearls|1 +tithes|1 +waters|1 +orbits|1 +gifts|1 +sheaves|1 +depths|1 +sentiments|1 +decoys|1 +realms|1 +pains|1 +grouches|1 +escapades|1 +hockey players|1 +END nouns +### +# verbs +### +BEGIN verbs +COUNT|40 +sleep|20 +wake|20 +are|20 +cajole|20 +haggle|20 +nag|10 +use|10 +boost|10 +affix|5 +detect|5 +integrate|5 +maintain|1 +nod|1 +was|1 +lose|1 +sublate|1 +solve|1 +thrash|1 +promise|1 +engage|1 +hinder|1 +print|1 +x-ray|1 +breach|1 +eat|1 +grow|1 +impress|1 +mold|1 +poach|1 +serve|1 +run|1 +dazzle|1 +snooze|1 +doze|1 +unwind|1 +kindle|1 +play|1 +hang|1 +believe|1 +doubt|1 +END verbs +### +# adverbs +## +BEGIN adverbs +COUNT|28 +sometimes|1 +always|1 +never|1 +furiously|50 +slyly|50 +carefully|50 +blithely|40 +quickly|30 +fluffily|20 +slowly|1 +quietly|1 +ruthlessly|1 +thinly|1 +closely|1 +doggedly|1 +daringly|1 +bravely|1 +stealthily|1 +permanently|1 +enticingly|1 +idly|1 +busily|1 +regularly|1 +finally|1 +ironically|1 +evenly|1 +boldly|1 +silently|1 +END adverbs +### +# articles +## +BEGIN articles +COUNT|3 +the|50 +a|20 +an|5 +END articles +### +# prepositions +## +BEGIN prepositions +COUNT|47 +about|50 +above|50 +according to|50 +across|50 +after|50 +against|40 +along|40 +alongside of|30 +among|30 +around|20 +at|10 +atop|1 +before|1 +behind|1 +beneath|1 +beside|1 +besides|1 +between|1 +beyond|1 +by|1 +despite|1 +during|1 +except|1 +for|1 +from|1 +in place of|1 +inside|1 +instead of|1 +into|1 +near|1 +of|1 +on|1 +outside|1 +over|1 +past|1 +since|1 +through|1 +throughout|1 +to|1 +toward|1 +under|1 +until|1 +up|1 +upon|1 +whithout|1 +with|1 +within|1 +END prepositions +### +# auxillaries +## +BEGIN auxillaries 
+COUNT|18 +do|1 +may|1 +might|1 +shall|1 +will|1 +would|1 +can|1 +could|1 +should|1 +ought to|1 +must|1 +will have to|1 +shall have to|1 +could have to|1 +should have to|1 +must have to|1 +need to|1 +try to|1 +END auxillaries +### +# terminators +## +BEGIN terminators +COUNT|6 +.|50 +;|1 +:|1 +?|1 +!|1 +--|1 +END terminators +### +# adjectives +## +BEGIN adjectives +COUNT|29 +special|20 +pending|20 +unusual|20 +express|20 +furious|1 +sly|1 +careful|1 +blithe|1 +quick|1 +fluffy|1 +slow|1 +quiet|1 +ruthless|1 +thin|1 +close|1 +dogged|1 +daring|1 +brave|1 +stealthy|1 +permanent|1 +enticing|1 +idle|1 +busy|1 +regular|50 +final|40 +ironic|40 +even|30 +bold|20 +silent|10 +END adjectives +### +# grammar +# first level grammar. N=noun phrase, V=verb phrase, +# P=prepositional phrase, T=sentence termination +## +BEGIN grammar +COUNT|5 +N V T|3 +N V P T|3 +N V N T|3 +N P V N T|1 +N P V P T|1 +END grammar +### +# NP +# second level grammar. Noun phrases. N=noun, A=article, +# J=adjective, D=adverb +## +BEGIN np +COUNT|4 +N|10 +J N|20 +J, J N|10 +D J N|50 +END np +### +# VP +# second level grammar. Verb phrases. 
V=verb, X=auxiliary, +# D=adverb +## +BEGIN vp +COUNT|4 +V|30 +X V|1 +V D|40 +X V D|1 +END vp +### +# Q13 +# Substitution parameters for Q13 +## +BEGIN Q13a +COUNT|4 +special|20 +pending|20 +unusual|20 +express|20 +END Q13a +BEGIN Q13b +COUNT|4 +packages|40 +requests|40 +accounts|40 +deposits|40 +END Q13b diff --git a/driver.c b/driver.c new file mode 100644 index 0000000..4f4c903 --- /dev/null +++ b/driver.c @@ -0,0 +1,1144 @@ +/* @(#)driver.c 2.1.8.4 */ +/* main driver for dss benchmark */ + +#define DECLARER /* EXTERN references get defined here */ +#define NO_FUNC (int (*) ()) NULL /* to clean up tdefs */ +#define NO_LFUNC (long (*) ()) NULL /* to clean up tdefs */ + +#include "config.h" +#include <stdlib.h> +#if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */ +#ifndef DOS +#include <unistd.h> +#include <sys/wait.h> +#endif + +#endif /* WIN32 */ +#include <stdio.h> /* */ +#include <limits.h> +#include <math.h> +#include <ctype.h> +#include <signal.h> +#include <string.h> +#include <errno.h> +#ifdef HP +#include <strings.h> +#endif +#if (defined(WIN32)&&!defined(_POSIX_)) +#include <process.h> +#pragma warning(disable:4201) +#pragma warning(disable:4214) +#pragma warning(disable:4514) +#define WIN32_LEAN_AND_MEAN +#define NOATOM +#define NOGDICAPMASKS +#define NOMETAFILE +#define NOMINMAX +#define NOMSG +#define NOOPENFILE +#define NORASTEROPS +#define NOSCROLL +#define NOSOUND +#define NOSYSMETRICS +#define NOTEXTMETRIC +#define NOWH +#define NOCOMM +#define NOKANJI +#define NOMCX + +#include "windows.h" + +#pragma warning(default:4201) +#pragma warning(default:4214) +#endif + +#include "dss.h" +#include "dsstypes.h" +#include "bcd2.h" + +/* +* Function prototypes +*/ +void usage (void); +int prep_direct (char *); +int close_direct (void); +void kill_load (void); +int pload (int tbl); +void gen_tbl (int tnum, long start, long count, long upd_num); +int pr_drange (int tbl, long min, long cnt, long num); +int set_files (int t, int pload); +int partial (int, int); + + +extern int optind, opterr; +extern char *optarg; +long rowcnt = 0, minrow = 0, upd_num = 0; +double 
flt_scale; +#if (defined(WIN32)&&!defined(_POSIX_)) +char *spawn_args[25]; +#endif + + +/* +* general table descriptions. See dss.h for details on structure +* NOTE: tables with no scaling info are scaled according to +* another table +* +* +* the following is based on the tdef structure defined in dss.h as: +* typedef struct +* { +* char *name; -- name of the table; +* flat file output in <name>.tbl +* long base; -- base scale rowcount of table; +* 0 if derived +* int (*header) (); -- function to prep output +* int (*loader[2]) (); -- functions to present output +* long (*gen_seed) (); -- functions to seed the RNG +* int (*verify) (); -- function to verify the data set without building it +* int child; -- non-zero if there is an associated detail table +* unsigned long vtotal; -- "checksum" total +* } tdef; +* +*/ + +/* +* flat file print functions; used with -F(lat) option +*/ +#ifdef SSBM +int pr_cust (customer_t * c, int mode); +int pr_part (part_t * p, int mode); +int pr_supp (supplier_t * s, int mode); +int pr_line (order_t * o, int mode); +#else +int pr_cust (customer_t * c, int mode); +int pr_line (order_t * o, int mode); +int pr_order (order_t * o, int mode); +int pr_part (part_t * p, int mode); +int pr_psupp (part_t * p, int mode); +int pr_supp (supplier_t * s, int mode); +int pr_order_line (order_t * o, int mode); +int pr_part_psupp (part_t * p, int mode); +int pr_nation (code_t * c, int mode); +int pr_region (code_t * c, int mode); +#endif + +/* +* inline load functions; used with -D(irect) option +*/ +#ifdef SSBM +int ld_cust (customer_t * c, int mode); +int ld_part (part_t * p, int mode); +int ld_supp (supplier_t * s, int mode); + +/*todo: get rid of ld_order*/ +int ld_line (order_t * o, int mode); +int ld_order (order_t * o, int mode); + +#else +int ld_cust (customer_t * c, int mode); +int ld_line (order_t * o, int mode); +int ld_order (order_t * o, int mode); +int ld_part (part_t * p, int mode); +int ld_psupp (part_t * p, int mode); +int ld_supp 
(supplier_t * s, int mode); +int ld_order_line (order_t * o, int mode); +int ld_part_psupp (part_t * p, int mode); +int ld_nation (code_t * c, int mode); +int ld_region (code_t * c, int mode); +#endif + +/* +* seed generation functions; used with '-O s' option +*/ +#ifdef SSBM +long sd_cust (int child, long skip_count); +long sd_part (int child, long skip_count); +long sd_supp (int child, long skip_count); + +long sd_line (int child, long skip_count); +long sd_order (int child, long skip_count); + +#else +long sd_cust (int child, long skip_count); +long sd_line (int child, long skip_count); +long sd_order (int child, long skip_count); +long sd_part (int child, long skip_count); +long sd_psupp (int child, long skip_count); +long sd_supp (int child, long skip_count); +long sd_order_line (int child, long skip_count); +long sd_part_psupp (int child, long skip_count); +#endif + +/* +* header output functions); used with -h(eader) option +*/ +#ifdef SSBM +int hd_cust (FILE * f); +int hd_part (FILE * f); +int hd_supp (FILE * f); +int hd_line (FILE * f); + +#else +int hd_cust (FILE * f); +int hd_line (FILE * f); +int hd_order (FILE * f); +int hd_part (FILE * f); +int hd_psupp (FILE * f); +int hd_supp (FILE * f); +int hd_order_line (FILE * f); +int hd_part_psupp (FILE * f); +int hd_nation (FILE * f); +int hd_region (FILE * f); +#endif + +/* +* data verfication functions; used with -O v option +*/ +#ifdef SSBM +int vrf_cust (customer_t * c, int mode); +int vrf_part (part_t * p, int mode); +int vrf_supp (supplier_t * s, int mode); +int vrf_line (order_t * o, int mode); +int vrf_order (order_t * o, int mode); +int vrf_date (date_t,int mode); +#else +int vrf_cust (customer_t * c, int mode); +int vrf_line (order_t * o, int mode); +int vrf_order (order_t * o, int mode); +int vrf_part (part_t * p, int mode); +int vrf_psupp (part_t * p, int mode); +int vrf_supp (supplier_t * s, int mode); +int vrf_order_line (order_t * o, int mode); +int vrf_part_psupp (part_t * p, int mode); +int 
vrf_nation (code_t * c, int mode); +int vrf_region (code_t * c, int mode); +#endif + + +#ifdef SSBM +tdef tdefs[] = +{ + + {"part.tbl", "part table", 200000, hd_part, + {pr_part, ld_part}, sd_part, vrf_part, PSUPP, 0}, + {0,0,0,0,{0,0}, 0,0,0,0}, + {"supplier.tbl", "suppliers table", 2000, hd_supp, + {pr_supp, ld_supp}, sd_supp, vrf_supp, NONE, 0}, + + {"customer.tbl", "customers table", 30000, hd_cust, + {pr_cust, ld_cust}, sd_cust, vrf_cust, NONE, 0}, + {"date.tbl","date table",2556,0,{pr_date,ld_date}, 0,vrf_date, NONE,0}, + /*line order is SF*1,500,000, however due to the implementation + the base here is 150,000 instead if 1500,000*/ + {"lineorder.tbl", "lineorder table", 150000, hd_line, + {pr_line, ld_line}, sd_line, vrf_line, NONE, 0}, + {0,0,0,0,{0,0}, 0,0,0,0}, + {0,0,0,0,{0,0}, 0,0,0,0}, + {0,0,0,0,{0,0}, 0,0,0,0}, + {0,0,0,0,{0,0}, 0,0,0,0}, +}; + +#else + +tdef tdefs[] = +{ + {"part.tbl", "part table", 200000, hd_part, + {pr_part, ld_part}, sd_part, vrf_part, PSUPP, 0}, + {"partsupp.tbl", "partsupplier table", 200000, hd_psupp, + {pr_psupp, ld_psupp}, sd_psupp, vrf_psupp, NONE, 0}, + {"supplier.tbl", "suppliers table", 10000, hd_supp, + {pr_supp, ld_supp}, sd_supp, vrf_supp, NONE, 0}, + {"customer.tbl", "customers table", 150000, hd_cust, + {pr_cust, ld_cust}, sd_cust, vrf_cust, NONE, 0}, + {"orders.tbl", "order table", 150000, hd_order, + {pr_order, ld_order}, sd_order, vrf_order, LINE, 0}, + {"lineitem.tbl", "lineitem table", 150000, hd_line, + {pr_line, ld_line}, sd_line, vrf_line, NONE, 0}, + {"orders.tbl", "orders/lineitem tables", 150000, hd_order_line, + {pr_order_line, ld_order_line}, sd_order, vrf_order_line, LINE, 0}, + {"part.tbl", "part/partsupplier tables", 200000, hd_part_psupp, + {pr_part_psupp, ld_part_psupp}, sd_part, vrf_part_psupp, PSUPP, 0}, + {"nation.tbl", "nation table", NATIONS_MAX, hd_nation, + {pr_nation, ld_nation}, NO_LFUNC, vrf_nation, NONE, 0}, + {"region.tbl", "region table", NATIONS_MAX, hd_region, + {pr_region, 
ld_region}, NO_LFUNC, vrf_region, NONE, 0}, +}; +#endif +int *pids; + + +/* +* routines to handle the graceful cleanup of multi-process loads +*/ + +void +stop_proc (int signum) +{ + exit (0); +} + +void +kill_load (void) +{ + int i; + +#if !defined(U2200) && !defined(DOS) + for (i = 0; i < children; i++) + if (pids[i]) + KILL (pids[i]); +#endif /* !U2200 && !DOS */ + return; +} + +/* +* re-set default output file names +*/ +int +set_files (int i, int pload) +{ + char line[80], *new_name; + + if (table & (1 << i)) +child_table: + { + if (pload != -1) + sprintf (line, "%s.%d", tdefs[i].name, pload); + else + { + printf ("Enter new destination for %s data: ", + tdefs[i].name); + if (fgets (line, sizeof (line), stdin) == NULL) + return (-1);; + if ((new_name = strchr (line, '\n')) != NULL) + *new_name = '\0'; + if (strlen (line) == 0) + return (0); + } + new_name = (char *) malloc (strlen (line) + 1); + MALLOC_CHECK (new_name); + strcpy (new_name, line); + tdefs[i].name = new_name; + if (tdefs[i].child != NONE) + { + i = tdefs[i].child; + tdefs[i].child = NONE; + goto child_table; + } + } + + return (0); +} + + + +/* +* read the distributions needed in the benchmark +*/ +void +load_dists (void) +{ + read_dist (env_config (DIST_TAG, DIST_DFLT), "p_cntr", &p_cntr_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "colors", &colors); + read_dist (env_config (DIST_TAG, DIST_DFLT), "p_types", &p_types_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "nations", &nations); + read_dist (env_config (DIST_TAG, DIST_DFLT), "regions", &regions); + read_dist (env_config (DIST_TAG, DIST_DFLT), "o_oprio", + &o_priority_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "instruct", + &l_instruct_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "smode", &l_smode_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "category", + &l_category_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "rflag", &l_rflag_set); + read_dist (env_config (DIST_TAG, DIST_DFLT), "msegmnt", 
&c_mseg_set); + + /* load the distributions that contain text generation */ + read_dist (env_config (DIST_TAG, DIST_DFLT), "nouns", &nouns); + read_dist (env_config (DIST_TAG, DIST_DFLT), "verbs", &verbs); + read_dist (env_config (DIST_TAG, DIST_DFLT), "adjectives", &adjectives); + read_dist (env_config (DIST_TAG, DIST_DFLT), "adverbs", &adverbs); + read_dist (env_config (DIST_TAG, DIST_DFLT), "auxillaries", &auxillaries); + read_dist (env_config (DIST_TAG, DIST_DFLT), "terminators", &terminators); + read_dist (env_config (DIST_TAG, DIST_DFLT), "articles", &articles); + read_dist (env_config (DIST_TAG, DIST_DFLT), "prepositions", &prepositions); + read_dist (env_config (DIST_TAG, DIST_DFLT), "grammar", &grammar); + read_dist (env_config (DIST_TAG, DIST_DFLT), "np", &np); + read_dist (env_config (DIST_TAG, DIST_DFLT), "vp", &vp); + +} + +/* +* generate a particular table +*/ +void +gen_tbl (int tnum, long start, long count, long upd_num) +{ + static order_t o; + supplier_t supp; + customer_t cust; + part_t part; +#ifdef SSBM + date_t dt; +#else + code_t code; +#endif + static int completed = 0; + static int init = 0; + long i; + + int rows_per_segment=0; + int rows_this_segment=-1; + int residual_rows=0; + + if (insert_segments) + { + rows_per_segment = count / insert_segments; + residual_rows = count - (rows_per_segment * insert_segments); + } + + if (init == 0) + { + INIT_HUGE(o.okey); + for (i=0; i < O_LCNT_MAX; i++) +#ifdef SSBM + INIT_HUGE(o.lineorders[i].okey); +#else + INIT_HUGE(o.l[i].okey); +#endif + init = 1; + } + + for (i = start; count; count--, i++) + { + LIFENOISE (1000, i); + row_start(tnum); + + switch (tnum) + { + case LINE: +#ifdef SSBM +#else + case ORDER: + case ORDER_LINE: +#endif + mk_order (i, &o, upd_num % 10000); + + if (insert_segments && (upd_num > 0)) + if((upd_num / 10000) < residual_rows) + { + if((++rows_this_segment) > rows_per_segment) + { + rows_this_segment=0; + upd_num += 10000; + } + } + else + { + if((++rows_this_segment) >= 
rows_per_segment) + { + rows_this_segment=0; + upd_num += 10000; + } + } + + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&o, 0); + else + tdefs[tnum].loader[direct] (&o, upd_num); + break; + case SUPP: + mk_supp (i, &supp); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&supp, 0); + else + tdefs[tnum].loader[direct] (&supp, upd_num); + break; + case CUST: + mk_cust (i, &cust); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&cust, 0); + else + tdefs[tnum].loader[direct] (&cust, upd_num); + break; +#ifdef SSBM + case PART: +#else + case PSUPP: + case PART: + case PART_PSUPP: +#endif + mk_part (i, &part); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&part, 0); + else + tdefs[tnum].loader[direct] (&part, upd_num); + break; +#ifdef SSBM + case DATE: + mk_date (i, &dt); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&dt, 0); + else + tdefs[tnum].loader[direct] (&dt, 0); + break; +#else + case NATION: + mk_nation (i, &code); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&code, 0); + else + tdefs[tnum].loader[direct] (&code, 0); + break; + case REGION: + mk_region (i, &code); + if (set_seeds == 0) + if (validate) + tdefs[tnum].verify(&code, 0); + else + tdefs[tnum].loader[direct] (&code, 0); + break; +#endif + } + row_stop(tnum); + if (set_seeds && (i % tdefs[tnum].base) < 2) + { + printf("\nSeeds for %s at rowcount %ld\n", tdefs[tnum].comment, i); + dump_seeds(tnum); + } + } + completed |= 1 << tnum; +} + + + +void +usage (void) +{ +#ifdef SSBM + fprintf (stderr, "%s\n%s\n\t%s\n%s %s\n\n", + "USAGE:", + "dbgen [-{vfFD}] [-O {fhmsv}][-T {pcsdla}]", + "[-s ][-C ][-S ]", + "dbgen [-v] [-O {dfhmr}] [-s ]", + "[-U ] [-r ]"); + +#else + fprintf (stderr, "%s\n%s\n\t%s\n%s %s\n\n", + "USAGE:", + "dbgen [-{vfFD}] [-O {fhmsv}][-T {pcsoPSOL}]", + "[-s ][-C ][-S ]", + "dbgen [-v] [-O {dfhmr}] [-s ]", + "[-U ] [-r ]"); +#endif + fprintf (stderr, "-b -- load distributions for \n"); + fprintf (stderr, "-C -- use 
processes to generate data\n"); + fprintf (stderr, " [Under DOS, must be used with -S]\n"); + fprintf (stderr, "-D -- do database load in line\n"); + fprintf (stderr, "-d -- split deletes between files\n"); + fprintf (stderr, "-f -- force. Overwrite existing files\n"); + fprintf (stderr, "-F -- generate flat files output\n"); + fprintf (stderr, "-h -- display this message\n"); + fprintf (stderr, "-i -- split inserts between files\n"); + fprintf (stderr, "-n -- inline load into database \n"); + fprintf (stderr, "-O d -- generate SQL syntax for deletes\n"); + fprintf (stderr, "-O f -- over-ride default output file names\n"); + fprintf (stderr, "-O h -- output files with headers\n"); + fprintf (stderr, "-O m -- produce columnar output\n"); + fprintf (stderr, "-O r -- generate key ranges for deletes.\n"); + fprintf (stderr, "-O v -- Verify data set without generating it.\n"); + fprintf (stderr, "-q -- enable QUIET mode\n"); + fprintf (stderr, "-r -- updates refresh (n/100)%% of the\n"); + fprintf (stderr, " data set\n"); + fprintf (stderr, "-s -- set Scale Factor (SF) to \n"); + fprintf (stderr, "-S -- build the th step of the data/update set\n"); + +#ifdef SSBM + fprintf (stderr, "-T c -- generate cutomers dimension table ONLY\n"); + fprintf (stderr, "-T p -- generate parts dimension table ONLY\n"); + fprintf (stderr, "-T s -- generate suppliers dimension table ONLY\n"); + fprintf (stderr, "-T d -- generate date dimension table ONLY\n"); + fprintf (stderr, "-T l -- generate lineorder fact table ONLY\n"); +#else + fprintf (stderr, "-T c -- generate cutomers ONLY\n"); + fprintf (stderr, "-T l -- generate nation/region ONLY\n"); + fprintf (stderr, "-T L -- generate lineitem ONLY\n"); + fprintf (stderr, "-T n -- generate nation ONLY\n"); + fprintf (stderr, "-T o -- generate orders/lineitem ONLY\n"); + fprintf (stderr, "-T O -- generate orders ONLY\n"); + fprintf (stderr, "-T p -- generate parts/partsupp ONLY\n"); + fprintf (stderr, "-T P -- generate parts ONLY\n"); + 
fprintf (stderr, "-T r -- generate region ONLY\n"); + fprintf (stderr, "-T s -- generate suppliers ONLY\n"); + fprintf (stderr, "-T S -- generate partsupp ONLY\n"); +#endif + + fprintf (stderr, "-U -- generate update sets\n"); + fprintf (stderr, "-v -- enable VERBOSE mode\n"); + fprintf (stderr, + "\nTo generate the SF=1 (1GB), validation database population, use:\n"); + fprintf (stderr, "\tdbgen -vfF -s 1\n"); + fprintf (stderr, "\nTo generate updates for a SF=1 (1GB), use:\n"); + fprintf (stderr, "\tdbgen -v -U 1 -s 1\n"); +} + +/* +* pload() -- handle the parallel loading of tables +*/ +/* +* int partial(int tbl, int s) -- generate the s-th part of the named tables data +*/ +int +partial (int tbl, int s) +{ + long rowcnt; + long extra; + + if (verbose > 0) + { + fprintf (stderr, "\tStarting to load stage %d of %d for %s...", + s, children, tdefs[tbl].comment); + } + + if (direct == 0) + set_files (tbl, s); + + rowcnt = set_state(tbl, scale, children, s, &extra); + + if (s == children) + gen_tbl (tbl, rowcnt * (s - 1) + 1, rowcnt + extra, upd_num); + else + gen_tbl (tbl, rowcnt * (s - 1) + 1, rowcnt, upd_num); + + if (verbose > 0) + fprintf (stderr, "done.\n"); + + return (0); +} + +#ifndef DOS + +int +pload (int tbl) +{ + int c = 0, i, status; + + if (verbose > 0) + { + fprintf (stderr, "Starting %d children to load %s", + children, tdefs[tbl].comment); + } + for (c = 0; c < children; c++) + { + pids[c] = SPAWN (); + if (pids[c] == -1) + { + perror ("Child loader not created"); + kill_load (); + exit (-1); + } + else if (pids[c] == 0) /* CHILD */ + { + SET_HANDLER (stop_proc); + verbose = 0; + partial (tbl, c+1); + exit (0); + } + else if (verbose > 0) /* PARENT */ + fprintf (stderr, "."); + } + + if (verbose > 0) + fprintf (stderr, "waiting..."); + + c = children; + while (c) + { + i = WAIT (&status, pids[c - 1]); + if (i == -1 && children) + { + if (errno == ECHILD) + fprintf (stderr, "\nCould not wait on pid %d\n", pids[c - 1]); + else if (errno == EINTR) + 
fprintf (stderr, "\nProcess %d stopped abnormally\n", pids[c - 1]); + else if (errno == EINVAL) + fprintf (stderr, "\nProgram bug\n"); + } + if (! WIFEXITED(status)) { + (void) fprintf(stderr, "\nProcess %d: ", i); + if (WIFSIGNALED(status)) { + (void) fprintf(stderr, "rcvd signal %d\n", + WTERMSIG(status)); + } else if (WIFSTOPPED(status)) { + (void) fprintf(stderr, "stopped, signal %d\n", + WSTOPSIG(status)); + } + + } + c--; + } + + if (verbose > 0) + fprintf (stderr, "done\n"); + return (0); +} +#endif + + +void +process_options (int count, char **vector) +{ + int option; + + while ((option = getopt (count, vector, + "b:C:Dd:Ffi:hn:O:P:qr:s:S:T:U:v")) != -1) + switch (option) + { + case 'b': /* load distributions from named file */ + d_path = (char *)malloc(strlen(optarg) + 1); + MALLOC_CHECK(d_path); + strcpy(d_path, optarg); + break; + case 'q': /* all prompts disabled */ + verbose = -1; + break; + case 'i': + insert_segments = atoi (optarg); + break; + case 'd': + delete_segments = atoi (optarg); + break; + case 'S': /* generate a particular STEP */ + step = atoi (optarg); + break; + case 'v': /* life noises enabled */ + verbose = 1; + break; + case 'f': /* blind overwrites; Force */ + force = 1; + break; + case 'T': /* generate a specifc table */ + switch (*optarg) + { +#ifdef SSBM + case 'c': /* generate customer ONLY */ + table = 1 << CUST; + break; + case 'p': /* generate part ONLY */ + table = 1 << PART; + break; + case 's': /* generate partsupp ONLY */ + table = 1 << SUPP; + break; + case 'd': /* generate date ONLY */ + table = 1 << DATE; + break; + case 'l': /* generate lineorder table ONLY */ + table = 1 << LINE; + break; + case 'a': + table = 1 << CUST; + table |= 1 << PART; + table |= 1 << SUPP; + table |= 1 << DATE; + table |= 1 << LINE; + break; +#else + case 'c': /* generate customer ONLY */ + table = 1 << CUST; + break; + case 'L': /* generate lineitems ONLY */ + table = 1 << LINE; + break; + case 'l': /* generate code table ONLY */ + table = 1 
<< NATION; + table |= 1 << REGION; + break; + case 'n': /* generate nation table ONLY */ + table = 1 << NATION; + break; + case 'O': /* generate orders ONLY */ + table = 1 << ORDER; + break; + case 'o': /* generate orders/lineitems ONLY */ + table = 1 << ORDER_LINE; + break; + case 'P': /* generate part ONLY */ + table = 1 << PART; + break; + case 'p': /* generate part/partsupp ONLY */ + table = 1 << PART_PSUPP; + break; + case 'r': /* generate region table ONLY */ + table = 1 << REGION; + break; + case 'S': /* generate partsupp ONLY */ + table = 1 << PSUPP; + break; + case 's': /* generate suppliers ONLY */ + table = 1 << SUPP; + break; +#endif + default: + fprintf (stderr, "Unknown table name %s\n", + optarg); + usage (); + exit (1); + } + break; + case 's': /* scale by Percentage of base rowcount */ + case 'P': /* for backward compatibility */ + flt_scale = atof (optarg); + if (flt_scale < MIN_SCALE) + { + int i; + + scale = 1; + for (i = PART; i < REGION; i++) + { + tdefs[i].base *= flt_scale; + if (tdefs[i].base < 1) + tdefs[i].base = 1; + } + } + else + scale = (long) flt_scale; + if (scale > MAX_SCALE) + { + fprintf (stderr, "%s %5.0f %s\n\t%s\n\n", + "NOTE: Data generation for scale factors >", + MAX_SCALE, + "GB is still in development,", + "and is not yet supported.\n"); + fprintf (stderr, + "Your resulting data set MAY NOT BE COMPLIANT!\n"); + } + break; + case 'O': /* optional actions */ + switch (tolower (*optarg)) + { + case 'd': /* generate SQL for deletes */ + gen_sql = 1; + break; + case 'f': /* over-ride default file names */ + fnames = 1; + break; + case 'h': /* generate headers */ + header = 1; + break; + case 'm': /* generate columnar output */ + columnar = 1; + break; + case 'r': /* generate key ranges for delete */ + gen_rng = 1; + break; + case 's': /* calibrate the RNG usage */ + set_seeds = 1; + break; + case 'v': /* validate the data set */ + validate = 1; + break; + default: + fprintf (stderr, "Unknown option name %s\n", + optarg); + 
usage (); + exit (1); + } + break; + case 'D': /* direct load of generated data */ + direct = 1; + break; + case 'F': /* generate flat files for later loading */ + direct = 0; + break; + case 'U': /* generate flat files for update stream */ + updates = atoi (optarg); + break; + case 'r': /* set the refresh (update) percentage */ + refresh = atoi (optarg); + break; +#ifndef DOS + case 'C': + children = atoi (optarg); + break; +#endif /* !DOS */ + case 'n': /* set name of database for direct load */ + db_name = (char *) malloc (strlen (optarg) + 1); + MALLOC_CHECK (db_name); + strcpy (db_name, optarg); + break; + default: + printf ("ERROR: option '%c' unknown.\n", + *(vector[optind] + 1)); + case 'h': /* something unexpected */ + fprintf (stderr, + "%s Population Generator (Version %d.%d.%d%s)\n", + NAME, VERSION, RELEASE, + MODIFICATION, PATCH); + fprintf (stderr, "Copyright %s %s\n", TPC, C_DATES); + usage (); + exit (1); + } + +#ifndef DOS + if (children != 1 && step == -1) + { + pids = malloc(children * sizeof(pid_t)); + MALLOC_CHECK(pids) + } +#else + if (children != 1 && step < 0) + { + fprintf(stderr, "ERROR: -C must be accompanied by -S on this platform\n"); + exit(1); + } +#endif /* DOS */ + + return; +} + +/* +* MAIN +* +* assumes the existance of getopt() to clean up the command +* line handling +*/ +int +main (int ac, char **av) +{ + int i; + + table = (1 << CUST) | + (1 << SUPP) | + (1 << NATION) | + (1 << REGION) | + (1 << PART_PSUPP) | + (1 << ORDER_LINE); + force = 0; + insert_segments=0; + delete_segments=0; + insert_orders_segment=0; + insert_lineitem_segment=0; + delete_segment=0; + verbose = 0; + columnar = 0; + set_seeds = 0; + header = 0; + direct = 0; + scale = 1; + flt_scale = 1.0; + updates = 0; + refresh = UPD_PCT; + step = -1; +#ifdef SSBM + tdefs[LINE].base *= + ORDERS_PER_CUST; /* have to do this after init */ +#else + tdefs[ORDER].base *= + ORDERS_PER_CUST; /* have to do this after init */ + tdefs[LINE].base *= + ORDERS_PER_CUST; /* have 
to do this after init */ + tdefs[ORDER_LINE].base *= + ORDERS_PER_CUST; /* have to do this after init */ +#endif + fnames = 0; + db_name = NULL; + gen_sql = 0; + gen_rng = 0; + children = 1; + d_path = NULL; + +#ifdef NO_SUPPORT + signal (SIGINT, exit); +#endif /* NO_SUPPORT */ + process_options (ac, av); +#if (defined(WIN32)&&!defined(_POSIX_)) + for (i = 0; i < ac; i++) + { + spawn_args[i] = malloc ((strlen (av[i]) + 1) * sizeof (char)); + MALLOC_CHECK (spawn_args[i]); + strcpy (spawn_args[i], av[i]); + } + spawn_args[ac] = NULL; +#endif + + if (verbose >= 0) + { + fprintf (stderr, + "%s Population Generator (Version %d.%d.%d%s)\n", + NAME, VERSION, RELEASE, MODIFICATION, PATCH); + fprintf (stderr, "Copyright %s %s\n", TPC, C_DATES); + } + + load_dists (); + /* have to do this after init */ + tdefs[NATION].base = nations.count; + tdefs[REGION].base = regions.count; + + /* + * updates are never parallelized + */ + if (updates) + { + /* + * set RNG to start generating rows beyond SF=scale + */ + double fix1; + +#ifdef SSBM + set_state (LINE, scale, 1, 2, (long *)&i); + fix1 = (double)tdefs[LINE].base / (double)10000; /*represent the %% percentage (n/100)%*/ +#else + set_state (ORDER, scale, 1, 2, (long *)&i); + fix1 = (double)tdefs[ORDER_LINE].base / (double)10000; +#endif + rowcnt = (int)(fix1 * scale * refresh); + if (step > 0) + { + /* + * adjust RNG for any prior update generation + */ + sd_order(0, rowcnt * (step - 1)); + sd_line(0, rowcnt * (step - 1)); + upd_num = step - 1; + } + else + upd_num = 0; + + while (upd_num < updates) + { + if (verbose > 0) +#ifdef SSBM + fprintf (stderr, + "Generating update pair #%d for %s [pid: %d]", + upd_num + 1, tdefs[LINE].comment, DSS_PROC); +#else + fprintf (stderr, + "Generating update pair #%d for %s [pid: %d]", + upd_num + 1, tdefs[ORDER_LINE].comment, DSS_PROC); + +#endif + insert_orders_segment=0; + insert_lineitem_segment=0; + delete_segment=0; + minrow = upd_num * rowcnt + 1; +#ifdef SSBM + gen_tbl (LINE, minrow, 
rowcnt, upd_num + 1); +#else + gen_tbl (ORDER_LINE, minrow, rowcnt, upd_num + 1); +#endif + if (verbose > 0) + fprintf (stderr, "done.\n"); +#ifdef SSBM + pr_drange (LINE, minrow, rowcnt, upd_num + 1); +#else + pr_drange (ORDER_LINE, minrow, rowcnt, upd_num + 1); +#endif + upd_num++; + } + + exit (0); + } + + /** + ** actual data generation section starts here + **/ +/* + * open database connection or set all the file names, as appropriate + */ + if (direct) + prep_direct ((db_name) ? db_name : DBNAME); + else if (fnames) + for (i = PART; i <= REGION; i++) + { + if (table & (1 << i)) + if (set_files (i, -1)) + { + fprintf (stderr, "Load aborted!\n"); + exit (1); + } + } + +/* + * traverse the tables, invoking the appropriate data generation routine for any to be built + */ + for (i = PART; i <= REGION; i++) + if (table & (1 << i)) + { + if (children > 1 && i < NATION) + if (step >= 0) + { + if (validate) + { + INTERNAL_ERROR("Cannot validate parallel data generation"); + } + else + partial (i, step); + } +#ifdef DOS + else + { + fprintf (stderr, + "Parallel load is not supported on your platform.\n"); + exit (1); + } +#else + else + { + if (validate) + { + INTERNAL_ERROR("Cannot validate parallel data generation"); + } + else + pload (i); + } +#endif /* DOS */ + else + { + minrow = 1; + if (i < NATION) + rowcnt = tdefs[i].base * scale; + else + rowcnt = tdefs[i].base; +#ifdef SSBM + if(i==PART){ + rowcnt = tdefs[i].base * (floor(1+log((double)(scale))/(log(2)))); + } + if(i==DATE){ + rowcnt = tdefs[i].base; + } +#endif + if (verbose > 0) + fprintf (stderr, "%s data for %s [pid: %ld]", + (validate)?"Validating":"Generating", tdefs[i].comment, DSS_PROC); + gen_tbl (i, minrow, rowcnt, upd_num); + if (verbose > 0) + fprintf (stderr, "done.\n"); + } + if (validate) + printf("Validation checksum for %s at %d GB: %0x\n", + tdefs[i].name, scale, tdefs[i].vtotal); + } + + if (direct) + close_direct (); + + return (0); +} + + + + + + + + + + + diff --git a/dss.ddl b/dss.ddl 
new file mode 100644 index 0000000..2fe3e70 --- /dev/null +++ b/dss.ddl @@ -0,0 +1,70 @@ +-- Sccsid: @(#)dss.ddl 2.1.8.1 +CREATE TABLE TPCD.NATION ( N_NATIONKEY INTEGER NOT NULL, + N_NAME CHAR(25) NOT NULL, + N_REGIONKEY INTEGER NOT NULL, + N_COMMENT VARCHAR(152)); + +CREATE TABLE TPCD.REGION ( R_REGIONKEY INTEGER NOT NULL, + R_NAME CHAR(25) NOT NULL, + R_COMMENT VARCHAR(152)); + +CREATE TABLE TPCD.PART ( P_PARTKEY INTEGER NOT NULL, + P_NAME VARCHAR(55) NOT NULL, + P_MFGR CHAR(25) NOT NULL, + P_BRAND CHAR(10) NOT NULL, + P_TYPE VARCHAR(25) NOT NULL, + P_SIZE INTEGER NOT NULL, + P_CONTAINER CHAR(10) NOT NULL, + P_RETAILPRICE DECIMAL(15,2) NOT NULL, + P_COMMENT VARCHAR(23) NOT NULL ); + +CREATE TABLE TPCD.SUPPLIER ( S_SUPPKEY INTEGER NOT NULL, + S_NAME CHAR(25) NOT NULL, + S_ADDRESS VARCHAR(40) NOT NULL, + S_NATIONKEY INTEGER NOT NULL, + S_PHONE CHAR(15) NOT NULL, + S_ACCTBAL DECIMAL(15,2) NOT NULL, + S_COMMENT VARCHAR(101) NOT NULL); + +CREATE TABLE TPCD.PARTSUPP ( PS_PARTKEY INTEGER NOT NULL, + PS_SUPPKEY INTEGER NOT NULL, + PS_AVAILQTY INTEGER NOT NULL, + PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, + PS_COMMENT VARCHAR(199) NOT NULL ); + +CREATE TABLE TPCD.CUSTOMER ( C_CUSTKEY INTEGER NOT NULL, + C_NAME VARCHAR(25) NOT NULL, + C_ADDRESS VARCHAR(40) NOT NULL, + C_NATIONKEY INTEGER NOT NULL, + C_PHONE CHAR(15) NOT NULL, + C_ACCTBAL DECIMAL(15,2) NOT NULL, + C_MKTSEGMENT CHAR(10) NOT NULL, + C_COMMENT VARCHAR(117) NOT NULL); + +CREATE TABLE TPCD.ORDERS ( O_ORDERKEY INTEGER NOT NULL, + O_CUSTKEY INTEGER NOT NULL, + O_ORDERSTATUS CHAR(1) NOT NULL, + O_TOTALPRICE DECIMAL(15,2) NOT NULL, + O_ORDERDATE DATE NOT NULL, + O_ORDERPRIORITY CHAR(15) NOT NULL, -- R + O_CLERK CHAR(15) NOT NULL, -- R + O_SHIPPRIORITY INTEGER NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL); + +CREATE TABLE TPCD.LINEITEM ( L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE 
DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, -- R + L_SHIPMODE CHAR(10) NOT NULL, -- R + L_COMMENT VARCHAR(44) NOT NULL); + diff --git a/dss.h b/dss.h new file mode 100644 index 0000000..8f78d89 --- /dev/null +++ b/dss.h @@ -0,0 +1,610 @@ +/* + * Sccsid: @(#)dss.h 2.1.8.5 + * + * general definitions and control information for the DSS code + * generator; if it controls the data set, it's here + */ +#ifndef DSS_H +#define DSS_H + +#ifdef SSBM +#define NAME "SSBM (Star Schema Benchmark)" +#define VERSION 1 +#define RELEASE 0 +#define MODIFICATION 0 +#define PATCH "" + + +/*global variables*/ +/*SSBM added DATE table*/ +#define DATE 4 + +/*SSBM use the lineorder without partsupp and order table*/ +#define L_SKEY_MIN 1 +#define L_SKEY_MAX (tdefs[SUPP].base * scale) + +#endif + +#ifdef TPCH +#define NAME "TPC-H" +#define VERSION 1 +#define RELEASE 3 +#define MODIFICATION 0 +#define PATCH "" +#endif +#ifdef TPCR +#define NAME "TPC-R" +#define VERSION 1 +#define RELEASE 3 +#define MODIFICATION 0 +#define PATCH "" +#endif +#ifndef NAME +#error Benchmark version must be defined in config.h +#endif +#define TPC "Transaction Processing Performance Council" +#define C_DATES "1994 - 2000" + +#include "config.h" +#include "shared.h" + +#include <stdio.h> /* FILE, fprintf: used by the prototypes and macros below */ +#include <stdlib.h> /* malloc, free, exit, abort: used by the macros below */ +#ifdef SSBM +#include <math.h> /* floor, log: used by the SSBM L_PKEY_MAX define */ +#endif + +#define NONE -1 +#define PART 0 +#define PSUPP 1 +#define SUPP 2 +#define CUST 3 +#define ORDER 4 +#define LINE 5 +#define ORDER_LINE 6 +#define PART_PSUPP 7 +#define NATION 8 +#define REGION 9 +#define UPDATE 10 +#define MAX_TABLE 11 +#define ONE_STREAM 1 +#define ADD_AT_END 2 + +#ifdef MAX +#undef MAX +#endif +#ifdef MIN +#undef MIN +#endif +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) /* args parenthesized like MIN; each may still be evaluated twice */ +#define MIN(A,B) ( (A) < (B) ? 
(A) : (B)) + +#define INTERNAL_ERROR(p) {fprintf(stderr,"%s", p);abort();} +#define LN_CNT 4 +static char lnoise[4] = {'|', '/', '-', '\\' }; +#define LIFENOISE(n, var) \ + if (verbose > 0) fprintf(stderr, "%c\b", lnoise[(var%LN_CNT)]) + +#define MALLOC_CHECK(var) \ + if ((var) == NULL) \ + { \ + fprintf(stderr, "Malloc failed at %s:%d\n", \ + __FILE__, __LINE__); \ + exit(1);\ + } +#define OPEN_CHECK(var, path) \ + if ((var) == NULL) \ + { \ + fprintf(stderr, "Open failed for %s at %s:%d\n", \ + path, __FILE__, __LINE__); \ + exit(1);\ + } +#ifndef MAX_CHILDREN +#define MAX_CHILDREN 1000 +#endif + +/* + * macros that control sparse keys + * + * refer to Porting.Notes for a complete explanation + */ +#ifndef BITS_PER_LONG +#define BITS_PER_LONG 32 +#define MAX_LONG 0x7FFFFFFF +#endif /* BITS_PER_LONG */ +#define SPARSE_BITS 2 +#define SPARSE_KEEP 3 +#define MK_SPARSE(key, seq) \ + (((((key>>3)<<2)|(seq & 0x0003))<<3)|(key & 0x0007)) + +#define RANDOM(tgt, lower, upper, stream) dss_random(&tgt, lower, upper, stream) +#ifdef SSBM +typedef struct{ + char * name; + int start_day; + int start_month; + int end_day; + int end_month; +} season; +typedef struct { + char * name; + int month; + int day; +} holiday; + + +#endif + + +typedef struct +{ + long weight; + char *text; +} set_member; + +typedef struct +{ + int count; + int max; + set_member *list; + long *permute; +} distribution; + +/* + * some handy access functions + */ +#define DIST_SIZE(d) d->count +#define DIST_MEMBER(d, i) ((set_member *)((d)->list + i))->text + +typedef struct +{ + char *name; + char *comment; + long base; + int (*header) (); + int (*loader[2]) (); + long (*gen_seed)(); + int (*verify) (); + int child; + unsigned long vtotal; +} tdef; + +typedef struct SEED_T { + long table; + long value; + long usage; + long boundary; + } seed_t; + + +#if defined(__STDC__) +#define PROTO(s) s +#else +#define PROTO(s) () +#endif + +/* bm_utils.c */ +char *env_config PROTO((char *var, char *dflt)); +long 
yes_no PROTO((char *prompt)); +int a_rnd PROTO((int min, int max, int column, char *dest)); +int tx_rnd PROTO((long min, long max, long column, char *tgt)); +long julian PROTO((long date)); +long unjulian PROTO((long date)); +FILE *tbl_open PROTO((int tbl, char *mode)); +long dssncasecmp PROTO((char *s1, char *s2, int n)); +long dsscasecmp PROTO((char *s1, char *s2)); +int pick_str PROTO((distribution * s, int c, char *target)); +void agg_str PROTO((distribution *set, long count, long col, char *dest)); +void read_dist PROTO((char *path, char *name, distribution * target)); +void embed_str PROTO((distribution *d, int min, int max, int stream, char *dest)); +#ifndef STDLIB_HAS_GETOPT +int getopt PROTO((int arg_cnt, char **arg_vect, char *oprions)); +#endif /* STDLIB_HAS_GETOPT */ +long set_state PROTO((int t, long scale, long procs, long step, long *e)); + +/* rnd.c */ +long NextRand PROTO((long nSeed)); +long UnifInt PROTO((long nLow, long nHigh, long nStream)); +double UnifReal PROTO((double dLow, double dHigh, long nStream)); +double Exponential PROTO((double dMean, long nStream)); +void dss_random(long *tgt, long min, long max, long seed); +void row_start(int t); +void row_stop(int t); +void dump_seeds(int t); + +/* text.c */ +#define MAX_GRAMMAR_LEN 12 /* max length of grammar component */ +#define MAX_SENT_LEN 256 /* max length of populated sentence */ +#define RNG_PER_SENT 27 /* max number of RNG calls per sentence */ + +int dbg_text PROTO((char * t, int min, int max, int s)); + +#ifdef DECLARER +#define EXTERN +#else +#define EXTERN extern +#endif /* DECLARER */ + + +EXTERN distribution nations; +EXTERN distribution nations2; +EXTERN distribution regions; +EXTERN distribution o_priority_set; +EXTERN distribution l_instruct_set; +EXTERN distribution l_smode_set; +EXTERN distribution l_category_set; +EXTERN distribution l_rflag_set; +EXTERN distribution c_mseg_set; +EXTERN distribution colors; +EXTERN distribution p_types_set; +EXTERN distribution p_cntr_set; 
+ +/* distributions that control text generation */ +EXTERN distribution articles; +EXTERN distribution nouns; +EXTERN distribution adjectives; +EXTERN distribution adverbs; +EXTERN distribution prepositions; +EXTERN distribution verbs; +EXTERN distribution terminators; +EXTERN distribution auxillaries; +EXTERN distribution np; +EXTERN distribution vp; +EXTERN distribution grammar; + + +EXTERN long scale; +EXTERN int refresh; +EXTERN int resume; +EXTERN long verbose; +EXTERN long force; +EXTERN long header; +EXTERN long columnar; +EXTERN long direct; +EXTERN long updates; +EXTERN long table; +EXTERN long children; +EXTERN long fnames; +EXTERN int gen_sql; +EXTERN int gen_rng; +EXTERN char *db_name; +EXTERN int step; +EXTERN int set_seeds; +EXTERN int validate; +EXTERN char *d_path; + +/* added for segmented updates */ +EXTERN int insert_segments; +EXTERN int delete_segments; +EXTERN int insert_orders_segment; +EXTERN int insert_lineitem_segment; +EXTERN int delete_segment; + + +#ifndef DECLARER +extern tdef tdefs[]; + +#endif /* DECLARER */ + + +/***************************************************************** + ** table level defines use the following naming convention: t_ccc_xxx + ** with: t, a table identifier + ** ccc, a column identifier + ** xxx, a limit type + **************************************************************** + */ + +/* + * defines which control the parts table + */ +#define P_SIZE 126 +#ifdef SSBM +#define P_NAME_SCL 3 /*5 change to 3 according to the new schema*/ +#else +#define P_NAME_SCL 5 +#endif +#define P_MFG_TAG "Manufacturer#" +#define P_MFG_FMT "%s%01d" +#define P_MFG_MIN 1 +#define P_MFG_MAX 5 +#define P_BRND_TAG "Brand#" +#define P_BRND_FMT "%s%02d" +#define P_BRND_MIN 1 + +/*#ifdef SSBM +#define P_BRND_MAX 5 +#else*/ +#define P_BRND_MAX 40 +/*#endif*/ + +#define P_SIZE_MIN 1 +#define P_SIZE_MAX 50 +#define P_MCST_MIN 100 +#define P_MCST_MAX 99900 +#define P_MCST_SCL 100.0 +#define P_RCST_MIN 90000 +#define P_RCST_MAX 200000 
+#define P_RCST_SCL 100.0 +/* + * defines which control the suppliers table + */ +#define S_SIZE 145 +#define S_NAME_TAG "Supplier#" +#define S_NAME_FMT "%s%09ld" +#define S_ABAL_MIN -99999 +#define S_ABAL_MAX 999999 +#define S_CMNT_MAX 101 +#define S_CMNT_BBB 10 /* number of BBB comments/SF */ +#define BBB_DEADBEATS 50 /* % that are complaints */ +#define BBB_BASE "Customer " +#define BBB_COMPLAIN "Complaints" +#define BBB_COMMEND "Recommends" +#define BBB_CMNT_LEN 19 +#define BBB_BASE_LEN 9 +#define BBB_TYPE_LEN 10 + +/* + * defines which control the partsupp table + */ +#define PS_SIZE 145 +#define PS_SKEY_MIN 0 +#define PS_SKEY_MAX ((tdefs[SUPP].base - 1) * scale) +#define PS_SCST_MIN 100 +#define PS_SCST_MAX 100000 +#define PS_QTY_MIN 1 +#define PS_QTY_MAX 9999 +/* + * defines which control the customers table + */ +#define C_SIZE 165 +#define C_NAME_TAG "Customer#" +#define C_NAME_FMT "%s%09d" +#define C_MSEG_MAX 5 +#define C_ABAL_MIN -99999 +#define C_ABAL_MAX 999999 +/* + * defines which control the order table + */ +#define O_SIZE 109 +#define O_CKEY_MIN 1 +#define O_CKEY_MAX (long)(tdefs[CUST].base * scale) +#define O_ODATE_MIN STARTDATE +#define O_ODATE_MAX (STARTDATE + TOTDATE - \ + (L_SDTE_MAX + L_RDTE_MAX) - 1) +#define O_CLRK_TAG "Clerk#" +#define O_CLRK_FMT "%s%09d" +#define O_CLRK_SCL 1000 +#define O_LCNT_MIN 1 +#define O_LCNT_MAX 7 + +/* + * defines which control the lineitem table + */ +#define L_SIZE 144L +#define L_QTY_MIN 1 +#define L_QTY_MAX 50 +#define L_TAX_MIN 0 +#define L_TAX_MAX 8 +#define L_DCNT_MIN 0 +#define L_DCNT_MAX 10 +#define L_PKEY_MIN 1 + +#ifdef SSBM +/*part table log based: floor(1 + log2(scale)), the same factor driver.c uses for the PART row count*/ +#define L_PKEY_MAX (tdefs[PART].base * (floor(1+log((double)scale)/log(2.0)))) +#else +#define L_PKEY_MAX (tdefs[PART].base * scale) +#endif + +#define L_SDTE_MIN 1 +#define L_SDTE_MAX 121 +#define L_CDTE_MIN 30 +#define L_CDTE_MAX 90 +#define L_RDTE_MIN 1 +#define L_RDTE_MAX 30 +/* + * defines which control the time table + */ +#define T_SIZE 30 +#define 
T_START_DAY 3 /* wednesday ? */ +#define LEAP(y) ((!((y) % 4) && ((y) % 100))?1:0) /* 1 when y is divisible by 4 but not by 100; no 400-year rule */ + +/******************************************************************* + ******************************************************************* + *** + *** general or inter table defines + *** + ******************************************************************* + *******************************************************************/ +#define SUPP_PER_PART 4 +#define ORDERS_PER_CUST 10 /* sync this with CUST_MORTALITY */ +#define CUST_MORTALITY 3 /* portion which have no orders */ +#define NATIONS_MAX 90 /* limited by country codes in phone numbers */ +#define PHONE_FMT "%02d-%03d-%03d-%04d" +#define STARTDATE 92001 +#define CURRENTDATE 95168 +#define ENDDATE 98365 +#define TOTDATE 2557 +#define UPD_PCT 10 +#define MAX_STREAM 47 +#define V_STR_LOW 0.4 +#define PENNIES 100 /* for scaled int money arithmetic */ +#define Q11_FRACTION (double)0.0001 +/* + * max and min SF in GB; Larger SF will require changes to the build routines + */ +#define MIN_SCALE 1.0 +#define MAX_SCALE 1000.0 +/* + * beyond this point we need to allow for BCD calculations + */ +#define MAX_32B_SCALE 1000.0 +#define INIT_HUGE(v) { \ + v = (DSS_HUGE *)malloc(sizeof(DSS_HUGE) * HUGE_COUNT); \ + MALLOC_CHECK(v); \ + } +#define FREE_HUGE(v) free(v) +#ifdef SUPPORT_64BITS +#define LONG2HUGE(src, dst) *dst = (DSS_HUGE)src +#define HUGE2LONG(src, dst) *dst = (long)src +#define HUGE_SET(src, dst) *dst = *src +#define HUGE_MUL(op1, op2) *op1 *= op2 +#define HUGE_DIV(op1, op2) *op1 /= op2 +#define HUGE_ADD(op1, op2, dst) *dst = *op1 + op2 +#define HUGE_SUB(op1, op2, dst) *dst = *op1 - op2 +#define HUGE_MOD(op1, op2) *op1 % op2 +#define HUGE_CMP(op1, op2) ((*op1 == *op2)?0:((*op1 < *op2)?-1:1)) /* 0 if equal, -1 if *op1 < *op2, else 1 */ +#else +#define LONG2HUGE(src, dst) {*dst = src; *(dst + 1) = 0;} +#define HUGE2LONG(src, dst) { dst=0 ; \ + bcd2_bin(dst, (src + 1)); \ + bcd2_bin(dst, src); } +#define HUGE_SET(src, dst) { *dst = *src ; *(dst + 1) = *(src + 1); } +#define 
HUGE_MUL(op1,op2) bcd2_mul(op1, (op1 + 1), op2) +#define HUGE_DIV(op1,op2) bcd2_div(op1, (op1 + 1), op2) +#define HUGE_ADD(op1,op2,d) { \ + HUGE_SET(op1, d); \ + bcd2_add(d, (d + 1), op2); \ + } +#define HUGE_SUB(op1,op2,d) { \ + HUGE_SET(op1, d); \ + bcd2_sub(d, (d + 1), op2); \ + } +#define HUGE_MOD(op1, op2) bcd2_mod(op1, (op1 + 1), op2) +#define HUGE_CMP(op1, op2) (bcd2_cmp(op1, (op1 + 1), op2) == 0)?0:\ + ((bcd2_cmp(op1, (op1 + 1), op2) < 0)?-1:1) +#endif /* SUPPORT_64BITS */ + +/******** environmental variables and defaults ***************/ +#define DIST_TAG "DSS_DIST" /* environment var to override ... */ +#define DIST_DFLT "dists.dss" /* default file to hold distributions */ +#define PATH_TAG "DSS_PATH" /* environment var to override ... */ +#define PATH_DFLT "." /* default directory to hold tables */ +#define CONFIG_TAG "DSS_CONFIG" /* environment var to override ... */ +#define CONFIG_DFLT "." /* default directory to config files */ +#define ADHOC_TAG "DSS_ADHOC" /* environment var to override ... 
*/ +#define ADHOC_DFLT "adhoc.dss" /* default file name for adhoc vars */ + +/******* output macros ********/ +#ifndef SEPARATOR +#define SEPARATOR '|' /* field spearator for generated flat files */ +#endif +/* Data type flags for a single print routine */ +#define DT_STR 0 +#ifndef MVS +#define DT_VSTR DT_STR +#else +#define DT_VSTR 1 +#endif /* MVS */ +#define DT_INT 2 +#define DT_HUGE 3 +#define DT_KEY 4 +#define DT_MONEY 5 +#define DT_CHR 6 + +int dbg_print(int dt, FILE *tgt, void *data, int len, int eol); +#define PR_STR(f, str, len) dbg_print(DT_STR, f, (void *)str, len, 1) +#define PR_VSTR(f, str, len) dbg_print(DT_VSTR, f, (void *)str, len, 1) +#define PR_VSTR_LAST(f, str, len) dbg_print(DT_VSTR, f, (void *)str, len, 0) +#define PR_INT(f, str) dbg_print(DT_INT, f, (void *)str, 0, 1) +#define PR_HUGE(f, str) dbg_print(DT_HUGE, f, (void *)str, 0, 1) +#define PR_KEY(f, str) dbg_print(DT_KEY, f, (void *)str, 0, -1) +#define PR_MONEY(f, str) dbg_print(DT_MONEY, f, (void *)str, 0, 1) +#define PR_CHR(f, str) dbg_print(DT_CHR, f, (void *)str, 0, 1) +#define PR_STRT(fp) /* any line prep for a record goes here */ +#define PR_END(fp) fprintf(fp, "\n") /* finish the record here */ + +#ifdef SSBM +#define PR_DATE(tgt, yr, mn, dy) \ + sprintf(tgt, "19%02d%02d%02d", yr, mn, dy) +#else +#ifdef MDY_DATE +#define PR_DATE(tgt, yr, mn, dy) \ + sprintf(tgt, "%02d-%02d-19%02d", mn, dy, yr) +#else +#define PR_DATE(tgt, yr, mn, dy) \ +sprintf(tgt, "19%02d-%02d-%02d", yr, mn, dy) +#endif /* DATE_FORMAT */ +#endif +/* + * verification macros + */ +#define VRF_STR(t, d) {char *xx = d; while (*xx) tdefs[t].vtotal += *xx++;} +#define VRF_INT(t,d) tdefs[t].vtotal += d +#ifdef SUPPORT_64BITS +#define VRF_HUGE(t,d) tdefs[t].vtotal = *((long *)&d) + *((long *)(&d + 1)) +#else +#define VRF_HUGE(t,d) tdefs[t].vtotal += d[0] + d[1] +#endif /* SUPPORT_64BITS */ +/* assume float is a 64 bit quantity */ +#define VRF_MONEY(t,d) tdefs[t].vtotal = *((long *)&d) + *((long *)(&d + 1)) +#define 
VRF_CHR(t,d) tdefs[t].vtotal += d +#define VRF_STRT(t) +#define VRF_END(t) + +/*********** distribuitons currently defined *************/ +#define UNIFORM 0 + +/* + * seed indexes; used to separate the generation of individual columns + */ +#define P_MFG_SD 0 +#define P_BRND_SD 1 +#define P_TYPE_SD 2 +#define P_SIZE_SD 3 +#define P_CNTR_SD 4 +#define P_RCST_SD 5 +#define PS_QTY_SD 7 +#define PS_SCST_SD 8 +#define O_SUPP_SD 10 +#define O_CLRK_SD 11 +#define O_ODATE_SD 13 +#define L_QTY_SD 14 +#define L_DCNT_SD 15 +#define L_TAX_SD 16 +#define L_SHIP_SD 17 +#define L_SMODE_SD 18 +#define L_PKEY_SD 19 +#define L_SKEY_SD 20 +#define L_SDTE_SD 21 +#define L_CDTE_SD 22 +#define L_RDTE_SD 23 +#define L_RFLG_SD 24 +#define C_NTRG_SD 27 +#define C_PHNE_SD 28 +#define C_ABAL_SD 29 +#define C_MSEG_SD 30 +#define S_NTRG_SD 33 +#define S_PHNE_SD 34 +#define S_ABAL_SD 35 +#define P_NAME_SD 37 +#define O_PRIO_SD 38 +#define HVAR_SD 39 +#define O_CKEY_SD 40 +#define N_CMNT_SD 41 +#define R_CMNT_SD 42 +#define O_LCNT_SD 43 +#define BBB_JNK_SD 44 +#define BBB_TYPE_SD 45 +#define BBB_CMNT_SD 46 +#define BBB_OFFSET_SD 47 + +#endif /* DSS_H */ + + + + + + + + + + + + + + + + + diff --git a/dss.ri b/dss.ri new file mode 100644 index 0000000..fb4c002 --- /dev/null +++ b/dss.ri @@ -0,0 +1,100 @@ +-- Sccsid: @(#)dss.ri 2.1.8.1 +-- TPCD Benchmark Version 8.0 + +CONNECT TO TPCD; + +--ALTER TABLE TPCD.REGION DROP PRIMARY KEY; +--ALTER TABLE TPCD.NATION DROP PRIMARY KEY; +--ALTER TABLE TPCD.PART DROP PRIMARY KEY; +--ALTER TABLE TPCD.SUPPLIER DROP PRIMARY KEY; +--ALTER TABLE TPCD.PARTSUPP DROP PRIMARY KEY; +--ALTER TABLE TPCD.ORDERS DROP PRIMARY KEY; +--ALTER TABLE TPCD.LINEITEM DROP PRIMARY KEY; +--ALTER TABLE TPCD.CUSTOMER DROP PRIMARY KEY; + + +-- For table REGION +ALTER TABLE TPCD.REGION +ADD PRIMARY KEY (R_REGIONKEY); + +-- For table NATION +ALTER TABLE TPCD.NATION +ADD PRIMARY KEY (N_NATIONKEY); + +ALTER TABLE TPCD.NATION +ADD FOREIGN KEY NATION_FK1 (N_REGIONKEY) references TPCD.REGION; + 
+COMMIT WORK; + +-- For table PART +ALTER TABLE TPCD.PART +ADD PRIMARY KEY (P_PARTKEY); + +COMMIT WORK; + +-- For table SUPPLIER +ALTER TABLE TPCD.SUPPLIER +ADD PRIMARY KEY (S_SUPPKEY); + +ALTER TABLE TPCD.SUPPLIER +ADD FOREIGN KEY SUPPLIER_FK1 (S_NATIONKEY) references TPCD.NATION; + +COMMIT WORK; + +-- For table PARTSUPP +ALTER TABLE TPCD.PARTSUPP +ADD PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY); + +COMMIT WORK; + +-- For table CUSTOMER +ALTER TABLE TPCD.CUSTOMER +ADD PRIMARY KEY (C_CUSTKEY); + +ALTER TABLE TPCD.CUSTOMER +ADD FOREIGN KEY CUSTOMER_FK1 (C_NATIONKEY) references TPCD.NATION; + +COMMIT WORK; + +-- For table LINEITEM +ALTER TABLE TPCD.LINEITEM +ADD PRIMARY KEY (L_ORDERKEY,L_LINENUMBER); + +COMMIT WORK; + +-- For table ORDERS +ALTER TABLE TPCD.ORDERS +ADD PRIMARY KEY (O_ORDERKEY); + +COMMIT WORK; + +-- For table PARTSUPP +ALTER TABLE TPCD.PARTSUPP +ADD FOREIGN KEY PARTSUPP_FK1 (PS_SUPPKEY) references TPCD.SUPPLIER; + +COMMIT WORK; + +ALTER TABLE TPCD.PARTSUPP +ADD FOREIGN KEY PARTSUPP_FK2 (PS_PARTKEY) references TPCD.PART; + +COMMIT WORK; + +-- For table ORDERS +ALTER TABLE TPCD.ORDERS +ADD FOREIGN KEY ORDERS_FK1 (O_CUSTKEY) references TPCD.CUSTOMER; + +COMMIT WORK; + +-- For table LINEITEM +ALTER TABLE TPCD.LINEITEM +ADD FOREIGN KEY LINEITEM_FK1 (L_ORDERKEY) references TPCD.ORDERS; + +COMMIT WORK; + +ALTER TABLE TPCD.LINEITEM +ADD FOREIGN KEY LINEITEM_FK2 (L_PARTKEY,L_SUPPKEY) references + TPCD.PARTSUPP; + +COMMIT WORK; + + diff --git a/dsstypes.h b/dsstypes.h new file mode 100644 index 0000000..ce2b7d8 --- /dev/null +++ b/dsstypes.h @@ -0,0 +1,312 @@ + /* + * Sccsid: @(#)dsstypes.h 2.1.8.1 + * + * general definitions and control information for the DSS data types + * and function prototypes + * Modified for SSBM prototype + */ + +/* + * typedefs + */ +#ifdef SSBM +typedef struct +{ + long custkey; + char name[C_NAME_LEN + 1]; + int nlen; + char address[C_ADDR_MAX + 1]; + int alen; + char city[CITY_FIX+1]; + int nation_key; + char 
nation_name[C_NATION_NAME_LEN+1]; + int region_key; + char region_name[C_REGION_NAME_LEN+1]; + char phone[PHONE_LEN + 1]; + char mktsegment[MAXAGG_LEN + 1]; +} customer_t; +#else +typedef struct +{ + long custkey; + char name[C_NAME_LEN + 1]; + char address[C_ADDR_MAX + 1]; + int alen; + long nation_code; + char phone[PHONE_LEN + 1]; + long acctbal; + char mktsegment[MAXAGG_LEN + 1]; + char comment[C_CMNT_MAX + 1]; + int clen; +} customer_t; +#endif + +/* customers.c */ +long mk_cust PROTO((long n_cust, customer_t * c)); +int pr_cust PROTO((customer_t * c, int mode)); +int ld_cust PROTO((customer_t * c, int mode)); + +#ifdef SSBM + +typedef struct +{ + DSS_HUGE *okey; /*for clustering line items*/ + int linenumber; /*integer, constrain to max of 7*/ + long custkey; + long partkey; + long suppkey; + char orderdate[DATE_LEN]; + char opriority[MAXAGG_LEN + 1]; + long ship_priority; + long quantity; + long extended_price; + long order_totalprice; + long discount; + long revenue; + long supp_cost; + long tax; + char commit_date[DATE_LEN] ; + char shipmode[O_SHIP_MODE_LEN + 1]; +} lineorder_t; +#else +typedef struct +{ + DSS_HUGE *okey; + long partkey; + long suppkey; + long lcnt; + long quantity; + long eprice; + long discount; + long tax; + char rflag[1]; + char lstatus[1]; + char cdate[DATE_LEN]; + char sdate[DATE_LEN]; + char rdate[DATE_LEN]; + char shipinstruct[MAXAGG_LEN + 1]; + char shipmode[MAXAGG_LEN + 1]; + char comment[L_CMNT_MAX + 1]; + int clen; +} line_t; +#endif + +#ifdef SSBM +typedef struct +{ + DSS_HUGE *okey; + long custkey; + int totalprice; + char odate[DATE_LEN]; + char opriority[MAXAGG_LEN + 1]; + char clerk[O_CLRK_LEN + 1]; + int spriority; + long lines; + lineorder_t lineorders[O_LCNT_MAX]; +} order_t; +#else +typedef struct +{ + DSS_HUGE *okey; + long custkey; + char orderstatus; + long totalprice; + char odate[DATE_LEN]; + char opriority[MAXAGG_LEN + 1]; + char clerk[O_CLRK_LEN + 1]; + long spriority; + long lines; + char comment[O_CMNT_MAX + 
1]; + int clen; + line_t l[O_LCNT_MAX]; +} order_t; +#endif + +/* order.c */ +long mk_order PROTO((long index, order_t * o, long upd_num)); +int pr_order PROTO((order_t * o, int mode)); +int ld_order PROTO((order_t * o, int mode)); +void ez_sparse PROTO((long index, DSS_HUGE *ok, long seq)); +#ifndef SUPPORT_64BITS +void hd_sparse PROTO((long index, DSS_HUGE *ok, long seq)); +#endif + +#ifdef SSBM +/*SSBM removes the part supplier table*/ +#else +typedef struct +{ + long partkey; + long suppkey; + long qty; + long scost; + char comment[PS_CMNT_MAX + 1]; + int clen; +} partsupp_t; +#endif + +#ifdef SSBM +typedef struct +{ + long partkey; + char name[P_NAME_LEN + 1]; + int nlen; + char mfgr[P_MFG_LEN + 1]; + char category[P_CAT_LEN + 1]; + char brand[P_BRND_LEN + 1]; + char color[P_COLOR_MAX + 1]; + int clen; + char type[P_TYPE_MAX + 1]; + int tlen; + long size; + char container[P_CNTR_LEN + 1]; +} part_t; +#else +typedef struct +{ + long partkey; + char name[P_NAME_LEN + 1]; + int nlen; + char mfgr[P_MFG_LEN + 1]; + char brand[P_BRND_LEN + 1]; + char type[P_TYPE_LEN + 1]; + int tlen; + long size; + char container[P_CNTR_LEN + 1]; + long retailprice; + char comment[P_CMNT_MAX + 1]; + int clen; + partsupp_t s[SUPP_PER_PART]; +} part_t; +#endif + +/* parts.c */ +long mk_part PROTO((long index, part_t * p)); +int pr_part PROTO((part_t * part, int mode)); +int ld_part PROTO((part_t * part, int mode)); + +#ifdef SSBM +typedef struct +{ + long suppkey; + char name[S_NAME_LEN + 1]; + char address[S_ADDR_MAX + 1]; + int alen; + char city[CITY_FIX +1]; + int nation_key; + char nation_name[S_NATION_NAME_LEN+1]; + int region_key; + char region_name[S_REGION_NAME_LEN+1]; + char phone[PHONE_LEN + 1]; +} supplier_t; +#else +typedef struct +{ + long suppkey; + char name[S_NAME_LEN + 1]; + char address[S_ADDR_MAX + 1]; + int alen; + long nation_code; + char phone[PHONE_LEN + 1]; + long acctbal; + char comment[S_CMNT_MAX + 1]; + int clen; +} supplier_t; +#endif + +/* supplier.c */ 
+long mk_supp PROTO((long index, supplier_t * s)); +int pr_supp PROTO((supplier_t * supp, int mode)); +int ld_supp PROTO((supplier_t * supp, int mode)); + +#ifdef SSBM +/*todo: add new date table*/ + +typedef struct +{ + long datekey; + char date[D_DATE_LEN+1]; + char dayofweek[D_DAYWEEK_LEN+1] ; + char month[D_MONTH_LEN+1]; + int year; + int yearmonthnum; + char yearmonth[D_YEARMONTH_LEN+1]; + int daynuminweek; + int daynuminmonth; + int daynuminyear; + int monthnuminyear; + int weeknuminyear; + char sellingseason[D_SEASON_LEN + 1]; + int slen; + char lastdayinweekfl[2]; + char lastdayinmonthfl[2]; + char holidayfl[2]; + char weekdayfl[2]; +} date_t; + +/* date.c */ + +long mk_date PROTO((long index, date_t * d)); +int pr_date PROTO((date_t * date, int mode)); +int ld_date PROTO((date_t * date, int mode)); + +#endif + +typedef struct +{ + long timekey; + char alpha[DATE_LEN]; + long year; + long month; + long week; + long day; +} dss_time_t; + +/* time.c */ +long mk_time PROTO((long index, dss_time_t * t)); + + + +/* + * this assumes that N_CMNT_LEN >= R_CMNT_LEN + */ +typedef struct +{ + long code; + char *text; + long join; + char comment[N_CMNT_MAX + 1]; + int clen; +} code_t; + +/* code table */ +int mk_nation PROTO((long i, code_t * c)); +int pr_nation PROTO((code_t * c, int mode)); +int ld_nation PROTO((code_t * c, int mode)); +int mk_region PROTO((long i, code_t * c)); +int pr_region PROTO((code_t * c, int mode)); +int ld_region PROTO((code_t * c, int mode)); + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/history.html b/history.html new file mode 100644 index 0000000..e9dfd0d --- /dev/null +++ b/history.html @@ -0,0 +1,586 @@ + + + + + + +DBGEN Release Notes + + + + + +

Release notes for DBGEN and QGEN

+
+These notes are taken from the History file which is distributed with the TPC-D soft appendix.

+ +

    +
  • Changes as of 5/11/00
      +
    • versions: TPCH 1.3.0, TPCR 1.3.0 +
    • Corrected update range rollover after 1000 update segments +
    • Corrected problem in permute.c causing invalid substitutions in Q16 +
    +
  • Changes as of 10/11/99
      +
    • versions: TPCH 1.2.0a, TPCR 1.1.0a +
    • Corrected range setting of segmented updates that was causing extra file to be generated +
    • Porting corrections for DigUnix +
    +
  • Changes as of 08/30/99
      +
    • versions: TPCH 1.2.0, TPCR 1.1.0 +
    • reduced parameter substitution range for Q18 +
    • added new option to specify location of dists file (-b) +
    • added DBGEN option to suppress all output (-q) +
    +
  • Changes as of 08/16/99
      +
    • versions: TPCH 1.1.0a, TPCR 1.0.1e +
    • prevent "reuse" of original data in update files +
    • correction to lint target in makefile.suite +
    • removal of vestigial l_partkey predicate from 21.sql +
    • reorder lineitem/order join in q5 +
    • removal of table aliases from 2.sql +
    • randomize seeding of qgen RNG to close bug 52 +
    • correct possible round off error in segmented update files +
    • corrected soft copy answer set for Q22 +
    • corrected precision of answer set for Q19 +
    +
  • Changes as of 07/08/99
      +
    • versions: TPCH 1.1.0, TPCR 1.0.1 +
    • WORKLOAD must be set to either TPCH or TPCR in the makefile +
    • unneeded reference to part table removed from q21 template +
    +
  • Changes as of 06/04/99
      +
    • version 1.0.1d +
    • Restarted version numbering to match specification revisions for + TPC-H and TPC-R +
    • Corrected answer set for Q13 +
    • Corrected parameter substitutions for Q16, Q17, Q19, Q20, Q21, Q22 +
    • Corrected RNG initialization in qgen.c +
    • added adhoc.c adhoc.h to code base to support randomized data sets; + currently disabled +
    • replaced calls to UnifInt() row_stop with call to NthElement() +
    • Corrected a problem that caused small negative money values to + print as a positive value +
    • Simplification of PR_xxx macros +
    • QGEN building correct parameter logs again +
    + +
+

+ +******************
+* NOTE NOTE NOTE *
+******************
+Below this line the file refers to TPC-D which was retired in favor of +TPC-H and TPC-R. Since the new specifications are numbered from 1.0.0 +the program version was reset.
+******************
+* NOTE NOTE NOTE *
+******************
+
+

+

    + +
  • Changes as of 01/05/99
      +
    • version 2.0.1 +
    • added 1999 to the copyright notice +
    • corrected C++ compilation problem +
    • sub-select phrasing corrected in Q4, Q21, Q22 +
    • added support for segmenting update files (contributed by Larry Kemp, HP) +
    +
  • Changes as of 12/08/98
      +
    • version 2.0.0 +
    • Removed permute.h from clean target in makefile.suite +
    +
  • Changes as of 11/17/98
      +
    • version 2.0.0 Alpha 8 +
    • corrected o_custkey overrun bug +
    • removed upper bound on -C command option +
    • added static permute.h to distribution to match the specification +
    +
  • Changes as of 10/23/98
      +
    • version 2.0.0 Alpha 7 +
    • removed references to DSS_SEED and SEED_TAG +
    • minor query template cleanup +
    • V2 answer sets added +
    • correction to hd_sparse for SF > 300 +
    • added static declaration to row types in gen_tbl to fix update problem +
    • permuted params to Q22 +
    +
  • Changes as of 5/20/98
      +
    • version 2.0.0 Alpha6b +
    • removed trailing apostrophe from dists.dss nouns for Tandem loader +
    • corrected mk_sparse() problem with alpha6 +
    • added 64b support for NCR/Metaware +
    • corrected generation of parent/child tables in parallel +
    • renamed ORDER table to ORDERS table +
    • revision of DBGEN synced with revision of 2.0 specification +
    • portability changes to process termination provided by John Matzka +
    • portability changes for Watcom C provided by Andrew Eisenberg +
    • standardized query template format +
    • queries now include a consistent header format +
    +
  • Changes as of 4/28/98
      +
    • version 2.0.0 Alpha5 +
    • NO RELEASE OF ALPHA 5 ; skipped to sync spec/DBGEN revision levels
    +
  • Changes as of 7 April 98
      +
    • version 2.0.0 Alpha4 +
    • Query template corrections for Q9, Q12, Q15a, Q22 +
    • Parallel generation of parent/child tables fixed +
    • Minor corrections to dists.dss +
    • Portability changes for HPUX +
    + +
  • Changes as of 3/24/98
      +
    • version 2.0.0 Alpha3 +
    • include substitution parameters for Q22 +
    • correct substitution parameters for Q16 under AIX +
    • include permute.h until unix/NT makefile fix +
    • correct orderkey generation +
    +
  • Changes as of 3/20/98
      +
    • version 2.0.0 Alpha2 +
    • correct runtime malloc error from bad INIT_HUGE macro +
    • improve pseudo text distribution in comments +
    • fix problem with parallelism of data gen +
    • re-enable generation of parent/child tables +
    • remove recombination code for parallel flat files +
    +
  • Changes as of 3/11/98
      +
    • version 2.0.0 Alpha1 +
    • removed the TIME table +
    • removed the need for seed files +
    • made 1GB the validation database size +
    • add pseudo text support in comments +
    • correct character selection in a_rnd() +
    • correct population of P_NAME +
    • removed unclaimed variants +
    • added new queries 18-22, replaced Q13 +
  • Changes as of 2/6/98
      +
    • version 1.3.1 +
    • Revised 64 bit support to clean up bcd2_bin() and mk_sparse() +
    • Add 64b support for NT +
  • Changes as of 12/31/97
      +
    • version 1.3.0 +
    • support for seed generation > 1TB (data gen still to be tested) +
    • rework of 64b support +
    • added bcd support for subtraction, comparison, modulo +
    • added 1998 to the copyright notice +
    • clarified comments in dists.dss +
    • corrected substitution problem in Q11 +
    • standardized fopen() error messages with OPEN_CHECK() +
    • introduced PATH_SEP in config.h to allow changes in path separators +
  • Changes as of 12/15/96
      +
    • version 1.2.0 +
    • corrected typos in queries 8a, 8c, 8d, 11a, 12F and 14F, 17a +
    • added variant 15c +
    • defined MAX_SCALE and MIN_SCALE; issued error messages for SF > 1000 +since implementation is incomplete +
    • seed file generation can now be resumed with dbgen -R < n > ... +
    • corrected slight compile bug under Solaris 2.5.1 +
    • documented compile problems under SunOS +
  • Changes as of 8/1/96
      +
    • version 1.1.0D +
    • included new variants for queries 8 and 15 +
    • re-introduced answer sets in the source tree +
  • Changes as of 5/1/96
      +
    • version 1.1.0C +
    • unified version numbering of DBGEN and QGEN +
    • updated BUGS list +
    • removed FAQ from soft appendix; web site will keep the current +version of the FAQ +
    • added 1996 to the copyright notice +
    • corrected bug in PR_DATE macro; NO CHANGE TO DATA SET +
    • properly initialize param values for cleaner logging +
    • adjusted output format of Q11 param to allow scaling to 1TB +
    • corrected typos in variant 14c +
    • corrected data type for YEAR in variant 8c +
    • corrected typos in variant 10a +
    • added variant 8d +
  • Changes as of 1/23/96
      +
    • qgen version 1.1.0B +
    • include support for ANSI semantics +
    • improved patch for seed sensitivity +
  • Changes as of 1/23/96
      +
    • updated BUGS list +
    • dbgen version 1.1.0A +
    • patch to limit BCD2 fields to 12 characters for columnar output +
    • qgen version 1.1.0A +
    • patch to fix the "unknown flag" problem +
    • patch to fix the seed sensitivity problem +
  • Changes as of 12/19/95
      +
    • updated BUGS list +
    • dbgen version 1.1.0 +
    • upped default value of MAX_CHILDREN to 1000 +
    • corrected naming of detail tables in incremental load +
    • corrected range delete output +
    • forced delete files to truncate existing files +
    • removed fixed size tables from seed generation +
    • corrected overflow problem with large scale seed generation +
    • allow date generation as MM-DD-YY based on config.h #define +
    • correct truncation problem with columnar output in PR_VSTR() +
    • added support for Windows NT +
    • added PLATFORM macro to makefile, removed platform defines from +config.h +
    • removed MAX_CHILDREN define from config.h (set to 1000 in dss.h) +
    • qgen version 1.1.0 +
    • correct SET_OUTPUT macro to TDAT +
    • use %ld in output for q17; portability +
    • add support for SQLSERVER database dialect +
    • add support for SYBASE database dialect +
    • adjust parameter ranges for Q1, Q3, Q6 +
    • add -T/-t option to usage summary +
    • added support for Windows NT +
  • Changes as of 09/01/95
      +
    • qgen version 1.0.1 +
    • formalized version numbering +
    • -p now generates correct query permutations +
    • added separate version number for qgen +
    • corrected Q3 substitution problem +
    • updated permissible range for Q10 +
    • corrected rowcount_dflt and the MAX row indicator (-1) +
    • expanded param logging to include all possible parameters +
    • allowed qgen's -d option to be used at all scale factors +
    • made parameter substitution permutation-independent +
    • added qgen support for END_TRAN (-E) and DFLT_NUM (-N) +
    • correct handling of :n directive +
    • added more complete explanation of QGEN to README +
    • rename of random to rndm, for portability +
    • dbgen version 1.0.1 +
    • formalized version numbering +
    • inclusion of SF=1 seed file +
    • correct typo in usage() update example +
    • patch to driver.c to allow correct updates +
    • documentation change to README to clarify seed/stage/update +interaction +
    • corrected minor glitch in "open failed" error msg in print.c +
    • added missing line continuation to makefile.suite +
    • seed files are now based on scale factor and number of generators +
    • seed files now hold seeds for one "step" of a given build +
    • clean up of parallel load routines +
    • inclusion of faster seed generation routines from Susanne Englert +
    • removed the -E(xisting) option +
    • assure proper scaling of O_CUSTKEY +
    • corrected default update percentage +
    • proper handling of child tables with '-O f' +
    • removed seed files from the distribution +
    • modified rpb_routine() to limit contribution of partkey in +retailprice +
    • added '-S(tep)' option to allow multi-stage loads +
    • roll in of 32 bit speed_seed routines from Dick Shelton +
    • miscellaneous typo corrections in the documentation +
    • cleanup of usage output +
  • Changes as of 05/08/95
      +
    • version 1.0 +
    • add Teradata defines to tpcd.h for QGEN +
    • add :c to query templates for database CONNECT syntax +
    • add examples of DBGEN and QGEN usage to README +
    • add -T option to qgen to allow time table usage +
    • query template names only require .sql suffix, rest is arbitrary +
  • Changes as of 03/13/95
      +
    • version 9.1 +
    • surround DBNAME with ifndef in config.h +
    • remove -DDBNAME from makefile.suite +
    • sync varchar handling with 9.1 draft +
  • Changes as of 02/21/95
      +
    • version 9.0a +
    • fixed bug in qgen that incorrectly included rnd.h +
    • included revised DDL with Changes for char/varchar and l_quantity +
    • updated DBGEN help message to include new single table options for +order/lineitem and part/partsupp +
    • included handling for multi-set seed files TPCDSEED.xxx +
    • generated seeds up through 400GB; headed to 1TB! +
    • ANSI lint cleanup; more needed +
    • UF2 now defaults to key lists; use "-O r" to generate key ranges +also note, this routine does NOT use the BCD2_* +routines. As a result, it WILL fail if the keys being deleted +exceed 32 bits. Since this would require ~660 update iterations, +this seems an acceptable oversight +
  • Changes as of 01/19/95
      +
    • version 9.0 +
    • allowed command line seeding of RNG for QGEN +
    • order and number of params in QGEN now matches +presentation in spec +
    • fixed bug in time table format of O_ORDERDATE +
    • changed l_QUANTITY to FLOAT in dss.ddl +
    • reworked QGEN options to be more useful +
    • allowed creation of sparse keys beyond 32 bits (for 1TB) +
    • removed unused '#ifdef' and associated code +
    • allowed independent generation of master/detail tables +(eg, order/lineitem) +
  • Changes as of 12/06/94
      +
    • version 8.6 +
    • fixed renaming of flat files for child tables +
    • various documentation fixes +
    • added naming convention section to Porting.Notes +
    • added -DIBM flag to config.h +
    • synced up QGEN with draft 8.1 +
  • Changes as of 10/25/94
      +
    • version 8.5a +
    • corrected bug in columnar output of pr_supp +
    • added pr_drange to generate a list of order keys to be +deleted instead of generating SQL +
    • added '-O d' to generate range delete as SQL +
    • updated default values for QGEN to sync with spec 8.1 +
    • corrected MK_SPARSE to reflect groups of 8 +
    • corrected a bug in o_orderstatus +
    • regenerated seed files for SF in [1,10] +
    • ANSI cleanup (primarily function declarations) +
  • Changes as of 10/11/94
      +
    • version 8.5 +
    • remove deletes/inserts to other than order/lineitem +
    • increased cardinality for part.type part.container +
    • '-r' argument is now integer; percentage in basis points +
    • initial roll-in of new update scheme +
    • added BBB comments to supplier table +
  • Changes as of 9/27/94
      +
    • version 8.4 +
    • all money calculations now use integer math. This should +bring everyone's data sets into exact agreement. +
  • Changes as of 9/21/94
      +
    • version 8.3b +
    • fixed handling of MAX_STREAM +
    • added floor function to RPRICE bridge +
    • misc lint cleanup (type fixes, new prototypes, etc.) +
    • MONEY format becomes lf for DOS +
    • further cleanup of PR_VSTR and its length argument +
    • change to parameter generation for Q6 to allow for float +discount +
  • Changes as of 9/15/94
      +
    • version 8.3a +
    • isolated MONEY format for Unisys (Lf) using DOS +
    • make sure all arguments to MAKE_MONEY were double's +
    • rolled in NEW_PTEXT to allow Berni to experiment +
  • Changes as of 9/12/94
      +
    • version 8.3 +
    • added -T n and -T r to usage to match getopt() and README +
    • changed PR_MONEY to remove leading blanks +
    • included revised DDL from Berni +
    • included some MVS portability fixes in re malloc.h +
    • cleaned up error messages in qgen and made #define ofp usage +universal +
    • additional DOS portability changes +
    • added {c,a}len to provide specific length for columnar +output of varchar +
    • added PR_VSTR to handle varchar printing under MVS +
    • fixed bit masking in a_rnd and cleaned up prototype match +with V_STR +
    • PR_MONEY now used %Lf +
    • added revised pseudo text under NEW_PTEXT ifdef for +experiments +
  • Changes as of 9/09/94
      +
    • version 8.2 +
    • l_discount and l_tax are now fractional (per teleconference) +
    • money calculations moved to scaled integer math to clean up +answer sets +
    • changed PR_FLT() to PR_MONEY to clarify usage +
    • portability changes for SYBASE: +
    • dbname --> db_name +
    • STATUS --> DBGEN_STATUS +
    • added nations2 to dists.dss to handle qgen needs for now +
    • reintroduced #ifndef DOS +
    • reintroduced U2200 define to control kill_load() +
    • broke out nation and region separately in -T option +
    • updated dss.ddl based on mail from Berni +
  • Changes as of 8/31/94
      +
    • version 8.1 +
    • scaling for clerks needed to be 1000 (was 100) +
    • added qgen parameter for scale +
    • changed qgen parameter from s)tream to p)ermutation +
    • synced qgen parameter values with 8.0 spec +
    • corrected duplications in dists.dss +
  • Changes as of 8/24/94
      +
    • version 8.0 +
    • added sparse keys to lineitem/order +
    • added varchar generation for comments/addresses +
    • added variable lineitems/orders +
    • removed ifdef for normalized code_tables +
    • included code for parameter generation and template->EQT +routines +
    • updated README and Porting.Notes to reflect QGEN +
    • included DDL and RI examples from Berni +
  • Changes as of 6/15/94
      +
    • version 7.0b (numbers now match spec revision) +
    • rework of code tables to properly map nation/region; when +compiled with -DCODE_TABLES distributions are taken from +code.dss and two additional fields are generated for +customers and suppliers, [cs]_ncode and [cs]_rcode, +immediately following [cs]_region +
    • replaced ifdef's around DEAD_DATA with opposites. DEAD_DATA +is now the default +
    • worked through code to see that it conformed to 7.0 +specification +
    • adjusted scale factors/rowcounts for 1 GB == sf1 +
    • brought help message in line with current code +
    • fixed order per customer at 10 +
    • make suppkey scalable in lineitem/partsupp +
  • Changes as of 4/25/94
      +
    • version 1.5 +
    • added the customers with no orders; Compile with -DDEAD_DATA +to activate the change. +
    • added the code table for nation and region; +Compile with -DCODE_TABLES to activate the change. +
  • Changes as of 3/17/94
      +
    • version 1.41 +
    • completed implementation of JULIAN_DAY after talks with Berni +
    • misc cleanup in usage/README files +
    • removed all tabs and capped line length at 75 +
    • added -n option to allow naming of inline-loaded database +
  • Changes as of 3/16/94
      +
    • version 1.4 +
    • prototyped julian day/month for query re-write work. Compile +with -DJULIAN_DAY to enable +
    • removed gen_times() from driver.c +
    • added VMS ifdef to config.h to clean up fork/signal issues +
    • added ICL ifdef to config.h to clean up getopt() issues +
    • changed header file references to config.h from machine.h +
  • Changes as of 3/2/94
      +
    • version 1.31 +
    • corrected format of C_NAME to match S_NAME and O_CLERK +
    • re-allowed fractional scale factors < 1 (updates not +contiguous) +
    • added DSS_CONFIG environment variable +
    • reworked read_dist() to look for DSS_DIST in DSS_CONFIG +
    • updated the README file +
  • Changes as of 2/16/94
      +
    • version 1.3 +
    • added command line options for parallel load and data set +expansion +
    • changed dists.dss delimiter to | for portability +
    • limited scale factors to integer values +
    • added command line option for seed file generation +
    • added all seed files to distribution for SFs 1 - 10 +
    • moved machine.h to config.h and added MAX_CHILDREN define +
    • added 'f' flag to options to allow renaming of output files +
    • added generation of SQL delete statements to match updates +(Note: updates are still single-threaded; -C is cleared +by -U) +
    • corrected field sizing in dsstypes.h typedefs to match v 6.4 +
    • update percentage default set to 1% +
  • Changes as of 12/3/93
      +
    • version 1.2 +
    • added command line option to adjust update percentage +
    • fixed update generation for proper primary key ordering +
    • renamed UUSR/PRC to RUSSIA/CHINA in dists.dss +
    • cleaned up phone number generation to be consistent +regardless of order of evaluation +
    • adjusted size of lineitem comment to bring data in line with +100 MB == SF=1 +
  • Changes as of 10/15/93
      +
    • added command line option for update data creation +
    • miscellaneous porting and cleanup changes +
    • reworked table generation to allow reuse for updates +
    • added comment field to tdefs structure +
    • added load_state and store_state to sync data gen and +update gen +
  • Changes as of 7/26/93
      +
    • combined loader and header stubs in load_stubs.c +
    • separated Revision History (this file) from README +
    • simplified makefile +
    • removed redundancies from colors distribution +
    • added getopt() for portability +
    • created Porting.Notes +
    • adjusted scaling rules +
    • added help option to the command line +
  • Changes as of 2/26/93
      +
    • combined all typedefs in one header: dsstypes.h +
    • combined flat file generation in print.ec +
    • combined typedef population in build.ec +
    • added -P to control rowcnt scaling (P for percentage) +
    • added -D option for Direct data generation and added +appropriate hooks in tdefs[] structure +
    • added -F option for flat file generation +
    • reused -T option (use -P 0.1 to build test size database) +now accepts suboptions c,o,p,s for single table builds. +
    • dropped -M option (scaling is now by rowcount) +
    • added -O option for optional controls. Currently defined: +
    • -O t --> generate optional time table and join fields in +order/lineitem +
    • -O h --> generate headers for flat file output +
    • -O m --> generate fixed column-length output +
    • removed dynamic memory allocation, redundant calls to +UnifInt, etc to improve performance +
  • Changes as of 1/12/92
      +
    • julian() changed to handle orders -> orderdate correctly +
    • rflag distributions corrected in dists.dss +
    • sea, gold removed from color distribution to clean up substring +problems +
    • part-> number and supplier-> adjusted for 1-based indexing +
    • time-> day changed to be day of month, not day of year +
    • t.week changed to be week in year, not day of week +
  • Changes as of 11/18/92
      +
    • checked line length and tab for transmission +
    • another chapter in the portability wars. added #include +"machine.h" to dss.h (which is included by everyone else). Any +machine particular porting changes should go here. +
    • fixed fixed-field formats to prevent double printing +
    • expanded PR_FLT formats to %010.2 +
  • Changes as of 10/21/92
      +
    • added fixed format and column header handling; users of headers +will have to define the header functions to be called in +int (*tdefs.header)() +
  • Changes as of 10/09/92:
      +
    • added ansi prototypes and recompiled with gcc -ansi. users may +need to change the CC definition in the makefile and the contents +of CFLAGS to reflect their particular ansi compiler. +
    • replaced all int references with long +
    • replaced all float references with double +
    • found and fixed odate/julian problem TS mentioned in 10/09 phone +call + +
  • Changes as of 9/09/92:
      +
    • Park/Miller random number generator included +
    • clerk scaling changed to 100 * scale +
    • parts.name always built from 5 selections from colors set +
    • test scaling changed to ~60MB (TEST_SCALING == 10) +
    • logarithmic scaling removed +
    • mfgcost removed and retail/supplier cost bounds adjusted +
    • agg_str memory leak fixed +
    • independent RNG streams on a per column basis +
    +
+This is the revised data generator for DSS. + +The rewrite tried to accomplish three things: +
    +
  1. identify and isolate +all the implicit assumptions about limits, bounds, ranges, distributions, etc.; +
  2. standardize the way any given table was generated/ +printed to ease understanding and maintenance; +
  3. bring the generator +in line with the current work of the committee and the excellent spec +that Indira put together; +
  4. provide an easy way to adjust distributions, string contents and to facilitate experimentation to get a +better idea of the impact of data population changes. +

+ +The files included are:

+

+
driver.c +
main and the calling routines for the generator +
dist.c +
should really be named dss_util.c; misc routines +
customer.c
generation and print routines for customer table +
orders.c
"" "" order table +
parts.c
"" "" parts/partsupp +
suppliers.c
"" "" suppliers table +
time.c
"" "" time table +
customer.h
associate header files; contain structure +definitions +
dss.h
dss.h holds the large number of assumptions and +
orders.h
values that have been used as IFDEFs. +
parts.h
+
suppliers.h
+
time.h
+
dists.dss
string selections and weights; used to build +distributions + +
+

+Running make will create an executable (using the compiler flags in +CFLAGS, the ld flags in LDFLAGS and the libraries in LIBS [-O, -s, +and -lm by default]) which will create flat files suitable for dbload. + + + + + + diff --git a/load_stub.c b/load_stub.c new file mode 100644 index 0000000..e3339b5 --- /dev/null +++ b/load_stub.c @@ -0,0 +1,281 @@ +/***************************************************************** + * Title: load_stub.c + * Sccsid: @(#)load_stub.c 2.1.8.1 + * Description: + * stub routines for: + * inline load of dss benchmark + * header creation for dss benchmark + * + ***************************************************************** + */ + +#include +#include "config.h" +#include "dss.h" +#include "dsstypes.h" + +int +close_direct(void) +{ + /* any post load cleanup goes here */ + return(0); +} + +int +prep_direct(void) +{ + /* any preload prep goes here */ + return(0); +} + +int +hd_cust (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the customer table\n"); + + return(0); +} + +int +ld_cust (customer_t *cp, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the customer table"); + + return(0); +} + +int +hd_part (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the part table\n"); + + return(0); +} + +int +ld_part (part_t *pp, int mode) +{ + static int count = 0; + + if (! count++) + printf("No load routine has been defined for the part table\n"); + + return(0); +} + +int +ld_psupp (part_t *pp, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined for the", + "psupp table\n"); + + return(0); + +} + + +int +hd_supp (FILE *f) +{ + static int count = 0; + + if (! 
count++) + printf("No header has been defined for the supplier table\n"); + + return(0); +} + +int +ld_supp (supplier_t *sp, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the supplier table\n"); + + return(0); +} + + +int +hd_order (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the order table\n"); + + return(0); +} + +int +ld_order (order_t *p, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the order table"); + + return(0); +} + +ld_line (order_t *p, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the line table"); + + return(0); +} + + + +int +hd_psupp (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No header has been defined for the", + "part supplier table"); + + return(0); +} + + +int +hd_line (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the lineitem table\n"); + + return(0); +} + +int +hd_nation (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the nation table\n"); + + return(0); +} + +#ifdef SSBM +#else +int +ld_nation (code_t *cp, int mode) +{ + static int count = 0; + + if (! count++) + printf("%s %s\n", + "No load routine has been defined", + "for the nation table"); + + return(0); +} + +int +hd_region (FILE *f) +{ + static int count = 0; + + if (! count++) + printf("No header has been defined for the region table\n"); + + return(0); +} + +int +ld_region (code_t *cp, int mode) +{ + static int count = 0; + + if (! 
count++) + printf("%s %s\n", + "No load routine has been defined", + "for the region table"); + + return(0); +} + +int +ld_order_line (order_t *p, int mode) +{ + ld_order(p, mode); + ld_line (p, mode); + + return(0); +} + +int +hd_order_line (FILE *f) +{ + hd_order(f); + hd_line (f); + + return(0); +} + +int +ld_part_psupp (part_t *p, int mode) +{ + ld_part(p, mode); + ld_psupp (p, mode); + + return(0); +} + +int +hd_part_psupp (FILE *f) +{ + hd_part(f); + hd_psupp(f); + + return(0); +} +#endif + +#ifdef SSBM +int +ld_date (date_t *d, int mode) +{ + /*do nothing for now*/ + return(0); +} + +#endif + + + + + + diff --git a/makefile b/makefile new file mode 100644 index 0000000..273c4be --- /dev/null +++ b/makefile @@ -0,0 +1,127 @@ +# @(#)makefile.suite 2.1.8.1 +################ +## CHANGE NAME OF ANSI COMPILER HERE +################ +CC = gcc +# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata) +# SQLSERVER, SYBASE +# Current values for MACHINE are: ATT, DOS, HP, IBM, ICL, MVS, +# SGI, SUN, U2200, VMS, LINUX +# Current values for WORKLOAD are: SSBM, TPCH, TPCR +DATABASE=DB2 +MACHINE =LINUX +WORKLOAD =SSBM +# +# add -EDTERABYTE if orderkey will execeed 32 bits (SF >= 300) +# and make the appropriate change in gen_schema() of runit.sh +CFLAGS = -O -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) +LDFLAGS = -O +# The OBJ,EXE and LIB macros will need to be changed for compilation under +# Windows NT +OBJ = .o +EXE = +LIBS = -lm +# +# NO CHANGES SHOULD BE NECESSARY BELOW THIS LINE +############### +TREE_ROOT=/tmp/tree +# +PROG1 = dbgen$(EXE) +PROG2 = qgen$(EXE) +PROGS = $(PROG1) $(PROG2) +# +HDR1 = dss.h rnd.h config.h dsstypes.h shared.h bcd2.h +HDR2 = tpcd.h permute.h +HDR = $(HDR1) $(HDR2) +# +SRC1 = build.c driver.c bm_utils.c rnd.c print.c load_stub.c bcd2.c \ + speed_seed.c text.c permute.c +SRC2 = qgen.c varsub.c +SRC = $(SRC1) $(SRC2) +# +OBJ1 = build$(OBJ) driver$(OBJ) bm_utils$(OBJ) rnd$(OBJ) print$(OBJ) \ + load_stub$(OBJ) bcd2$(OBJ) 
speed_seed$(OBJ) text$(OBJ) permute$(OBJ) +OBJ2 = build$(OBJ) bm_utils$(OBJ) qgen$(OBJ) rnd$(OBJ) varsub$(OBJ) \ + text$(OBJ) bcd2$(OBJ) permute$(OBJ) speed_seed$(OBJ) +OBJS = $(OBJ1) $(OBJ2) +# +SETS = dists.dss +DOC=README HISTORY PORTING.NOTES BUGS +DDL = dss.ddl dss.ri +OTHER=makefile.suite $(SETS) $(DDL) +# case is *important* in TEST_RES +TEST_RES = O.res L.res c.res s.res P.res S.res n.res r.res +# +DBGENSRC=$(SRC1) $(HDR1) $(OTHER) $(DOC) $(SRC2) $(HDR2) $(SRC3) +QD=1.sql 2.sql 3.sql 4.sql 5.sql 6.sql 7.sql 8.sql 9.sql 10.sql \ + 11.sql 12.sql 13.sql 14.sql 15.sql 16.sql 17.sql 18.sql \ + 19.sql 20.sql 21.sql 22.sql +VARIANTS= 8a.sql 12a.sql 13a.sql 14a.sql 15a.sql +ANS = 1.ans 2.ans 3.ans 4.ans 5.ans 6.ans 7.ans 8.ans 9.ans 10.ans 11.ans \ + 12.ans 13.ans 14.ans 15.ans 16.ans 17.ans 18.ans 19.ans 20.ans \ + 21.ans 22.ans +QSRC = $(FQD) $(VARIANTS) +ALLSRC=$(DBGENSRC) +TREE_DOC=tree.readme tree.changes appendix.readme appendix.version answers.readme queries.readme variants.readme +JUNK = +# +all: $(PROGS) +$(PROG1): $(OBJ1) $(SETS) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ1) $(LIBS) +$(PROG2): permute.h $(OBJ2) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ2) $(LIBS) +clean: + rm -f $(PROGS) $(OBJS) $(JUNK) +lint: + lint $(CFLAGS) -u -x -wO -Ma -p $(SRC1) + lint $(CFLAGS) -u -x -wO -Ma -p $(SRC2) + +tar: $(DBGENSRC) + tar cvhf $(PROG1).tar $(DBGENSRC) +dbgenshar: $(DBGENSRC) + shar -o dbgen.shar $(DBGENSRC) +zip: $(DBGENSRC) + zip dbgen $(DBGENSRC) +tree: $(DBGENSRC) $(FQD) $(VARIANTS) $(TREE_DOC) $(ANS) + rm -rf $(TREE_ROOT) + mkdir $(TREE_ROOT) + mkdir $(TREE_ROOT)/appendix + mkdir $(TREE_ROOT)/appendix/queries + mkdir $(TREE_ROOT)/appendix/variants + mkdir $(TREE_ROOT)/appendix/dbgen + mkdir $(TREE_ROOT)/appendix/answers + cp tree.readme $(TREE_ROOT)/README + cp appendix.readme $(TREE_ROOT)/appendix/README + cp answers.readme $(TREE_ROOT)/appendix/answers/README + cp queries.readme $(TREE_ROOT)/appendix/queries/README + cp variants.readme 
$(TREE_ROOT)/appendix/variants/README + cp tree.changes $(TREE_ROOT)/CHANGES + cp appendix.version $(TREE_ROOT)/appendix/VERSION + cp $(FQD) $(TREE_ROOT)/appendix/queries + cp $(VARIANTS) $(TREE_ROOT)/appendix/variants + cp $(DBGENSRC) $(TREE_ROOT)/appendix/dbgen + cp $(ANS) $(TREE_ROOT)/appendix/answers + (cd $(TREE_ROOT); tar chf - .) |compress > tree.tar.Z + (cd $(TREE_ROOT); zip -r - . ) > tree.zip + date > tree.update +portable: + @ for f in $(SRC) $(HDR) ; \ + do \ + expand $$f > /tmp/$$f; \ + awk 'length > 72 { print FILENAME ":" NR " too long " }' /tmp/$$f ; \ + rm /tmp/$$f ; \ + done +release: + @chkout $(SRC) $(HDR) + @ for f in $(SRC) $(HDR) ; \ + do \ + expand $$f > /tmp/$$f ; \ + mv /tmp/$$f $$f ; \ + done + @chkin $(SRC) $(HDR) + +rnd$(OBJ): rnd.h +$(OBJ1): $(HDR1) +$(OBJ2): dss.h tpcd.h config.h +$(QSRC) $(ALLSRC): + get -r`cat .version` ./SCCS/s.$@ diff --git a/makefile.suite b/makefile.suite new file mode 100644 index 0000000..5ab13d1 --- /dev/null +++ b/makefile.suite @@ -0,0 +1,127 @@ +# @(#)makefile.suite 2.1.8.1 +################ +## CHANGE NAME OF ANSI COMPILER HERE +################ +CC = +# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata) +# SQLSERVER, SYBASE +# Current values for MACHINE are: ATT, DOS, HP, IBM, ICL, MVS, +# SGI, SUN, U2200, VMS, LINUX +# Current values for WORKLOAD are: SSBM, TPCH, TPCR +DATABASE= +MACHINE = +WORKLOAD = +# +# add -EDTERABYTE if orderkey will execeed 32 bits (SF >= 300) +# and make the appropriate change in gen_schema() of runit.sh +CFLAGS = -O -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) +LDFLAGS = -O +# The OBJ,EXE and LIB macros will need to be changed for compilation under +# Windows NT +OBJ = .o +EXE = +LIBS = -lm +# +# NO CHANGES SHOULD BE NECESSARY BELOW THIS LINE +############### +TREE_ROOT=/tmp/tree +# +PROG1 = dbgen$(EXE) +PROG2 = qgen$(EXE) +PROGS = $(PROG1) $(PROG2) +# +HDR1 = dss.h rnd.h config.h dsstypes.h shared.h bcd2.h +HDR2 = tpcd.h permute.h +HDR = $(HDR1) 
$(HDR2) +# +SRC1 = build.c driver.c bm_utils.c rnd.c print.c load_stub.c bcd2.c \ + speed_seed.c text.c permute.c +SRC2 = qgen.c varsub.c +SRC = $(SRC1) $(SRC2) +# +OBJ1 = build$(OBJ) driver$(OBJ) bm_utils$(OBJ) rnd$(OBJ) print$(OBJ) \ + load_stub$(OBJ) bcd2$(OBJ) speed_seed$(OBJ) text$(OBJ) permute$(OBJ) +OBJ2 = build$(OBJ) bm_utils$(OBJ) qgen$(OBJ) rnd$(OBJ) varsub$(OBJ) \ + text$(OBJ) bcd2$(OBJ) permute$(OBJ) speed_seed$(OBJ) +OBJS = $(OBJ1) $(OBJ2) +# +SETS = dists.dss +DOC=README HISTORY PORTING.NOTES BUGS +DDL = dss.ddl dss.ri +OTHER=makefile.suite $(SETS) $(DDL) +# case is *important* in TEST_RES +TEST_RES = O.res L.res c.res s.res P.res S.res n.res r.res +# +DBGENSRC=$(SRC1) $(HDR1) $(OTHER) $(DOC) $(SRC2) $(HDR2) $(SRC3) +QD=1.sql 2.sql 3.sql 4.sql 5.sql 6.sql 7.sql 8.sql 9.sql 10.sql \ + 11.sql 12.sql 13.sql 14.sql 15.sql 16.sql 17.sql 18.sql \ + 19.sql 20.sql 21.sql 22.sql +VARIANTS= 8a.sql 12a.sql 13a.sql 14a.sql 15a.sql +ANS = 1.ans 2.ans 3.ans 4.ans 5.ans 6.ans 7.ans 8.ans 9.ans 10.ans 11.ans \ + 12.ans 13.ans 14.ans 15.ans 16.ans 17.ans 18.ans 19.ans 20.ans \ + 21.ans 22.ans +QSRC = $(FQD) $(VARIANTS) +ALLSRC=$(DBGENSRC) +TREE_DOC=tree.readme tree.changes appendix.readme appendix.version answers.readme queries.readme variants.readme +JUNK = +# +all: $(PROGS) +$(PROG1): $(OBJ1) $(SETS) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ1) $(LIBS) +$(PROG2): permute.h $(OBJ2) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ2) $(LIBS) +clean: + rm -f $(PROGS) $(OBJS) $(JUNK) +lint: + lint $(CFLAGS) -u -x -wO -Ma -p $(SRC1) + lint $(CFLAGS) -u -x -wO -Ma -p $(SRC2) + +tar: $(DBGENSRC) + tar cvhf $(PROG1).tar $(DBGENSRC) +dbgenshar: $(DBGENSRC) + shar -o dbgen.shar $(DBGENSRC) +zip: $(DBGENSRC) + zip dbgen $(DBGENSRC) +tree: $(DBGENSRC) $(FQD) $(VARIANTS) $(TREE_DOC) $(ANS) + rm -rf $(TREE_ROOT) + mkdir $(TREE_ROOT) + mkdir $(TREE_ROOT)/appendix + mkdir $(TREE_ROOT)/appendix/queries + mkdir $(TREE_ROOT)/appendix/variants + mkdir $(TREE_ROOT)/appendix/dbgen + mkdir 
$(TREE_ROOT)/appendix/answers + cp tree.readme $(TREE_ROOT)/README + cp appendix.readme $(TREE_ROOT)/appendix/README + cp answers.readme $(TREE_ROOT)/appendix/answers/README + cp queries.readme $(TREE_ROOT)/appendix/queries/README + cp variants.readme $(TREE_ROOT)/appendix/variants/README + cp tree.changes $(TREE_ROOT)/CHANGES + cp appendix.version $(TREE_ROOT)/appendix/VERSION + cp $(FQD) $(TREE_ROOT)/appendix/queries + cp $(VARIANTS) $(TREE_ROOT)/appendix/variants + cp $(DBGENSRC) $(TREE_ROOT)/appendix/dbgen + cp $(ANS) $(TREE_ROOT)/appendix/answers + (cd $(TREE_ROOT); tar chf - .) |compress > tree.tar.Z + (cd $(TREE_ROOT); zip -r - . ) > tree.zip + date > tree.update +portable: + @ for f in $(SRC) $(HDR) ; \ + do \ + expand $$f > /tmp/$$f; \ + awk 'length > 72 { print FILENAME ":" NR " too long " }' /tmp/$$f ; \ + rm /tmp/$$f ; \ + done +release: + @chkout $(SRC) $(HDR) + @ for f in $(SRC) $(HDR) ; \ + do \ + expand $$f > /tmp/$$f ; \ + mv /tmp/$$f $$f ; \ + done + @chkin $(SRC) $(HDR) + +rnd$(OBJ): rnd.h +$(OBJ1): $(HDR1) +$(OBJ2): dss.h tpcd.h config.h +$(QSRC) $(ALLSRC): + get -r`cat .version` ./SCCS/s.$@ diff --git a/makefile_win b/makefile_win new file mode 100644 index 0000000..71b3245 --- /dev/null +++ b/makefile_win @@ -0,0 +1,85 @@ +VC="c:/Program Files/Microsoft Visual Studio 9.0/VC" +WIN_INC="C:\Program Files\Microsoft SDKs\Windows\v6.0A\Include" +WIN_LIB="C:\Program Files\Microsoft SDKs\Windows\v6.0A\Lib" +#VC = "C:\Program Files\Microsoft Visual Studio .NET 2003\Vc7" +VCLIB = $(VC)\LIB +# @(#)makefile.suite 2.1.8.1 +################ +## CHANGE NAME OF ANSI COMPILER HERE +################ +CC =cl.exe +# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata) +# SQLSERVER, SYBASE +# Current values for MACHINE are: ATT, DOS, WIN32 HP, IBM, ICL, MVS, +# SGI, SUN, U2200, VMS, LINUX +# Current values for WORKLOAD are: SSBM, TPCH, TPCR +DATABASE=DB2 +MACHINE =WIN32 +WORKLOAD =SSBM +# +# add -EDTERABYTE if orderkey will execeed 32 bits (SF >= 300) 
+# and make the appropriate change in gen_schema() of runit.sh +CFLAGS = -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) /I$(VC)\include /I$(WIN_INC) + +#LDFLAGS = -O +# The OBJ,EXE and LIB macros will need to be changed for compilation under +# Windows NT +OBJ = .obj +EXE = .exe +LIBS =$(VCLIB)\libcmt.lib $(VCLIB)\oldnames.lib $(VCLIB)\oldnames.lib $(WIN_LIB)\kernel32.lib +# +# NO CHANGES SHOULD BE NECESSARY BELOW THIS LINE +############### +# +PROG1 = dbgen$(EXE) +PROG2 = qgen$(EXE) +PROGS = $(PROG1) $(PROG2) +# +HDR1 = dss.h rnd.h config.h dsstypes.h shared.h bcd2.h +HDR2 = tpcd.h permute.h +HDR = $(HDR1) $(HDR2) +# +SRC1 = build.c driver.c bm_utils.c rnd.c print.c load_stub.c bcd2.c \ + speed_seed.c text.c permute.c +SRC2 = qgen.c varsub.c +SRC = $(SRC1) $(SRC2) +# +OBJ1 = build$(OBJ) driver$(OBJ) bm_utils$(OBJ) rnd$(OBJ) print$(OBJ) \ + load_stub$(OBJ) bcd2$(OBJ) speed_seed$(OBJ) text$(OBJ) permute$(OBJ) +OBJ2 = build$(OBJ) bm_utils$(OBJ) qgen$(OBJ) rnd$(OBJ) varsub$(OBJ) \ + text$(OBJ) bcd2$(OBJ) permute$(OBJ) speed_seed$(OBJ) +OBJS = $(OBJ1) $(OBJ2) +# +SETS = dists.dss +DOC=README HISTORY PORTING.NOTES BUGS +DDL = dss.ddl dss.ri +OTHER=makefile.suite $(SETS) $(DDL) +# case is *important* in TEST_RES +TEST_RES = O.res L.res c.res s.res P.res S.res n.res r.res +# +DBGENSRC=$(SRC1) $(HDR1) $(OTHER) $(DOC) $(SRC2) $(HDR2) $(SRC3) +QD=1.sql 2.sql 3.sql 4.sql 5.sql 6.sql 7.sql 8.sql 9.sql 10.sql \ + 11.sql 12.sql 13.sql 14.sql 15.sql 16.sql 17.sql 18.sql \ + 19.sql 20.sql 21.sql 22.sql +VARIANTS= 8a.sql 12a.sql 13a.sql 14a.sql 15a.sql +ANS = 1.ans 2.ans 3.ans 4.ans 5.ans 6.ans 7.ans 8.ans 9.ans 10.ans 11.ans \ + 12.ans 13.ans 14.ans 15.ans 16.ans 17.ans 18.ans 19.ans 20.ans \ + 21.ans 22.ans +QSRC = $(FQD) $(VARIANTS) +ALLSRC=$(DBGENSRC) +TREE_DOC=tree.readme tree.changes appendix.readme appendix.version answers.readme queries.readme variants.readme +JUNK = +# +all: $(PROGS) + +$(PROG1): $(OBJ1) $(SETS) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ1) $(LIBS) 
+ +$(PROG2): permute.h $(OBJ2) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ2) $(LIBS) + +clean: + del /F $(PROGS) $(OBJS) $(JUNK) + +$(OBJ1): $(HDR1) +$(OBJ2): dss.h tpcd.h config.h diff --git a/permute.c b/permute.c new file mode 100644 index 0000000..b34f04c --- /dev/null +++ b/permute.c @@ -0,0 +1,175 @@ +/* @(#)permute.c 2.1.8.3 */ +/* +* permute.c -- a permutation generator for the query +* sequences in TPC-H and TPC-R +*/ + +#ifdef TEST +#define DECLARER +#endif +#include "config.h" +#include "dss.h" +#ifdef TEST +#include +#if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */ +#include +#include +#endif /* WIN32 */ +#include /* */ +#include +#include +#include +#include +#include +#include +#ifdef HP +#include +#endif +#if (defined(WIN32)&&!defined(_POSIX_)) +#include +#pragma warning(disable:4201) +#pragma warning(disable:4214) +#pragma warning(disable:4514) +#define WIN32_LEAN_AND_MEAN +#define NOATOM +#define NOGDICAPMASKS +#define NOMETAFILE +#define NOMINMAX +#define NOMSG +#define NOOPENFILE +#define NORASTEROPS +#define NOSCROLL +#define NOSOUND +#define NOSYSMETRICS +#define NOTEXTMETRIC +#define NOWH +#define NOCOMM +#define NOKANJI +#define NOMCX +#include +#pragma warning(default:4201) +#pragma warning(default:4214) +#endif +#endif + +long NextRand(long seed); +long *permute(long *set, int cnt, long stream); +long *permute_dist(distribution *d, long stream); +long seed; +char *eol[2] = {" ", "},"}; +extern seed_t Seed[]; +#ifdef TEST +tdef tdefs = { NULL }; +#endif + + +#define MAX_QUERY 22 +#define ITERATIONS 1000 +#define UNSET 0 + +long * +permute(long *a, int c, long s) + { + int i; + static long source; + static long *set, temp; + + if (a != (long *)NULL) + { + set = a; + for (i=0; i < c; i++) + *(a + i) = i; + for (i=0; i < c; i++) + { + RANDOM(source, 0L, (long)(c - 1), s); + temp = *(a + source); + *(a + source) = *(a + i) ; + *(a + i) = temp; + source = 0; + } + } + else + source += 1; + + if (source >= c) + source -= c; + + 
return(set + source); + } + +long * +permute_dist(distribution *d, long stream) + { + static distribution *dist = NULL; + int i; + + if (d != NULL) + { + if (d->permute == (long *)NULL) + { + d->permute = (long *)malloc(sizeof(long) * DIST_SIZE(d)); + MALLOC_CHECK(d->permute); + for (i=0; i < DIST_SIZE(d); i++) + *(d->permute + i) = i; + } + dist = d; + return(permute(dist->permute, DIST_SIZE(dist), stream)); + } + + + if (dist != NULL) + return(permute(NULL, DIST_SIZE(dist), stream)); + else + INTERNAL_ERROR("Bad call to permute_dist"); + } + + +#ifdef TEST + +main(int ac, char *av[]) + { + long *sequence, + i, + j, + streams = UNSET, + *a; + char sep; + int index = 0; + + set_seeds = 0; + sequence = (long *)malloc(MAX_QUERY * sizeof(long)); + a = sequence; + for (i=0; i < MAX_QUERY; i++) + *(sequence + i) = i; + if (ac < 3) + goto usage; + Seed[0].value = (long)atoi(av[1]); + streams = atoi(av[2]); + if (Seed[0].value == UNSET || streams == UNSET) + goto usage; + + index = 0; + printf("long permutation[%d][%d] = {\n", streams, MAX_QUERY); + for (j=0; j < streams; j++) + { + sep = '{'; + printf("%s\n", eol[index]); + for (i=0; i < MAX_QUERY; i++) + { + printf("%c%2d", sep, *permute(a, MAX_QUERY, 0) + 1); + a = (long *)NULL; + sep = ','; + } + a = sequence; + index=1; + } + printf("}\n};\n"); + return(0); + +usage: + printf("Usage: %s \n",av[0]); + printf(" uses to start the generation of permutations of [1..%d]\n", MAX_QUERY); + return(-1); + + } +#endif /* TEST */ diff --git a/permute.h b/permute.h new file mode 100644 index 0000000..bf5e8c4 --- /dev/null +++ b/permute.h @@ -0,0 +1,47 @@ +/* + * @(#)permute.h 2.1.8.1 + */ +long permutation[41][22] = +{ + {14, 2, 9,20, 6,17,18, 8,21,13, 3,22,16, 4,11,15, 1,10,19, 5, 7,12}, + {21, 3,18, 5,11, 7, 6,20,17,12,16,15,13,10, 2, 8,14,19, 9,22, 1, 4}, + { 6,17,14,16,19,10, 9, 2,15, 8, 5,22,12, 7,13,18, 1, 4,20, 3,11,21}, + { 8, 5, 4, 6,17, 7, 1,18,22,14, 9,10,15,11,20, 2,21,19,13,16,12, 3}, + { 5,21,14,19,15,17,12, 6, 4, 
9, 8,16,11, 2,10,18, 1,13, 7,22, 3,20}, + {21,15, 4, 6, 7,16,19,18,14,22,11,13, 3, 1, 2, 5, 8,20,12,17,10, 9}, + {10, 3,15,13, 6, 8, 9, 7, 4,11,22,18,12, 1, 5,16, 2,14,19,20,17,21}, + {18, 8,20,21, 2, 4,22,17, 1,11, 9,19, 3,13, 5, 7,10,16, 6,14,15,12}, + {19, 1,15,17, 5, 8, 9,12,14, 7, 4, 3,20,16, 6,22,10,13, 2,21,18,11}, + { 8,13, 2,20,17, 3, 6,21,18,11,19,10,15, 4,22, 1, 7,12, 9,14, 5,16}, + { 6,15,18,17,12, 1, 7, 2,22,13,21,10,14, 9, 3,16,20,19,11, 4, 8, 5}, + {15,14,18,17,10,20,16,11, 1, 8, 4,22, 5,12, 3, 9,21, 2,13, 6,19, 7}, + { 1, 7,16,17,18,22,12, 6, 8, 9,11, 4, 2, 5,20,21,13,10,19, 3,14,15}, + {21,17, 7, 3, 1,10,12,22, 9,16, 6,11, 2, 4, 5,14, 8,20,13,18,15,19}, + { 2, 9, 5, 4,18, 1,20,15,16,17, 7,21,13,14,19, 8,22,11,10, 3,12, 6}, + {16, 9,17, 8,14,11,10,12, 6,21, 7, 3,15, 5,22,20, 1,13,19, 2, 4,18}, + { 1, 3, 6, 5, 2,16,14,22,17,20, 4, 9,10,11,15, 8,12,19,18,13, 7,21}, + { 3,16, 5,11,21, 9, 2,15,10,18,17, 7, 8,19,14,13, 1, 4,22,20, 6,12}, + {14, 4,13, 5,21,11, 8, 6, 3,17, 2,20, 1,19,10, 9,12,18,15, 7,22,16}, + { 4,12,22,14, 5,15,16, 2, 8,10,17, 9,21, 7, 3, 6,13,18,11,20,19, 1}, + {16,15,14,13, 4,22,18,19, 7, 1,12,17, 5,10,20, 3, 9,21,11, 2, 6, 8}, + {20,14,21,12,15,17, 4,19,13,10,11, 1,16, 5,18, 7, 8,22, 9, 6, 3, 2}, + {16,14,13, 2,21,10,11, 4, 1,22,18,12,19, 5, 7, 8, 6, 3,15,20, 9,17}, + {18,15, 9,14,12, 2, 8,11,22,21,16, 1, 6,17, 5,10,19, 4,20,13, 3, 7}, + { 7, 3,10,14,13,21,18, 6,20, 4, 9, 8,22,15, 2, 1, 5,12,19,17,11,16}, + {18, 1,13, 7,16,10,14, 2,19, 5,21,11,22,15, 8,17,20, 3, 4,12, 6, 9}, + {13, 2,22, 5,11,21,20,14, 7,10, 4, 9,19,18, 6, 3, 1, 8,15,12,17,16}, + {14,17,21, 8, 2, 9, 6, 4, 5,13,22, 7,15, 3, 1,18,16,11,10,12,20,19}, + {10,22, 1,12,13,18,21,20, 2,14,16, 7,15, 3, 4,17, 5,19, 6, 8, 9,11}, + {10, 8, 9,18,12, 6, 1, 5,20,11,17,22,16, 3,13, 2,15,21,14,19, 7, 4}, + { 7,17,22, 5, 3,10,13,18, 9, 1,14,15,21,19,16,12, 8, 6,11,20, 4, 2}, + { 2, 9,21, 3, 4, 7, 1,11,16, 5,20,19,18, 8,17,13,10,12,15, 6,14,22}, + {15,12, 8, 4,22,13,16,17,18, 3, 7, 5, 6, 
1, 9,11,21,10,14,20,19, 2}, + {15,16, 2,11,17, 7, 5,14,20, 4,21, 3,10, 9,12, 8,13, 6,18,19,22, 1}, + { 1,13,11, 3, 4,21, 6,14,15,22,18, 9, 7, 5,10,20,12,16,17, 8,19, 2}, + {14,17,22,20, 8,16, 5,10, 1,13, 2,21,12, 9, 4,18, 3, 7, 6,19,15,11}, + { 9,17, 7, 4, 5,13,21,18,11, 3,22, 1, 6,16,20,14,15,10, 8, 2,12,19}, + {13,14, 5,22,19,11, 9, 6,18,15, 8,10, 7, 4,17,16, 3, 1,12, 2,21,20}, + {20, 5, 4,14,11, 1, 6,16, 8,22, 7, 3, 2,12,21,19,17,13,10,15,18, 9}, + { 3, 7,14,15, 6, 5,21,20,18,10, 4,16,19, 1,13, 9, 8,17,11,12,22, 2}, + {13,15,17, 1,22,11, 3, 4, 7,20,14,21, 9, 8, 2,18,16, 6,10,12, 5,19} +}; diff --git a/print.c b/print.c new file mode 100644 index 0000000..932a576 --- /dev/null +++ b/print.c @@ -0,0 +1,1006 @@ +/* @(#)print.c 2.1.8.2 */ +/* generate flat files for data load */ +#include +#ifndef VMS +#include +#endif + +#if defined(SUN) +#include +#endif + +#if defined(LINUX) +#include +#endif /*LINUX*/ + +#include + +#include "dss.h" +#include "dsstypes.h" +#include + +#include +#include + + +/* + * Function Prototypes + */ +FILE *print_prep PROTO((int table, int update)); +int pr_drange PROTO((int tbl, long min, long cnt, long num)); + +FILE * +print_prep(int table, int update) +{ + char upath[128]; + FILE *res; + + if (updates) + { + if (update > 0) /* updates */ + if ( insert_segments ) + { + int this_segment; + if(strcmp(tdefs[table].name,"orders.tbl")) + this_segment=++insert_orders_segment; + else + this_segment=++insert_lineitem_segment; + sprintf(upath, "%s%c%s.u%d.%d", + env_config(PATH_TAG, PATH_DFLT), + PATH_SEP, tdefs[table].name, update%10000,this_segment); + } + else + { + sprintf(upath, "%s%c%s.u%d", + env_config(PATH_TAG, PATH_DFLT), + PATH_SEP, tdefs[table].name, update); + } + else /* deletes */ + if ( delete_segments ) + { + ++delete_segment; + sprintf(upath, "%s%cdelete.u%d.%d", + env_config(PATH_TAG, PATH_DFLT), PATH_SEP, -update%10000, + delete_segment); + } + else + { + sprintf(upath, "%s%cdelete.%d", + env_config(PATH_TAG, PATH_DFLT), 
PATH_SEP, -update); + } + return(fopen(upath, "w")); + } + res = tbl_open(table, "w"); + OPEN_CHECK(res, tdefs[table].name); + return(res); +} + +int +dbg_print(int format, FILE *target, void *data, int len, int sep) +{ + int dollars, + cents; + + switch(format) + { + case DT_STR: + if (columnar) + fprintf(target, "%-*s", len, (char *)data); + else + fprintf(target, "%s", (char *)data); + break; +#ifdef MVS + case DT_VSTR: + /* note: only used in MVS, assumes columnar output */ + fprintf(target, "%c%c%-*s", + (len >> 8) & 0xFF, len & 0xFF, len, (char *)data); + break; +#endif /* MVS */ + case DT_INT: + if (columnar) + fprintf(target, "%12ld", (long)data); + else + fprintf(target, "%ld", (long)data); + break; + case DT_HUGE: +#ifndef SUPPORT_64BITS + if (*(long *)((long *)data + 1) == 0) \ + if (columnar) fprintf(target, "%12ld", *(long *)data); + else fprintf(target, "%ld", *(long *)data); + else + if (columnar) fprintf(target, "%5ld%07ld", + *(long *)((long *)data + 1), *(long *)data); + else fprintf(target,"%ld%07ld", + *(long *)((long *)data + 1), *(long *)data); +#else + fprintf(target, HUGE_FORMAT, *(DSS_HUGE *)data); +#endif /* SUPPORT_64BITS */ + break; + case DT_KEY: + fprintf(target, "%ld", (long)data); + break; + case DT_MONEY: + cents = (long)data; + if (cents < 0) + { + fprintf(target, "-"); + cents = -cents; + } + dollars = cents / 100; + cents %= 100; + if (columnar) + fprintf(target, "%12ld.%02ld", dollars, cents); + else + fprintf(target, "%ld.%02ld", dollars, cents); + break; + case DT_CHR: + if (columnar) + fprintf(target, "%c ", (char)data); + else + fprintf(target, "%c", (char)data); + break; + } + +#ifdef EOL_HANDLING + if (sep) +#endif /* EOL_HANDLING */ + if (!columnar && (sep != -1)) + fprintf(target, "%c", SEPARATOR); + + return(0); +} + +#ifdef SSBM +int +pr_cust(customer_t *c, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(CUST, 0); + + PR_STRT(fp); + PR_INT(fp, c->custkey); + PR_VSTR(fp, c->name, C_NAME_LEN); 
+ PR_VSTR(fp, c->address, + (columnar)?(long)(ceil(C_ADDR_LEN * V_STR_HGH)):c->alen); + PR_STR(fp, c->city,CITY_FIX); + PR_STR(fp, c->nation_name, C_NATION_NAME_LEN); + PR_STR(fp, c->region_name, C_REGION_NAME_LEN); + PR_STR(fp, c->phone, PHONE_LEN); + PR_STR(fp, c->mktsegment,MAXAGG_LEN); + PR_END(fp); + + return(0); +} + +#else +int +pr_cust(customer_t *c, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(CUST, 0); + + PR_STRT(fp); + PR_INT(fp, c->custkey); + PR_VSTR(fp, c->name, C_NAME_LEN); + PR_VSTR(fp, c->address, + (columnar)?(long)(ceil(C_ADDR_LEN * V_STR_HGH)):c->alen); + PR_INT(fp, c->nation_code); + PR_STR(fp, c->phone, PHONE_LEN); + PR_MONEY(fp, c->acctbal); + PR_STR(fp, c->mktsegment, C_MSEG_LEN); + PR_VSTR_LAST(fp, c->comment, + (columnar)?(long)(ceil(C_CMNT_LEN * V_STR_HGH)):c->clen); + PR_END(fp); + + return(0); +} +#endif + +/* + * print the numbered order + */ +#ifdef SSBM + +#else +int +pr_order(order_t *o, int mode) +{ + static FILE *fp_o = NULL; + static int last_mode = 0; + + if (fp_o == NULL || mode != last_mode) + { + if (fp_o) + fclose(fp_o); + fp_o = print_prep(ORDER, mode); + last_mode = mode; + } + PR_STRT(fp_o); + PR_HUGE(fp_o, o->okey); + PR_INT(fp_o, o->custkey); + PR_CHR(fp_o, o->orderstatus); + PR_MONEY(fp_o, o->totalprice); + PR_STR(fp_o, o->odate, DATE_LEN); + PR_STR(fp_o, o->opriority, O_OPRIO_LEN); + PR_STR(fp_o, o->clerk, O_CLRK_LEN); + PR_INT(fp_o, o->spriority); + PR_VSTR_LAST(fp_o, o->comment, + (columnar)?(long)(ceil(O_CMNT_LEN * V_STR_HGH)):o->clen); + PR_END(fp_o); + + return(0); +} +#endif + +/* + * print an order's lineitems + */ +#ifdef SSBM +int +pr_line(order_t *o, int mode) +{ + + static FILE *fp_l = NULL; + static int last_mode = 0; + long i; + int days; + char buf[100]; + + if (fp_l == NULL || mode != last_mode) + { + if (fp_l) + fclose(fp_l); + fp_l = print_prep(LINE, mode); + last_mode = mode; + } + + for (i = 0; i < o->lines; i++) + { + PR_STRT(fp_l); + PR_HUGE(fp_l, 
o->lineorders[i].okey); + PR_INT(fp_l, o->lineorders[i].linenumber); + PR_INT(fp_l, o->lineorders[i].custkey); + PR_INT(fp_l, o->lineorders[i].partkey); + PR_INT(fp_l, o->lineorders[i].suppkey); + PR_STR(fp_l, o->lineorders[i].orderdate, DATE_LEN); + PR_STR(fp_l, o->lineorders[i].opriority, O_OPRIO_LEN); + PR_INT(fp_l, o->lineorders[i].ship_priority); + PR_INT(fp_l, o->lineorders[i].quantity); + PR_INT(fp_l, o->lineorders[i].extended_price); + PR_INT(fp_l, o->lineorders[i].order_totalprice); + PR_INT(fp_l, o->lineorders[i].discount); + PR_INT(fp_l, o->lineorders[i].revenue); + PR_INT(fp_l, o->lineorders[i].supp_cost); + PR_INT(fp_l, o->lineorders[i].tax); + PR_STR(fp_l, o->lineorders[i].commit_date, DATE_LEN); + PR_STR(fp_l, o->lineorders[i].shipmode, O_SHIP_MODE_LEN); + PR_END(fp_l); + } + + return(0); +} +#else +int +pr_line(order_t *o, int mode) +{ + static FILE *fp_l = NULL; + static int last_mode = 0; + long i; + int days; + char buf[100]; + + if (fp_l == NULL || mode != last_mode) + { + if (fp_l) + fclose(fp_l); + fp_l = print_prep(LINE, mode); + last_mode = mode; + } + + for (i = 0; i < o->lines; i++) + { + PR_STRT(fp_l); + PR_HUGE(fp_l, o->l[i].okey); + PR_INT(fp_l, o->l[i].partkey); + PR_INT(fp_l, o->l[i].suppkey); + PR_INT(fp_l, o->l[i].lcnt); + PR_INT(fp_l, o->l[i].quantity); + PR_MONEY(fp_l, o->l[i].eprice); + PR_MONEY(fp_l, o->l[i].discount); + PR_MONEY(fp_l, o->l[i].tax); + PR_CHR(fp_l, o->l[i].rflag[0]); + PR_CHR(fp_l, o->l[i].lstatus[0]); + PR_STR(fp_l, o->l[i].sdate, DATE_LEN); + PR_STR(fp_l, o->l[i].cdate, DATE_LEN); + PR_STR(fp_l, o->l[i].rdate, DATE_LEN); + PR_STR(fp_l, o->l[i].shipinstruct, L_INST_LEN); + PR_STR(fp_l, o->l[i].shipmode, L_SMODE_LEN); + PR_VSTR_LAST(fp_l, o->l[i].comment, + (columnar)?(long)(ceil(L_CMNT_LEN * + V_STR_HGH)):o->l[i].clen); + PR_END(fp_l); + } + + return(0); +} +#endif + +/* + * print the numbered order *and* its associated lineitems + */ +#ifdef SSBM +#else +int +pr_order_line(order_t *o, int mode) +{ + 
tdefs[ORDER].name = tdefs[ORDER_LINE].name; + pr_order(o, mode); + pr_line(o, mode); + + return(0); +} +#endif + +/* + * print the given part + */ +#ifdef SSBM +int +pr_part(part_t *part, int mode) +{ + static FILE *p_fp = NULL; + + if (p_fp == NULL) + p_fp = print_prep(PART, 0); + + PR_STRT(p_fp); + PR_INT(p_fp, part->partkey); + PR_VSTR(p_fp, part->name, + (columnar)?(long)P_NAME_LEN:part->nlen); + PR_STR(p_fp, part->mfgr, P_MFG_LEN); + PR_STR(p_fp, part->category, P_CAT_LEN); + PR_STR(p_fp, part->brand, P_BRND_LEN); + + /*need to handle color*/ + PR_VSTR(p_fp, part->color,(columnar)?(long)P_COLOR_LEN:part->clen); + PR_VSTR(p_fp, part->type, + (columnar)?(long)P_TYPE_LEN:part->tlen); + PR_INT(p_fp, part->size); + PR_STR(p_fp, part->container, P_CNTR_LEN); + PR_END(p_fp); + return(0); +} + +#else +int +pr_part(part_t *part, int mode) +{ +static FILE *p_fp = NULL; + + if (p_fp == NULL) + p_fp = print_prep(PART, 0); + + PR_STRT(p_fp); + PR_INT(p_fp, part->partkey); + PR_VSTR(p_fp, part->name, + (columnar)?(long)P_NAME_LEN:part->nlen); + PR_STR(p_fp, part->mfgr, P_MFG_LEN); + PR_STR(p_fp, part->brand, P_BRND_LEN); + PR_VSTR(p_fp, part->type, + (columnar)?(long)P_TYPE_LEN:part->tlen); + PR_INT(p_fp, part->size); + PR_STR(p_fp, part->container, P_CNTR_LEN); + PR_MONEY(p_fp, part->retailprice); + PR_VSTR_LAST(p_fp, part->comment, + (columnar)?(long)(ceil(P_CMNT_LEN * V_STR_HGH)):part->clen); + PR_END(p_fp); + + return(0); +} +#endif + +/* + * print the given part's suppliers + */ +#ifdef SSBM +/*SSBM don't have partsupplier table*/ +#else +int +pr_psupp(part_t *part, int mode) +{ + static FILE *ps_fp = NULL; + long i; + + if (ps_fp == NULL) + ps_fp = print_prep(PSUPP, mode); + + for (i = 0; i < SUPP_PER_PART; i++) + { + PR_STRT(ps_fp); + PR_INT(ps_fp, part->s[i].partkey); + PR_INT(ps_fp, part->s[i].suppkey); + PR_INT(ps_fp, part->s[i].qty); + PR_MONEY(ps_fp, part->s[i].scost); + PR_VSTR_LAST(ps_fp, part->s[i].comment, + (columnar)?(long)(ceil(PS_CMNT_LEN * 
V_STR_HGH)):part->s[i].clen); + PR_END(ps_fp); + } + + return(0); +} +#endif + +/* + * print the given part *and* its suppliers + */ +#ifdef SSBM +/*SSBM don't have partsupplier table*/ +#else +int +pr_part_psupp(part_t *part, int mode) +{ + tdefs[PART].name = tdefs[PART_PSUPP].name; + pr_part(part, mode); + pr_psupp(part, mode); + + return(0); +} +#endif + + +#ifdef SSBM +int +pr_supp(supplier_t *supp, int mode) +{ + static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(SUPP, mode); + + PR_STRT(fp); + PR_INT(fp, supp->suppkey); + PR_STR(fp, supp->name, S_NAME_LEN); + + PR_VSTR(fp, supp->address, + (columnar)?(long)(ceil(S_ADDR_LEN * V_STR_HGH)):supp->alen); + PR_STR(fp, supp->city, CITY_FIX); + PR_STR(fp, supp->nation_name, C_NATION_NAME_LEN); + PR_STR(fp, supp->region_name, C_REGION_NAME_LEN); + PR_STR(fp, supp->phone, PHONE_LEN); + PR_END(fp); + + return(0); +} +#else +int +pr_supp(supplier_t *supp, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(SUPP, mode); + + PR_STRT(fp); + PR_INT(fp, supp->suppkey); + PR_STR(fp, supp->name, S_NAME_LEN); + PR_VSTR(fp, supp->address, + (columnar)?(long)(ceil(S_ADDR_LEN * V_STR_HGH)):supp->alen); + PR_INT(fp, supp->nation_code); + PR_STR(fp, supp->phone, PHONE_LEN); + PR_MONEY(fp, supp->acctbal); + PR_VSTR_LAST(fp, supp->comment, + (columnar)?(long)(ceil(S_CMNT_LEN * V_STR_HGH)):supp->clen); + PR_END(fp); + + return(0); +} +#endif + +#ifdef SSBM +#else +int +pr_nation(code_t *c, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(NATION, mode); + + PR_STRT(fp); + PR_INT(fp, c->code); + PR_STR(fp, c->text, NATION_LEN); + PR_INT(fp, c->join); + PR_VSTR_LAST(fp, c->comment, + (columnar)?(long)(ceil(N_CMNT_LEN * V_STR_HGH)):c->clen); + PR_END(fp); + + return(0); +} + +int +pr_region(code_t *c, int mode) +{ +static FILE *fp = NULL; + + if (fp == NULL) + fp = print_prep(REGION, mode); + + PR_STRT(fp); + PR_INT(fp, c->code); + PR_STR(fp, c->text, REGION_LEN); + 
PR_VSTR_LAST(fp, c->comment, + (columnar)?(long)(ceil(R_CMNT_LEN * V_STR_HGH)):c->clen); + PR_END(fp); + + return(0); +} +#endif + +/* + * NOTE: this routine does NOT use the BCD2_* routines. As a result, + * it WILL fail if the keys being deleted exceed 32 bits. Since this + * would require ~660 update iterations, this seems an acceptable + * oversight + */ +int +pr_drange(int tbl, long min, long cnt, long num) +{ + static int last_num = 0; + static FILE *dfp = NULL; + int child = -1; + long start, last, new; + + static int rows_per_segment=0; + static int rows_this_segment=0; + static int residual_rows=0; + + if (last_num != num) + { + if (dfp) + fclose(dfp); + dfp = print_prep(tbl, -num); + if (dfp == NULL) + return(-1); + last_num = num; + rows_this_segment=0; + } + + start = MK_SPARSE(min, (num - 1)/ (10000 / refresh)); + last = start - 1; + for (child=min; cnt > 0; child++, cnt--) + { + new = MK_SPARSE(child, (num - 1) / (10000 / refresh)); + if (gen_rng == 1 && new - last == 1) + { + last = new; + continue; + } + if (gen_sql) + { + fprintf(dfp, + "delete from %s where %s between %ld and %ld;\n", + tdefs[ORDER].name, "o_orderkey", start, last); + fprintf(dfp, + "delete from %s where %s between %ld and %ld;\n", + tdefs[LINE].name, "l_orderkey", start, last); + fprintf(dfp, "commit work;\n"); + } + else + if (gen_rng) + { + PR_STRT(dfp); + PR_INT(dfp, start); + PR_INT(dfp, last); + PR_END(dfp); + } + else + { + if (delete_segments) + { + if(rows_per_segment==0) + { + rows_per_segment = (cnt / delete_segments); + residual_rows = (cnt % delete_segments); + rows_per_segment++; + } + if(delete_segment <= residual_rows) + { + if((++rows_this_segment) > rows_per_segment) + { + fclose(dfp); + dfp = print_prep(tbl, -num); + if (dfp == NULL) return(-1); + last_num = num; + rows_this_segment=1; + } + } + else + { + if((++rows_this_segment) >= rows_per_segment) + { + fclose(dfp); + dfp = print_prep(tbl, -num); + if (dfp == NULL) return(-1); + last_num = num; + 
rows_this_segment=1; + } + } + } + PR_STRT(dfp); + PR_KEY(dfp, new); + PR_END(dfp); + } + start = new; + last = new; + } + if (gen_rng) + { + PR_STRT(dfp); + PR_INT(dfp, start); + PR_INT(dfp, last); + PR_END(dfp); + } + + return(0); +} + +#ifdef SSBM +int pr_date(date_t *d, int mode){ + static FILE *d_fp = NULL; + + if (d_fp == NULL) + d_fp = print_prep(DATE, 0); + + PR_STRT(d_fp); + PR_INT(d_fp, d->datekey); + PR_STR(d_fp, d->date,D_DATE_LEN); + PR_STR(d_fp, d->dayofweek,D_DAYWEEK_LEN); + PR_STR(d_fp, d->month,D_MONTH_LEN); + PR_INT(d_fp, d->year); + PR_INT(d_fp, d->yearmonthnum); + PR_STR(d_fp, d->yearmonth,D_YEARMONTH_LEN); + PR_INT(d_fp, d->daynuminweek); + PR_INT(d_fp, d->daynuminmonth); + PR_INT(d_fp, d->daynuminyear); + PR_INT(d_fp, d->monthnuminyear); + PR_INT(d_fp, d->weeknuminyear); + PR_VSTR(d_fp, + d->sellingseason,(columnar)?(long)D_SEASON_LEN:d->slen); + PR_STR(d_fp,d->lastdayinweekfl,2); + PR_STR(d_fp,d->lastdayinmonthfl,2); + PR_STR(d_fp,d->holidayfl,2); + PR_STR(d_fp,d->weekdayfl,2); + + PR_END(d_fp); + return(0); + +} + +#endif +/* + * verify functions: routines which replace the pr_routines and generate a pseudo checksum + * instead of generating the actual contents of the tables. 
Meant to allow large scale data + * validation without requiring a large amount of storage + */ +#ifdef SSBM +int +vrf_cust(customer_t *c, int mode) +{ + VRF_STRT(CUST); + VRF_INT(CUST, c->custkey); + VRF_STR(CUST, c->name); + VRF_STR(CUST, c->address); + VRF_STR(CUST, c->city); + VRF_STR(CUST, c->nation_name); + VRF_STR(CUST, c->region_name); + VRF_STR(CUST, c->phone); + VRF_STR(CUST, c->mktsegment); + VRF_END(CUST); + + return(0); +} + +#else +int +vrf_cust(customer_t *c, int mode) +{ + VRF_STRT(CUST); + VRF_INT(CUST, c->custkey); + VRF_STR(CUST, c->name); + VRF_STR(CUST, c->address); + VRF_INT(CUST, c->nation_code); + VRF_STR(CUST, c->phone); + VRF_MONEY(CUST, c->acctbal); + VRF_STR(CUST, c->mktsegment); + VRF_STR(CUST, c->comment); + VRF_END(CUST); + + return(0); +} +#endif + +/* + * print the numbered order + */ +#ifdef SSBM +#else +int +vrf_order(order_t *o, int mode) +{ + VRF_STRT(ORDER); + VRF_HUGE(ORDER, o->okey); + VRF_INT(ORDER, o->custkey); + VRF_CHR(ORDER, o->orderstatus); + VRF_MONEY(ORDER, o->totalprice); + VRF_STR(ORDER, o->odate); + VRF_STR(ORDER, o->opriority); + VRF_STR(ORDER, o->clerk); + VRF_INT(ORDER, o->spriority); + VRF_STR(ORDER, o->comment); + VRF_END(ORDER); + + return(0); +} +#endif + +/* + * print an order's lineitems + */ +#ifdef SSBM +int +vrf_line(order_t *o, int mode) +{ + int i; + + for (i = 0; i < o->lines; i++) + { + VRF_STRT(LINE); + VRF_HUGE(LINE, o->lineorders[i].okey); + VRF_INT(LINE, o->lineorders[i].linenumber); + VRF_INT(LINE, o->lineorders[i].custkey); + VRF_INT(LINE, o->lineorders[i].partkey); + VRF_INT(LINE, o->lineorders[i].suppkey); + VRF_STR(LINE, o->lineorders[i].orderdate); + VRF_STR(LINE, o->lineorders[i].opriority); + VRF_INT(LINE, o->lineorders[i].ship_priority); + VRF_INT(LINE, o->lineorders[i].quantity); + VRF_INT(LINE, o->lineorders[i].extended_price); + VRF_INT(LINE, o->lineorders[i].order_totalprice); + VRF_INT(LINE, o->lineorders[i].discount); + VRF_INT(LINE, o->lineorders[i].revenue); + VRF_INT(LINE, 
o->lineorders[i].supp_cost); + VRF_INT(LINE, o->lineorders[i].tax); + VRF_STR(LINE, o->lineorders[i].commit_date); + VRF_STR(LINE, o->lineorders[i].shipmode); + VRF_END(LINE); + } + + return(0); +} + +#else +int +vrf_line(order_t *o, int mode) +{ + int i; + + for (i = 0; i < o->lines; i++) + { + VRF_STRT(LINE); + VRF_HUGE(LINE, o->l[i].okey); + VRF_INT(LINE, o->l[i].partkey); + VRF_INT(LINE, o->l[i].suppkey); + VRF_INT(LINE, o->l[i].lcnt); + VRF_INT(LINE, o->l[i].quantity); + VRF_MONEY(LINE, o->l[i].eprice); + VRF_MONEY(LINE, o->l[i].discount); + VRF_MONEY(LINE, o->l[i].tax); + VRF_CHR(LINE, o->l[i].rflag[0]); + VRF_CHR(LINE, o->l[i].lstatus[0]); + VRF_STR(LINE, o->l[i].sdate); + VRF_STR(LINE, o->l[i].cdate); + VRF_STR(LINE, o->l[i].rdate); + VRF_STR(LINE, o->l[i].shipinstruct); + VRF_STR(LINE, o->l[i].shipmode); + VRF_STR(LINE, o->l[i].comment); + VRF_END(LINE); + } + + return(0); +} +#endif + +/* + * print the numbered order *and* its associated lineitems + */ +#ifdef SSBM +#else +int +vrf_order_line(order_t *o, int mode) +{ + vrf_order(o, mode); + vrf_line(o, mode); + + return(0); +} +#endif + +/* + * print the given part + */ +#ifdef SSBM +int +vrf_part(part_t *part, int mode) +{ + + VRF_STRT(PART); + VRF_INT(PART, part->partkey); + VRF_STR(PART, part->name); + VRF_STR(PART, part->mfgr); + VRF_STR(PART, part->brand); + VRF_STR(PART, part->type); + VRF_INT(PART, part->size); + VRF_STR(PART, part->container); + VRF_STR(PART, part->category); + VRF_END(PART); + + return(0); +} + +#else +int +vrf_part(part_t *part, int mode) +{ + + VRF_STRT(PART); + VRF_INT(PART, part->partkey); + VRF_STR(PART, part->name); + VRF_STR(PART, part->mfgr); + VRF_STR(PART, part->brand); + VRF_STR(PART, part->type); + VRF_INT(PART, part->size); + VRF_STR(PART, part->container); + VRF_MONEY(PART, part->retailprice); + VRF_STR(PART, part->comment); + VRF_END(PART); + + return(0); +} +#endif + +/* + * print the given part's suppliers + */ +#ifdef SSBM +#else +int +vrf_psupp(part_t *part, 
int mode) +{ + long i; + + for (i = 0; i < SUPP_PER_PART; i++) + { + VRF_STRT(PSUPP); + VRF_INT(PSUPP, part->s[i].partkey); + VRF_INT(PSUPP, part->s[i].suppkey); + VRF_INT(PSUPP, part->s[i].qty); + VRF_MONEY(PSUPP, part->s[i].scost); + VRF_STR(PSUPP, part->s[i].comment); + VRF_END(PSUPP); + } + + return(0); +} +#endif + +/* + * print the given part *and* its suppliers + */ +#ifdef SSBM +#else +int +vrf_part_psupp(part_t *part, int mode) +{ + vrf_part(part, mode); + vrf_psupp(part, mode); + + return(0); +} +#endif + +#ifdef SSBM +int +vrf_supp(supplier_t *supp, int mode) +{ + VRF_STRT(SUPP); + VRF_INT(SUPP, supp->suppkey); + VRF_STR(SUPP, supp->name); + + VRF_STR(CUST, supp->address); + VRF_INT(CUST, supp->nation_key); + VRF_STR(CUST, supp->nation_name); + VRF_INT(CUST, supp->region_key); + VRF_STR(CUST, supp->region_name); + VRF_STR(CUST, supp->phone); + VRF_END(SUPP); + + return(0); +} + +#else +int +vrf_supp(supplier_t *supp, int mode) +{ + VRF_STRT(SUPP); + VRF_INT(SUPP, supp->suppkey); + VRF_STR(SUPP, supp->name); + VRF_STR(SUPP, supp->address); + VRF_INT(SUPP, supp->nation_code); + VRF_STR(SUPP, supp->phone); + VRF_MONEY(SUPP, supp->acctbal); + VRF_STR(SUPP, supp->comment); + VRF_END(SUPP); + + return(0); +} +#endif + +#ifdef SSBM +#else +int +vrf_nation(code_t *c, int mode) +{ + VRF_STRT(NATION); + VRF_INT(NATION, c->code); + VRF_STR(NATION, c->text); + VRF_INT(NATION, c->join); + VRF_STR(NATION, c->comment); + VRF_END(NATION); + + return(0); +} + +int +vrf_region(code_t *c, int mode) +{ + VRF_STRT(REGION); + VRF_INT(REGION, c->code); + VRF_STR(REGION, c->text); + VRF_STR(REGION, c->comment); + VRF_END(fp); + + return(0); +} +#endif + + +#ifdef SSBM +int vrf_date(date_t * d, int mode) +{ + VRF_STRT(DATE); + VRF_INT(DATE, d->datekey); + VRF_STR(DATE, d->date); + VRF_STR(DATE, d->dayofweek); + VRF_STR(DATE, d->month); + VRF_INT(DATE, d->year); + VRF_INT(DATE, d->yearmonthnum); + VRF_STR(DATE, d->yearmonth); + VRF_INT(DATE, d->daynuminweek); + VRF_INT(DATE, 
/*
 * FUNCTION strip_comments(line)
 *
 * Remove all comments from 'line', in place; recognizes both {} and --
 * comments.
 *
 *   -- comment : the line is truncated at the "--".
 *   {} comment : everything from '{' through the matching '}' is removed.
 *                A {} comment may run over several lines; the "inside a
 *                comment" state is kept in a function-local static, so the
 *                lines of one template must be passed in order.
 *
 * Whichever comment opener occurs first on the line wins, so a "--" inside
 * a {} comment does not truncate the line, and a '{' after a "--" is
 * ignored.  Text that precedes a comment on the same line is preserved.
 *
 * Returns: 0, always.
 */
int
strip_comments(char *line)
{
    static int in_comment = 0;
    char *cp1, *cp2, *dash;

    cp1 = line;

    while (1)   /* traverse the entire string */
    {
        if (in_comment)
        {
            if ((cp2 = strchr(cp1, '}')) != NULL)   /* comment ends */
            {
                /* source and destination overlap: memmove(), not strcpy() */
                memmove(cp1, cp2 + 1, strlen(cp2 + 1) + 1);
                in_comment = 0;
                continue;
            }
            /* comment continues on the next line; drop the rest of this one */
            *cp1 = '\0';
            break;
        }

        /* not in_comment: find the earliest comment opener */
        dash = strstr(cp1, "--");
        cp2 = strchr(cp1, '{');

        if (dash != NULL && (cp2 == NULL || dash < cp2))
        {
            *dash = '\0';       /* found a '--' comment */
            break;
        }
        if (cp2 == NULL)
            break;              /* no more comments on this line */

        /* comment starts: resume at the '{' so the text before it survives */
        in_comment = 1;
        cp1 = cp2;
    }
    return(0);
}
(isdigit(*cptr) || *cptr == ' ') cptr++; + fprintf(ofp, SET_ROWCOUNT, rowcnt); + } + continue; + case 'o': + case 'O': + if (flags & OUTPUT) + fprintf(ofp,"%s '%s/%s.%d'", SET_OUTPUT, osuff, + qtag, (snum < 0)?0:snum); + cptr++; + break; + case 'q': + case 'Q': + fprintf(ofp,"%s", qtag); + cptr++; + break; + case 's': + case 'S': + fprintf(ofp,"%d", (snum < 0)?0:snum); + cptr++; + break; + case 'X': + case 'x': + if (flags & EXPLAIN) + fprintf(ofp, "%s\n", GEN_QUERY_PLAN); + cptr++; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + varsub(qnum, atoi(cptr), flags & DFLT); + while (isdigit(*++cptr)); + break; + default: + fprintf(stderr, "-- unknown flag '%c%c' ignored\n", + VTAG, *cptr); + cptr++; + break; + } + mark=cptr; + } + fprintf(ofp,"%s", mark); + } + fclose(qfp); + fflush(stdout); + return; +} + +void +usage(void) +{ +printf("%s Parameter Substitution (v. %d.%d.%d%s)\n", + NAME, VERSION,RELEASE, + MODIFICATION,PATCH); +printf("Copyright %s %s\n", TPC, C_DATES); +printf("USAGE: %s [ queries ]\n", prog); +printf("Options:\n"); +printf("\t-a\t\t-- use ANSI semantics.\n"); +printf("\t-b \t-- load distributions from \n"); +printf("\t-c\t\t-- retain comments found in template.\n"); +printf("\t-d\t\t-- use default substitution values.\n"); +printf("\t-h\t\t-- print this usage summary.\n"); +printf("\t-i \t-- use the contents of file to begin a query.\n"); +printf("\t-l \t-- log parameters to .\n"); +printf("\t-n \t-- connect to database .\n"); +printf("\t-N\t\t-- use default rowcounts and ignore :n directive.\n"); +printf("\t-o \t-- set the output file base path to .\n"); +printf("\t-p \t\t-- use the query permutation for stream \n"); +printf("\t-r \t\t-- seed the random number generator with \n"); +printf("\t-s \t\t-- base substitutions on an SF of \n"); +printf("\t-v\t\t-- verbose.\n"); +printf("\t-t \t-- use the contents of file to complete a query\n"); +printf("\t-x\t\t-- enable SET EXPLAIN 
in each query.\n"); +} + +int +process_options(int cnt, char **args) +{ + int flag; + + while((flag = getopt(cnt, args, "ab:cdhi:n:Nl:o:p:r:s:t:vx")) != -1) + switch(flag) + { + case 'a': /* use ANSI semantics */ + flags |= ANSI; + break; + case 'b': /* load distributions from named file */ + d_path = (char *)malloc(strlen(optarg) + 1); + MALLOC_CHECK(d_path); + strcpy(d_path, optarg); + break; + case 'c': /* retain comments in EQT */ + flags |= COMMENT; + break; + case 'd': /* use default substitution values */ + flags |= DFLT; + break; + case 'h': /* just generate the usage summary */ + usage(); + exit(0); + break; + case 'i': /* set stream initialization file name */ + ifile = malloc(strlen(optarg) + 1); + MALLOC_CHECK(ifile); + strcpy(ifile, optarg); + flags |= INIT; + break; + case 'l': /* log parameter usages */ + lfile = malloc(strlen(optarg) + 1); + MALLOC_CHECK(lfile); + strcpy(lfile, optarg); + flags |= LOG; + break; + case 'N': /* use default rowcounts */ + flags |= DFLT_NUM; + break; + case 'n': /* set database name */ + db_name = malloc(strlen(optarg) + 1); + MALLOC_CHECK(db_name); + strcpy(db_name, optarg); + flags |= DBASE; + break; + case 'o': /* set the output path */ + osuff = malloc(strlen(optarg) + 1); + MALLOC_CHECK(osuff); + strcpy(osuff, optarg); + flags |=OUTPUT; + break; + case 'p': /* permutation for a given stream */ + snum = atoi(optarg); + break; + case 'r': /* set random number seed for parameter gen */ + flags |= SEED; + rndm = atol(optarg); + break; + case 's': /* scale of data set to run against */ + flt_scale = atof(optarg); + if (scale > MAX_SCALE) + fprintf(stderr, "%s %5.0f %s\n%s\n", + "WARNING: Support for scale factors >", + MAX_SCALE, + "GB is still in development.", + "Data set integrity is not guaranteed.\n"); + break; + case 't': /* set termination file name */ + tfile = malloc(strlen(optarg) + 1); + MALLOC_CHECK(tfile); + strcpy(tfile, optarg); + flags |= TERMINATE; + break; + case 'v': /* verbose */ + flags |= VERBOSE; 
+ break; + case 'x': /* set explain in the queries */ + flags |= EXPLAIN; + break; + default: + printf("unknown option '%s' ignored\n", args[optind]); + usage(); + exit(1); + break; + } + return(0); +} + +int +setup(void) +{ + + asc_date = mk_ascdate(); + + read_dist(env_config(DIST_TAG, DIST_DFLT), "p_cntr", &p_cntr_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "colors", &colors); + read_dist(env_config(DIST_TAG, DIST_DFLT), "p_types", &p_types_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "nations", &nations); + read_dist(env_config(DIST_TAG, DIST_DFLT), "nations2", &nations2); + read_dist(env_config(DIST_TAG, DIST_DFLT), "regions", ®ions); + read_dist(env_config(DIST_TAG, DIST_DFLT), "o_oprio", + &o_priority_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "instruct", + &l_instruct_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "smode", &l_smode_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "category", + &l_category_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "rflag", &l_rflag_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "msegmnt", &c_mseg_set); + read_dist(env_config(DIST_TAG, DIST_DFLT), "Q13a", &q13a); + read_dist(env_config(DIST_TAG, DIST_DFLT), "Q13b", &q13b); + + return(0); +} + + +main(int ac, char **av) +{ + int i; + FILE *ifp; + char line[LINE_SIZE]; + + prog = av[0]; + flt_scale = (double)1.0; + flags = 0; + d_path = NULL; + process_options(ac, av); + if (flags & VERBOSE) + fprintf(ofp, + "-- TPC %s Parameter Substitution (Version %d.%d.%d%s)\n", + NAME, VERSION, RELEASE, MODIFICATION, PATCH); + + setup(); + + if (!(flags & DFLT)) /* perturb the RNG */ + { + if (!(flags & SEED)) + rndm = (long)((unsigned)time(NULL) * DSS_PROC); + if (rndm < 0) + rndm += 2147483647; + Seed[0].value = rndm; + for (i=1; i <= QUERIES_PER_SET; i++) + { + Seed[0].value = NextRand(Seed[0].value); + Seed[i].value = Seed[0].value; + } + printf("-- using %ld as a seed to the RNG\n", rndm); + } + else + printf("-- using default substitutions\n"); 
+ + if (flags & INIT) /* init stream with ifile */ + { + ifp = fopen(ifile, "r"); + OPEN_CHECK(ifp, ifile); + while (fgets(line, LINE_SIZE, ifp) != NULL) + fprintf(stdout, "%s", line); + } + + if (snum >= 0) + if (optind < ac) + for (i=optind; i < ac; i++) + { + char qname[10]; + sprintf(qname, "%d", SEQUENCE(snum, atoi(av[i]))); + qsub(qname, flags); + } + else + for (i=1; i <= QUERIES_PER_SET; i++) + { + char qname[10]; + sprintf(qname, "%d", SEQUENCE(snum, i)); + qsub(qname, flags); + } + else + if (optind < ac) + for (i=optind; i < ac; i++) + qsub(av[i], flags); + else + for (i=1; i <= QUERIES_PER_SET; i++) + { + char qname[10]; + sprintf(qname, "%d", i); + qsub(qname, flags); + } + + if (flags & TERMINATE) /* terminate stream with tfile */ + { + ifp = fopen(tfile, "r"); + if (ifp == NULL) + OPEN_CHECK(ifp, tfile); + while (fgets(line, LINE_SIZE, ifp) != NULL) + fprintf(stdout, "%s", line); + } + + return(0); +} + diff --git a/rnd.c b/rnd.c new file mode 100644 index 0000000..a159446 --- /dev/null +++ b/rnd.c @@ -0,0 +1,262 @@ +/* @(#)rnd.c 2.1.8.2 + * + * + * RANDOM.C -- Implements Park & Miller's "Minimum Standard" RNG + * + * (Reference: CACM, Oct 1988, pp 1192-1201) + * + * NextRand: Computes next random integer + * UnifInt: Yields an long uniformly distributed between given bounds + * UnifReal: ields a real uniformly distributed between given bounds + * Exponential: Yields a real exponentially distributed with given mean + * + */ + +#include "config.h" +#include +#include +#include "dss.h" +#include "rnd.h" + +char *env_config PROTO((char *tag, char *dflt)); +void NthElement(long, long *); + +void +dss_random(long *tgt, long lower, long upper, long stream) +{ + *tgt = UnifInt((long)lower, (long)upper, (long)stream); + Seed[stream].usage += 1; + + return; +} + +void +row_start(int t) \ +{ + int i; + for (i=0; i <= MAX_STREAM; i++) + Seed[i].usage = 0 ; + + return; +} + +void +row_stop(int t) \ + { + int i; + + /* need to allow for handling the master and 
detail together */ + if (t == ORDER_LINE) + t = ORDER; + if (t == PART_PSUPP) + t = PART; + + for (i=0; i <= MAX_STREAM; i++) + if ((Seed[i].table == t) || (Seed[i].table == tdefs[t].child)) + { + if (set_seeds && (Seed[i].usage > Seed[i].boundary)) + { + fprintf(stderr, "\nSEED CHANGE: seed[%d].usage = %d\n", + i, Seed[i].usage); + Seed[i].boundary = Seed[i].usage; + } + else + { + NthElement((Seed[i].boundary - Seed[i].usage), &Seed[i].value); + } + } + return; + } + +void +dump_seeds(int tbl) +{ + int i; + + for (i=0; i <= MAX_STREAM; i++) + if (Seed[i].table == tbl) + printf("%d:\t%ld\n", i, Seed[i].value); + return; +} + +/****************************************************************** + + NextRand: Computes next random integer + +*******************************************************************/ + +/* + * long NextRand( long nSeed ) + */ +long +NextRand(long nSeed) + +/* + * nSeed is the previous random number; the returned value is the + * next random number. The routine generates all numbers in the + * range 1 .. nM-1. + */ + +{ + + /* + * The routine returns (nSeed * nA) mod nM, where nA (the + * multiplier) is 16807, and nM (the modulus) is + * 2147483647 = 2^31 - 1. + * + * nM is prime and nA is a primitive element of the range 1..nM-1. + * This * means that the map nSeed = (nSeed*nA) mod nM, starting + * from any nSeed in 1..nM-1, runs through all elements of 1..nM-1 + * before repeating. It never hits 0 or nM. + * + * To compute (nSeed * nA) mod nM without overflow, use the + * following trick. Write nM as nQ * nA + nR, where nQ = nM / nA + * and nR = nM % nA. (For nM = 2147483647 and nA = 16807, + * get nQ = 127773 and nR = 2836.) Write nSeed as nU * nQ + nV, + * where nU = nSeed / nQ and nV = nSeed % nQ. Then we have: + * + * nM = nA * nQ + nR nQ = nM / nA nR < nA < nQ + * + * nSeed = nU * nQ + nV nU = nSeed / nQ nV < nU + * + * Since nA < nQ, we have nA*nQ < nM < nA*nQ + nA < nA*nQ + nQ, + * i.e., nM/nQ = nA. 
This gives bounds on nU and nV as well: + * nM > nSeed => nM/nQ * >= nSeed/nQ => nA >= nU ( > nV ). + * + * Using ~ to mean "congruent mod nM" this gives: + * + * nA * nSeed ~ nA * (nU*nQ + nV) + * + * ~ nA*nU*nQ + nA*nV + * + * ~ nU * (-nR) + nA*nV (as nA*nQ ~ -nR) + * + * Both products in the last sum can be computed without overflow + * (i.e., both have absolute value < nM) since nU*nR < nA*nQ < nM, + * and nA*nV < nA*nQ < nM. Since the two products have opposite + * sign, their sum lies between -(nM-1) and +(nM-1). If + * non-negative, it is the answer (i.e., it's congruent to + * nA*nSeed and lies between 0 and nM-1). Otherwise adding nM + * yields a number still congruent to nA*nSeed, but now between + * 0 and nM-1, so that's the answer. + */ + + long nU, nV; + + nU = nSeed / nQ; + nV = nSeed - nQ * nU; /* i.e., nV = nSeed % nQ */ + nSeed = nA * nV - nU * nR; + if (nSeed < 0) + nSeed += nM; + return (nSeed); +} + +/****************************************************************** + + UnifInt: Yields an long uniformly distributed between given bounds + +*******************************************************************/ + +/* + * long UnifInt( long nLow, long nHigh, long nStream ) + */ +long +UnifInt(long nLow, long nHigh, long nStream) + +/* + * Returns an integer uniformly distributed between nLow and nHigh, + * including * the endpoints. nStream is the random number stream. + * Stream 0 is used if nStream is not in the range 0..MAX_STREAM. 
/*
 * double Exponential( double dMean, long nStream )
 *
 * Returns a double exponentially distributed with mean dMean, obtained
 * by inverting a uniform draw: -dMean * log(1 - u).
 * 0.0 is returned iff dMean <= 0.0 (no random number is consumed in that
 * case).  nStream is the random number stream; stream 0 is used if
 * nStream is not in the range 0..MAX_STREAM.
 */
double
Exponential(double dMean, long nStream)
{
    double dTemp;

    if (nStream < 0 || nStream > MAX_STREAM)
        nStream = 0;
    if (dMean <= 0.0)
        return (0.0);

    /* advance the stream by one step and scale the new seed by dM */
    Seed[nStream].value = NextRand(Seed[nStream].value);
    dTemp = (double) Seed[nStream].value / dM; /* unif between 0..1 */
    return (-dMean * log(1.0 - dTemp));
}
/*
 * VSTR_MAX(len) -- len / 5 rounded up, plus one (as the expression
 * appears to have been intended).
 *
 * The argument and the conditional are fully parenthesized.  Without the
 * parentheses ?: binds more loosely than +, so the whole sum became the
 * condition and the macro could only ever yield 0 or 2 -- presumably the
 * "not initializing correctly" symptom noted for this macro.
 */
#define VSTR_MAX(len) ((long)((len) / 5 + (((len) % 5 == 0) ? 0 : 1) + 1))
0, 3}, /* S_PHNE_SD 34 */ + {SUPP, 962338209, 0, 1}, /* S_ABAL_SD 35 */ + {SUPP, 1341315363, 0, RNG_PER_SENT * 11}, /* S_CMNT_SD 36 */ + {PART, 709314158, 0, 92}, /* P_NAME_SD 37 */ + {ORDER, 591449447, 0, 1}, /* O_PRIO_SD 38 */ + {LINE, 431918286, 0, 1}, /* HVAR_SD 39 */ + {ORDER, 851767375, 0, 1}, /* O_CKEY_SD 40 */ + {NATION, 606179079, 0, RNG_PER_SENT * 16}, /* N_CMNT_SD 41 */ + {REGION, 1500869201, 0, RNG_PER_SENT * 16}, /* R_CMNT_SD 42 */ + {ORDER, 1434868289, 0, 1}, /* O_LCNT_SD 43 */ + {SUPP, 263032577, 0, 1}, /* BBB offset 44 */ + {SUPP, 753643799, 0, 1}, /* BBB type 45 */ + {SUPP, 202794285, 0, 1}, /* BBB comment 46 */ + {SUPP, 715851524, 0, 1} /* BBB junk 47 */ +}; diff --git a/shared.h b/shared.h new file mode 100644 index 0000000..c1c18ce --- /dev/null +++ b/shared.h @@ -0,0 +1,140 @@ +/* + * Sccsid: @(#)shared.h 2.1.8.1 + * Modified for SSBM + */ +#define N_CMNT_LEN 72 +#define N_CMNT_MAX 152 +#define R_CMNT_LEN 72 +#define R_CMNT_MAX 152 +#define MONEY_SCL 0.01 +#define V_STR_HGH 1.6 + +#ifdef SSBM +#define P_NAME_LEN 22 +#define P_MFG_LEN 6 +#define P_COLOR_LEN 3 +#define P_COLOR_MAX 11 +#define P_TYPE_MAX 25 +#define P_CAT_LEN 7 +#define P_CAT_MIN 1 +#define P_CAT_MAX 5 +#define P_CAT_SD 97 +#define S_NATION_NAME_LEN 15 +#define S_REGION_NAME_LEN 12 +#define C_NATION_NAME_LEN 15 +#define C_REGION_NAME_LEN 12 +#define C_NAT_SD 16 +#define C_REG_SD 3 +#define O_SHIP_STRU_LEN 25 +#define O_SHIP_MODE_LEN 10 +#define O_SHIP_PRIO_LEN 1 +#define D_DATE_LEN 18 +#define D_DAYWEEK_LEN 9 +#define D_YEARMONTH_LEN 7 +#define D_SEASON_LEN 12 +#define D_MONTH_LEN 9 +#define D_STARTDATE 694245661 /*corresponding to 1/1/1992 1:1:1*/ +#define NAMTION_BRIEF_LEN 9 +#define CITY_CODE_SEED 15 +#define NUM_DAYS 2556 +#define NUM_SEASONS 5 +#define NUM_HOLIDAYS 10 +#define CITY_FIX 10 +#else + +#define P_NAME_LEN 55 +#define P_MFG_LEN 25 + +#endif + +#define P_BRND_LEN 10 + +#ifdef SSBM +#define P_TYPE_LEN 12 + +#else + +#define P_TYPE_LEN 25 + +#endif + +#define 
P_CNTR_LEN 10 +#define P_CMNT_LEN 14 +#define P_CMNT_MAX 23 +#define P_CAT_SEED 25 + +#define S_NAME_LEN 25 + +#ifdef SSBM +#define S_ADDR_LEN 15 +#define S_ADDR_MAX 25 +#else + +#define S_ADDR_LEN 25 +#define S_ADDR_MAX 40 +#endif + +#define S_CMNT_LEN 63 +#define S_CMNT_MAX 101 +#define PS_CMNT_LEN 124 +#define PS_CMNT_MAX 199 + +#ifdef SSBM +#define C_NAME_LEN 25 +#define C_MSEG_MIN 1 +#define C_MSEG_MAX 5 +#define C_ADDR_LEN 15 +#define C_ADDR_MAX 25 +#else +#define C_NAME_LEN 18 +#define C_ADDR_LEN 25 +#define C_ADDR_MAX 40 +#endif + +#define C_MSEG_LEN 10 +#define C_CMNT_LEN 73 +#define C_CMNT_MAX 117 + +#ifdef SSBM +#define O_OPRIO_LEN 8 + +#else +#define O_OPRIO_LEN 15 + +#endif + +#define O_CLRK_LEN 15 +#define O_CMNT_LEN 49 +#define O_CMNT_MAX 79 +#define L_CMNT_LEN 27 +#define L_CMNT_MAX 44 +#define L_INST_LEN 25 +#define L_SMODE_LEN 10 +#define T_ALPHA_LEN 10 +#define DATE_LEN 13 /* long enough to hold either date format */ +#define NATION_LEN 25 +#define REGION_LEN 25 +#define PHONE_LEN 15 + +#ifdef SSBM +#define MAXAGG_LEN 10 /* max component length for a agg str */ + +#else +#define MAXAGG_LEN 20 /* max component length for a agg str */ + +#endif + +#define P_CMNT_SD 6 +#define PS_CMNT_SD 9 +#define O_CMNT_SD 12 +#define C_ADDR_SD 26 +#define C_CMNT_SD 31 +#define S_ADDR_SD 32 +#define S_CMNT_SD 36 +#define L_CMNT_SD 25 + + + + + + diff --git a/speed_seed.c b/speed_seed.c new file mode 100644 index 0000000..402b7de --- /dev/null +++ b/speed_seed.c @@ -0,0 +1,325 @@ +/* @(#)speed_seed.c 2.1.8.2 */ +#include +#include +#include "dss.h" + +/* _tal long RandSeed = "Random^SeedFromTimestamp" (void); */ + +#define FAKE_V_STR(avg, sd, cnt) \ + ADVANCE_STREAM(sd, \ + (long)(Seed[sd].boundary*cnt)) +#define ADVANCE_STREAM(stream_id, num_calls) \ + NthElement(num_calls, &Seed[stream_id].value) + +#define MAX_COLOR 92 +long name_bits[MAX_COLOR / BITS_PER_LONG]; +extern seed_t Seed[]; + +/* WARNING! This routine assumes the existence of 64-bit */ +/* integers. 
/*
 * NthElement() -- advance *StartSeed by N applications of the generator
 * (variant compiled when SUPPORT_64BITS is defined).
 *
 * N          number of generator steps to skip; N <= 0 leaves the seed
 *            unchanged, since the loop body never runs
 * StartSeed  in: current seed; out: the seed N steps later
 *
 * Computes (Multiplier ** N) * seed mod Modulus by repeated squaring:
 * the bits of N are consumed from the low end, Mult holds
 * Multiplier ** (2 ** k) mod Modulus for the bit being examined, and Z
 * is multiplied by it whenever that bit is set.  All products are formed
 * in DSS_HUGE, which must be wide enough to hold the square of a value
 * below Modulus.
 */
void NthElement (long N, long *StartSeed)
{
    DSS_HUGE Z;
    DSS_HUGE Mult;
    static int ln=-1;
    int i;

    /* progress indicator on stderr; ln only counts calls made while verbose */
    if ((verbose > 0) && ++ln % 1000 == 0)
    {
        i = ln % LN_CNT;
        fprintf(stderr, "%c\b", lnoise[i]);
    }
    Mult = Multiplier;
    Z = (DSS_HUGE) *StartSeed;
    while (N > 0 )
    {
        if (N % 2 != 0) /* testing for oddness, this seems portable */
            Z = (Mult * Z) % Modulus;
        N = N / 2; /* integer division, truncates */
        Mult = (Mult * Mult) % Modulus;
    }
    *StartSeed = (long)Z;

    return;
}
Shelton -- Unisys + * July 26, 1995 + * + * RND_seed: Computes the nth seed in the total sequence + * RND_shift: Shifts a random number by a given number of seeds + * RND_ModMult: Multiplies two numbers mod (2^31 - 1) + * + */ + + + +#include +#include /* required only for F_FatalError */ + +typedef signed long RND; +typedef unsigned long URND; + +#define FatalError(e) F_FatalError( (e), __FILE__, __LINE__ ) +void F_FatalError( int x, char *y, int z ) {fprintf(stderr, "Bang!\n");} + + +/* Prototypes */ +RND RND_seed( RND ); +RND RND_shift( RND, RND ); +static RND RND_ModMult( RND, RND ); + + + +RND +RND_seed ( RND Order ) +{ +static const RND TopMask = 0x40000000; +RND Mask; +RND Result; + + +if (Order <= -Modulus || Order >= Modulus) + FatalError(1023); + +if (Order < 0) Order = Modulus - 1L + Order; + +Mask = TopMask; +Result = 1L; + +while (Mask > Order) Mask >>= 1; + +while (Mask > 0) + { + if (Mask & Order) + { + Result = RND_ModMult( Result, Result); + Result = RND_ModMult( Result, Multiplier ); + } + else + { + Result = RND_ModMult( Result, Result ); + } + Mask >>= 1; + } + +return (Result); + +} /* RND_seed */ + + + +/*********************************************************************** + + RND_shift: Shifts a random number by a given number of seeds + +***********************************************************************/ + +void +NthElement ( long Shift, long *Seed) + +{ + RND Power; + static int ln=-1; + int i; + + if ((verbose > 0) && ++ln % 100 == 0) + { + i = (ln/100) % LN_CNT; + fprintf(stderr, "%c\b", lnoise[i]); + } + + +if (*Seed <= 0 || *Seed >= Modulus) + FatalError(1023); +if (Shift <= -Modulus || Shift >= Modulus) + FatalError(1023); + +Power = RND_seed( Shift ); + +*Seed = RND_ModMult( *Seed, Power ); + +return; +} /* RND_shift */ + + + +/********************************************************************* + + RND_ModMult: Multiplies two numbers mod (2^31 - 1) + +*********************************************************************/ + 
/*
 * fake_a_rnd() -- updates Seed[column] using the a_rnd algorithm.
 *
 * min, max  bounds handed to RANDOM() to draw a length
 * column    index into Seed[] of the stream to use
 *
 * Draws a length through RANDOM(), converts it to an iteration count of
 * len / 5 rounded up, and passes that count to NthElement().
 */
void
fake_a_rnd(int min, int max, int column)
{
    long len, itcount;
    RANDOM(len, (long)min, (long)max, (long)column);
    /* itcount = ceil(len / 5) */
    if (len % 5L == 0)
        itcount = len/5;
    else itcount = len/5 + 1L;
    /*
     * NOTE(review): ADVANCE_STREAM() in this file hands &Seed[..].value
     * to NthElement(); passing .usage here looks unintended.  Confirm
     * against the callers before changing it -- a fix would alter the
     * generated random stream.
     */
    NthElement(itcount, &Seed[column].usage);
    return;
}
ADVANCE_STREAM(O_CKEY_SD, skip_count); + FAKE_V_STR(O_CMNT_LEN, O_CMNT_SD, skip_count); + ADVANCE_STREAM(O_SUPP_SD, skip_count); + ADVANCE_STREAM(O_CLRK_SD, skip_count); + ADVANCE_STREAM(O_PRIO_SD, skip_count); + ADVANCE_STREAM(O_ODATE_SD, skip_count); + + return (0L); +} + +long +sd_psupp(int child, long skip_count) + { + int j; + + for (j=0; j < SUPP_PER_PART; j++) + { + ADVANCE_STREAM(PS_QTY_SD, skip_count); + ADVANCE_STREAM(PS_SCST_SD, skip_count); + } + FAKE_V_STR(PS_CMNT_LEN, PS_CMNT_SD, skip_count); + + return(0L); + } + +long +sd_cust(int child, long skip_count) +{ + + FAKE_V_STR(C_ADDR_LEN, C_ADDR_SD, skip_count); + FAKE_V_STR(C_CMNT_LEN, C_CMNT_SD, skip_count); + ADVANCE_STREAM(C_NTRG_SD, skip_count); + ADVANCE_STREAM(C_PHNE_SD, 3L * skip_count); + ADVANCE_STREAM(C_ABAL_SD, skip_count); + ADVANCE_STREAM(C_MSEG_SD, skip_count); + return(0L); +} + +long +sd_supp(int child, long skip_count) +{ + ADVANCE_STREAM(S_NTRG_SD, skip_count); + ADVANCE_STREAM(S_PHNE_SD, 3L * skip_count); + ADVANCE_STREAM(S_ABAL_SD, skip_count); + FAKE_V_STR(S_ADDR_LEN, S_ADDR_SD, skip_count); + FAKE_V_STR(S_CMNT_LEN, S_CMNT_SD, skip_count); + ADVANCE_STREAM(BBB_CMNT_SD, skip_count); + ADVANCE_STREAM(BBB_JNK_SD, skip_count); + ADVANCE_STREAM(BBB_OFFSET_SD, skip_count); + ADVANCE_STREAM(BBB_TYPE_SD, skip_count); /* avoid one trudge */ + + return(0L); +} diff --git a/text.c b/text.c new file mode 100644 index 0000000..ef4df3c --- /dev/null +++ b/text.c @@ -0,0 +1,313 @@ +/* @(#)text.c 2.1.8.1 */ +/* + * text.c --- pseaudo text generator for use in DBGEN 2.0 + * + * Defined Routines: + * dbg_text() -- select and translate a sentance form + */ + +#ifdef TEST +#define DECLARER +#endif /* TEST */ + +#include "config.h" +#include +#if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */ +/*#include +#include */ +#endif /* WIN32 */ +#include /* */ +#include +#include +#include +#include +#include +#include +#ifdef HP +#include +#endif +#if (defined(WIN32)&&!defined(_POSIX_)) 
+#include +#pragma warning(disable:4201) +#pragma warning(disable:4214) +#pragma warning(disable:4514) +#define WIN32_LEAN_AND_MEAN +#define NOATOM +#define NOGDICAPMASKS +#define NOMETAFILE +#define NOMINMAX +#define NOMSG +#define NOOPENFILE +#define NORASTEROPS +#define NOSCROLL +#define NOSOUND +#define NOSYSMETRICS +#define NOTEXTMETRIC +#define NOWH +#define NOCOMM +#define NOKANJI +#define NOMCX +#include +#pragma warning(default:4201) +#pragma warning(default:4214) +#endif + +#include "dss.h" +#include "dsstypes.h" + +/* + * txt_vp() -- + * generate a verb phrase by + * 1) selecting a verb phrase form + * 2) parsing it to select parts of speech + * 3) selecting appropriate words + * 4) adding punctuation as required + * + * Returns: length of generated phrase + * Called By: txt_sentence() + * Calls: pick_str() + */ +static int +txt_vp(char *dest, int sd) +{ + char syntax[MAX_GRAMMAR_LEN + 1], + *cptr, + *parse_target; + distribution *src; + int i, + res = 0; + + + pick_str(&vp, sd, &syntax[0]); + parse_target = syntax; + while ((cptr = strtok(parse_target, " ")) != NULL) + { + src = NULL; + switch(*cptr) + { + case 'D': + src = &adverbs; + break; + case 'V': + src = &verbs; + break; + case 'X': + src = &auxillaries; + break; + } /* end of POS switch statement */ + i = pick_str(src, sd, dest); + i = strlen(DIST_MEMBER(src, i)); + dest += i; + res += i; + if (*(++cptr)) /* miscelaneous fillagree, like punctuation */ + { + dest += 1; + res += 1; + *dest = *cptr; + } + *dest = ' '; + dest++; + res++; + parse_target = NULL; + } /* end of while loop */ + + return(res); +} + +/* + * txt_np() -- + * generate a noun phrase by + * 1) selecting a noun phrase form + * 2) parsing it to select parts of speech + * 3) selecting appropriate words + * 4) adding punctuation as required + * + * Returns: length of generated phrase + * Called By: txt_sentence() + * Calls: pick_str(), + */ +static int +txt_np(char *dest, int sd) +{ + char syntax[MAX_GRAMMAR_LEN + 1], + *cptr, + 
*parse_target; + distribution *src; + int i, + res = 0; + + + pick_str(&np, sd, &syntax[0]); + parse_target = syntax; + while ((cptr = strtok(parse_target, " ")) != NULL) + { + src = NULL; + switch(*cptr) + { + case 'A': + src = &articles; + break; + case 'J': + src = &adjectives; + break; + case 'D': + src = &adverbs; + break; + case 'N': + src = &nouns; + break; + } /* end of POS switch statement */ + i = pick_str(src, sd, dest); + i = strlen(DIST_MEMBER(src, i)); + dest += i; + res += i; + if (*(++cptr)) /* miscelaneous fillagree, like punctuation */ + { + *dest = *cptr; + dest += 1; + res += 1; + } + *dest = ' '; + dest++; + res++; + parse_target = NULL; + } /* end of while loop */ + + return(res); +} + +/* + * txt_sentence() -- + * generate a sentence by + * 1) selecting a sentence form + * 2) parsing it to select parts of speech or phrase types + * 3) selecting appropriate words + * 4) adding punctuation as required + * + * Returns: length of generated sentence + * Called By: dbg_text() + * Calls: pick_str(), txt_np(), txt_vp() + */ +static int +txt_sentence(char *dest, int sd) +{ + char syntax[MAX_GRAMMAR_LEN + 1], + *cptr; + int i, + res = 0, + len = 0; + + + pick_str(&grammar, sd, syntax); + cptr = syntax; + +next_token: /* I hate goto's, but can't seem to have parent and child use strtok() */ + while (*cptr && *cptr == ' ') + cptr++; + if (*cptr == '\0') + goto done; + switch(*cptr) + { + case 'V': + len = txt_vp(dest, sd); + break; + case 'N': + len = txt_np(dest, sd); + break; + case 'P': + i = pick_str(&prepositions, sd, dest); + len = strlen(DIST_MEMBER(&prepositions, i)); + strcpy((dest + len), " the "); + len += 5; + len += txt_np(dest + len, sd); + break; + case 'T': + i = pick_str(&terminators, sd, --dest); /*terminators should abut previous word */ + len = strlen(DIST_MEMBER(&terminators, i)); + break; + } /* end of POS switch statement */ + dest += len; + res += len; + cptr++; + if (*cptr && *cptr != ' ') /* miscelaneous fillagree, like 
punctuation */ + { + dest += 1; + res += 1; + *dest = *cptr; + } + goto next_token; +done: + *dest = '\0'; + return(--res); +} + +/* + * dbg_text() -- + * produce ELIZA-like text of random, bounded length, truncating the last + * generated sentence as required + */ +int +dbg_text(char *tgt, int min, int max, int sd) +{ + long length = 0; + int wordlen = 0, + needed, + s_len; + char sentence[MAX_SENT_LEN + 1]; + + RANDOM(length, min, max, sd); + + while (wordlen < length) + { + s_len = txt_sentence(sentence, sd); + if ( s_len < 0) + INTERNAL_ERROR("Bad sentence formation"); + needed = length - wordlen; + if (needed >= s_len + 1) /* need the entire sentence */ + { + strcpy(tgt, sentence); + tgt += s_len; + wordlen += s_len + 1; + *(tgt++) = ' '; + } + else /* chop the new sentence off to match the length target */ + { + sentence[needed] = '\0'; + strcpy(tgt, sentence); + wordlen += needed; + tgt += needed; + } + } + *tgt = '\0'; + + return(wordlen); +} + +#ifdef TEST +tdef tdefs = { NULL }; + +main() +{ + char prattle[401]; + + read_dist (env_config (DIST_TAG, DIST_DFLT), "nouns", &nouns); + read_dist (env_config (DIST_TAG, DIST_DFLT), "verbs", &verbs); + read_dist (env_config (DIST_TAG, DIST_DFLT), "adjectives", &adjectives); + read_dist (env_config (DIST_TAG, DIST_DFLT), "adverbs", &adverbs); + read_dist (env_config (DIST_TAG, DIST_DFLT), "auxillaries", &auxillaries); + read_dist (env_config (DIST_TAG, DIST_DFLT), "terminators", &terminators); + read_dist (env_config (DIST_TAG, DIST_DFLT), "articles", &articles); + read_dist (env_config (DIST_TAG, DIST_DFLT), "prepositions", &prepositions); + read_dist (env_config (DIST_TAG, DIST_DFLT), "grammar", &grammar); + read_dist (env_config (DIST_TAG, DIST_DFLT), "np", &np); + read_dist (env_config (DIST_TAG, DIST_DFLT), "vp", &vp); + + while (1) + { + dbg_text(&prattle[0], 300, 400, 0); + printf("<%s>\n", prattle); + } + + return(0); +} +#endif /* TEST */ diff --git a/tpcd.h b/tpcd.h new file mode 100644 index 
0000000..31b5f98 --- /dev/null +++ b/tpcd.h @@ -0,0 +1,103 @@ +/***************************************************************** + * Title: tpcd.h for TPC D + * Sccsid: @(#)tpcd.h 2.1.8.1 + * Description: + * X + * + ***************************************************************** + */ +#define DFLT 0x0001 +#define OUTPUT 0x0002 +#define EXPLAIN 0x0004 +#define DBASE 0x0008 +#define VERBOSE 0x0010 +#define TIMING 0x0020 +#define LOG 0x0040 +#define QUERY 0x0080 +#define REFRESH 0x0100 +#define ANSI 0x0200 +#define SEED 0x0400 +#define COMMENT 0x0800 +#define INIT 0x1000 +#define TERMINATE 0x2000 +#define DFLT_NUM 0x4000 + +/* + * general defines + */ +#define VTAG ':' /* flags a variable substitution */ +#define ofp stdout /* make the routine a filter */ +#define QDIR_TAG "DSS_QUERY" /* variable to point to queries */ +#define QDIR_DFLT "." /* and its default */ + +/* + * database portability defines + */ +#ifdef DB2 +#define GEN_QUERY_PLAN "SET CURRENT EXPLAIN SNAPSHOT ON;" +#define START_TRAN "" +#define END_TRAN "COMMIT WORK;" +#define SET_OUTPUT "" +#define SET_ROWCOUNT "--#SET ROWS_FETCH %d\n" +#define SET_DBASE "CONNECT TO %s ;\n" +#endif + +#ifdef INFORMIX +#define GEN_QUERY_PLAN "SET EXPLAIN ON;" +#define START_TRAN "BEGIN WORK;" +#define END_TRAN "COMMIT WORK;" +#define SET_OUTPUT "OUTPUT TO " +#define SET_ROWCOUNT "FIRST %d" +#define SET_DBASE "database %s ;\n" +#endif + +#ifdef SQLSERVER +#define GEN_QUERY_PLAN "set showplan on\nset noexec on\ngo\n" +#define START_TRAN "begin transaction\ngo\n" +#define END_TRAN "commit transaction\ngo\n" +#define SET_OUTPUT "" +#define SET_ROWCOUNT "set rowcount %d\ngo\n\n" +#define SET_DBASE "use %s\ngo\n" +#endif + +#ifdef SYBASE +#define GEN_QUERY_PLAN "set showplan on\nset noexec on\ngo\n" +#define START_TRAN "begin transaction\ngo\n" +#define END_TRAN "commit transaction\ngo\n" +#define SET_OUTPUT "" +#define SET_ROWCOUNT "set rowcount %d\ngo\n\n" +#define SET_DBASE "use %s\ngo\n" +#endif + +#ifdef TDAT +#define 
GEN_QUERY_PLAN "EXPLAIN" +#define START_TRAN "BEGIN TRANSACTION" +#define END_TRAN "END TRANSACTION" +#define SET_OUTPUT ".SET FORMAT OFF\n.EXPORT REPORT file=" +#define SET_ROWCOUNT ".SET RETCANCEL ON\n.SET RETLIMIT %d\n" +#define SET_DBASE ".LOGON %s\n" +#endif + +#define MAX_VARS 8 /* max number of host vars in any query */ +#define QLEN_MAX 2048 /* max length of any query */ +#define QUERIES_PER_SET 22 +#define MAX_PIDS 50 + +EXTERN int flags; +EXTERN int s_cnt; +EXTERN char *osuff; +EXTERN int stream; +EXTERN char *lfile; +EXTERN char *ifile; +EXTERN char *tfile; + +#define MAX_PERMUTE 41 +#ifdef DECLARER +int rowcnt_dflt[QUERIES_PER_SET + 1] = + {-1,-1,100,10,-1,-1,-1,-1,-1,-1,20,-1,-1,-1,-1,-1,-1,-1,100,-1,-1,100,-1}; +int rowcnt; +#define SEQUENCE(stream, query) permutation[stream % MAX_PERMUTE][query - 1] +#else +extern int rowcnt_dflt[]; +extern int rowcnt; +#endif diff --git a/varsub.c b/varsub.c new file mode 100644 index 0000000..36adf91 --- /dev/null +++ b/varsub.c @@ -0,0 +1,314 @@ +/* Sccsid: @(#)varsub.c 2.1.8.3 */ +#include +#ifndef _POSIX_SOURCE +#include +#endif /* POSIX_SOURCE */ +#if (defined(_POSIX_)||!defined(WIN32)) +#ifndef DOS +#include +#endif +#endif /* WIN32 */ +#include +#include "config.h" +#include "dss.h" +#include "tpcd.h" +#ifdef ADHOC +#include "adhoc.h" +extern adhoc_t adhocs[]; +#endif /* ADHOC */ + +#define MAX_PARAM 10 /* maximum number of parameter substitutions in a query */ + +extern long Seed[]; +extern char **asc_date; +extern double flt_scale; +extern distribution q13a, q13b; +long *permute(long *set, int cnt, long stream); + +long brands[25] = {11,12,13,14,15,21,22,23,24,25,31,32,33,34,35, + 41,42,43,44,45,51,52,53,54,55}; +long sizes[50] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20, + 21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50}; +long ccode[25] = {10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34}; +char *defaults[24][11] = +{ + 
{"90", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 1 */ + {"15", "BRASS", "EUROPE", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 2 */ + {"BUILDING", "1995-03-15", NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 3 */ + {"1993-07-01", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 4 */ + {"ASIA", "1994-01-01", NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 5 */ + {"1994-01-01", ".06", "24", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 6 */ + {"FRANCE", "GERMANY", NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 7 */ + {"BRAZIL", "AMERICA", "ECONOMY ANODIZED STEEL", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},/* 8 */ + {"green", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 9 */ + {"1993-10-01", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 10 */ + {"GERMANY", "0.0001", NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 11 */ + {"MAIL", "SHIP", "1994-01-01", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 12 */ + {"special", "requests", NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 13 */ + {"1995-09-01", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 14 */ + {"1996-01-01", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 15 */ + {"Brand#45", "MEDIUM POLISHED", "49", + "14","23","45","19","3","36","9", NULL}, /* 16 */ + {"Brand#23", "MED BOX", NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 17 */ + {"300", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 18 */ + {"Brand#12", "Brand#23", "Brand#34", "1", "10", "20", NULL, NULL, NULL, NULL, NULL}, /* 19 */ + {"forest", "1994-01-01", "CANADA", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 20 */ + {"SAUDI ARABIA", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 21 */ + {"13","31","23", "29", "30", "18", "17", NULL, NULL, NULL, NULL}, /* 22 */ + {NULL,NULL,NULL,NULL, NULL, 
NULL, NULL, NULL, NULL, NULL, NULL}, /* UF1 */ + {NULL,NULL,NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* UF2 */ +}; +void +varsub(int qnum, int vnum, int flags) +{ + static char param[11][128]; + static FILE *lfp = NULL; + long *lptr; + char *ptr; + int i = 0, + tmp_date; + long tmp1, + tmp2; + + if (vnum == 0) + { + if ((flags & DFLT) == 0) + { + switch(qnum) + { + case 1: + sprintf(param[1], "%d", UnifInt((long)60,(long)120,(long)qnum)); + param[2][0] = '\0'; + break; + case 2: + sprintf(param[1], "%d", + UnifInt((long)P_SIZE_MIN, (long)P_SIZE_MAX, qnum)); + pick_str(&p_types_set, qnum, param[3]); + ptr = param[3] + strlen(param[3]); + while (*(ptr - 1) != ' ') ptr--; + strcpy(param[2], ptr); + pick_str(®ions, qnum, param[3]); + param[4][0] = '\0'; + break; + case 3: + pick_str(&c_mseg_set, qnum, param[1]); + /* + * pick a random offset within the month of march and add the + * appropriate magic numbers to position the output functions + * at the start of March '95 + */ + tmp_date = UnifInt((long)0, (long)30, (long)qnum); + strcpy(param[2], *(asc_date + tmp_date + 1155)); + param[3][0] = '\0'; + break; + case 4: + tmp_date = UnifInt(1,58,qnum); + sprintf(param[1],"19%02d-%02d-01", + 93 + tmp_date/12, tmp_date%12 + 1); + param[2][0] = '\0'; + break; + case 5: + pick_str(®ions, qnum, param[1]); + tmp_date = UnifInt((long)93,(long)97,(long)qnum); + sprintf(param[2], "19%d-01-01", tmp_date); + param[3][0] = '\0'; + break; + case 6: + tmp_date = UnifInt(93,97,qnum); + sprintf(param[1], "19%d-01-01", tmp_date); + sprintf(param[2], "0.0%d", UnifInt(2, 9, qnum)); + sprintf(param[3], "%d", UnifInt((long)24, (long)25, (long)qnum)); + param[4][0] = '\0'; + break; + case 7: + tmp_date = pick_str(&nations2, qnum, param[1]); + while (pick_str(&nations2, qnum, param[2]) == tmp_date); + param[3][0] = '\0'; + break; + case 8: + tmp_date = pick_str(&nations2, qnum, param[1]); + tmp_date = nations.list[tmp_date].weight; + strcpy(param[2], regions.list[tmp_date].text); + 
pick_str(&p_types_set, qnum, param[3]); + param[4][0] = '\0'; + break; + case 9: + pick_str(&colors, qnum, param[1]); + param[2][0] = '\0'; + break; + case 10: + tmp_date = UnifInt(1,24,qnum); + sprintf(param[1],"19%02d-%02d-01", + 93 + tmp_date/12, tmp_date%12 + 1); + param[2][0] = '\0'; + break; + case 11: + pick_str(&nations2, qnum, param[1]); + sprintf(param[2], "%11.10f", Q11_FRACTION / flt_scale ); + param[3][0] = '\0'; + break; + case 12: + tmp_date = pick_str(&l_smode_set, qnum, param[1]); + while (tmp_date == pick_str(&l_smode_set, qnum, param[2])); + tmp_date = UnifInt(93,97,qnum); + sprintf(param[3], "19%d-01-01", tmp_date); + param[4][0] = '\0'; + break; + case 13: + pick_str(&q13a, qnum, param[1]); + pick_str(&q13b, qnum, param[2]); + param[3][0] = '\0'; + break; + case 14: + tmp_date = UnifInt(1,60,qnum); + sprintf(param[1],"19%02d-%02d-01", + 93 + tmp_date/12, tmp_date%12 + 1); + param[2][0] = '\0'; + break; + case 15: + tmp_date = UnifInt(1,58,qnum); + sprintf(param[1],"19%02d-%02d-01", + 93 + tmp_date/12, tmp_date%12 + 1); + param[2][0] = '\0'; + break; + case 16: + tmp1 = UnifInt(1, 5, qnum); + tmp2 = UnifInt(1, 5, qnum); + sprintf(param[1], "Brand#%d%d", tmp1, tmp2); + pick_str(&p_types_set, qnum, param[2]); + ptr = param[2] + strlen(param[2]); + while (*(--ptr) != ' '); + *ptr = '\0'; + lptr = &sizes[0]; + for (i=3; i <= MAX_PARAM; i++) + { + sprintf(param[i], "%ld", *permute(lptr,50,qnum) + 1); + lptr = (long *)NULL; + } + break; + case 17: + tmp1 = UnifInt(1, 5, qnum); + tmp2 = UnifInt(1, 5, qnum); + sprintf(param[1], "Brand#%d%d", tmp1, tmp2); + pick_str(&p_cntr_set, qnum, param[2]); + param[3][0] = '\0'; + break; + case 18: + sprintf(param[1], "%ld", UnifInt(312, 315, qnum)); + param[2][0] = '\0'; + break; + case 19: + tmp1 = UnifInt(1, 5, qnum); + tmp2 = UnifInt(1, 5, qnum); + sprintf(param[1], "Brand#%d%d", tmp1, tmp2); + tmp1 = UnifInt(1, 5, qnum); + tmp2 = UnifInt(1, 5, qnum); + sprintf(param[2], "Brand#%d%d", tmp1, tmp2); + tmp1 = 
UnifInt(1, 5, qnum); + tmp2 = UnifInt(1, 5, qnum); + sprintf(param[3], "Brand#%d%d", tmp1, tmp2); + sprintf(param[4], "%ld", UnifInt(1, 10, qnum)); + sprintf(param[5], "%ld", UnifInt(10, 20, qnum)); + sprintf(param[6], "%ld", UnifInt(20, 30, qnum)); + param[7][0] = '\0'; + break; + case 20: + pick_str(&colors, qnum, param[1]); + tmp_date = UnifInt(93,97,qnum); + sprintf(param[2], "19%d-01-01", tmp_date); + pick_str(&nations2, qnum, param[3]); + param[4][0] = '\0'; + break; + case 21: + pick_str(&nations2, qnum, param[1]); + param[2][0] = '\0'; + break; + case 22: + lptr = &ccode[0]; + for (i=0; i <= 7; i++) + { + sprintf(param[i+1], "%ld", 10 + *permute(lptr,25, qnum)); + lptr = (long *)NULL; + } + param[8][0] = '\0'; + break; + case 23: + case 24: + break; + default: + fprintf(stderr, + "No variable definitions available for query %d\n", + qnum); + return; + } + } + + if (flags & LOG) + { + if (lfp == NULL) + { + lfp = fopen(lfile, "a"); + OPEN_CHECK(lfp, lfile); + } + fprintf(lfp, "%d", qnum); + for (i=1; i <= 10; i++) + if (flags & DFLT) + { + if (defaults[qnum - 1][i - 1] == NULL) + break; + else + fprintf(lfp, "\t%s", defaults[qnum - 1][i - 1]); + } + else + { + if (param[i][0] == '\0') + break; + else + fprintf(lfp, "\t%s", param[i]); + } + fprintf(lfp, "\n"); + } + } + else + { + if (flags & DFLT) + { + /* to allow -d to work at all scale factors */ + if (qnum == 11 && vnum == 2) + fprintf(ofp, "%11.10f", Q11_FRACTION/flt_scale); + else + if (defaults[qnum - 1][vnum - 1]) + fprintf(ofp, "%s", defaults[qnum - 1][vnum - 1]); + else + fprintf(stderr, + "Bad default request (q: %d, p: %d)\n", + qnum, vnum); + } + else + { + if (param[vnum] && vnum <= MAX_PARAM) + fprintf(ofp, "%s", param[vnum]); + else + fprintf(stderr, "Bad parameter request (q: %d, p: %d)\n", + qnum, vnum); + } + } + return; +}