MR23 - [TDVT] Helper script for parsing expecteds

- by @tabsdavis
tableau · Dec 15, 2023 · f5da9a7 · f5da9a7
1 parent 4e8c267
commit f5da9a7
Show file tree

Hide file tree

Showing 4 changed files with 63 additions and 2 deletions.
diff --git a/tdvt/CHANGELOG.md b/tdvt/CHANGELOG.md
@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## [2.9.7] - 2023-08-28
+- Added formatting for generated expected files: the `<sql>` and `<query-time>` elements are now stripped.
+
 ## [2.9.6] - 2023-08-28
 - Address Spark SQL test issue
 

diff --git a/tdvt/tdvt/tdvt_core.py b/tdvt/tdvt/tdvt_core.py
@@ -338,7 +338,28 @@ def try_move(srcfile, destfile):
             time.sleep(0.05)
 
 
+def try_clean_and_move(srcfile, destfile):
+    """Write the cleaned contents of srcfile to destfile, then delete srcfile (FileNotFoundError if it is missing)."""
+    os.stat(srcfile)  # raises FileNotFoundError up front, so a missing srcfile never leaves an empty destfile
 
+    with open(destfile, 'w') as file2:
+        file2.write(get_cleaned_results(srcfile))
+    os.remove(srcfile)
+
+
+def get_cleaned_results(srcfile):
+    """Return the text of srcfile with its <sql> and <query-time> elements removed ('' if srcfile is missing)."""
+    try:
+        with open(srcfile, 'r') as file1:
+            file_xml = file1.read()
+    except FileNotFoundError:
+        # Best effort: report the missing file on stdout and hand back an empty string.
+        print("The file " + srcfile + " does not exist.")
+        return ''
+
+    # Each pattern also consumes the newline and four-space indent that follow the closing tag.
+    file_xml = re.sub(r"<sql>[\S\s]*?<\/sql>\s*\n    ", '', file_xml)
+    return re.sub(r"<query-time>[\S\s]*?<\/query-time>\s*\n    ", '', file_xml)
 
 
 def save_results_diff(actual_file, diff_file, expected_file, diff_string):
@@ -406,7 +427,7 @@ def compare_results(test_name, test_file, full_test_file, work):
                 logging.warning("No actual file found, generating and moving expected file.")
                 logging.debug(
                     work.get_thread_msg() + "Copying actual [{}] to expected [{}]".format(actual_file, expected_file))
-                try_move(actual_file, expected_file)
+                try_clean_and_move(actual_file, expected_file)
             result.error_status = TestErrorMissingActual()
             return result
         # Try other possible expected files. These are numbered like 'expected.setup.math.1.txt', 'expected.setup.math.2.txt' etc.
@@ -444,7 +465,7 @@ def compare_results(test_name, test_file, full_test_file, work):
                                                                                               base_test_file,
                                                                                               test_config.output_dir)
         logging.debug(work.get_thread_msg() + "Copying actual [{}] to expected [{}]".format(actual_file, next_path))
-        try_move(actual_file, next_path)
+        try_clean_and_move(actual_file, next_path)
     # This will re-diff the results against the best expected file to ensure the test pass indicator and diff count is correct.
     diff_count, diff_string = result.diff_test_results(result.best_matching_expected_results)
     save_results_diff(actual_file, actual_diff_file, result.path_to_expected, diff_string)

diff --git a/tdvt/test/tdvt_test.py b/tdvt/test/tdvt_test.py
@@ -36,6 +36,7 @@
 
 from tdvt.config_gen.test_creator import TestCreator
 from tdvt.setup_env import updated_tds_as_str
+from tdvt.tdvt_core import get_cleaned_results
 
 
 class DiffTest(unittest.TestCase):
@@ -979,6 +980,12 @@ def check_errors(self, expected_message, expected_state, mock_batch, error_count
                             "Expected [{0}] got [{1}]".format(expected_message, actual_message))
             self.assertIsInstance(mock_batch.results[test_file].error_status, expected_state)
 
+    def test_get_cleaned_results(self):
+        # After cleaning, <table> must directly follow each <test> opening tag of the fixture.
+        cleaned = get_cleaned_results(TEST_DIRECTORY + '/exprtests/expected.setup.agg.avg.txt')
+        for snippet in ("AVG([int0])'>\n    <table>", "AVG([num4])'>\n    <table>"):
+            self.assertIn(snippet, cleaned)
+
 
 class TabQueryPathTest(unittest.TestCase):
     def test_init(self):

diff --git a/tdvt/test/tool_test/exprtests/expected.setup.agg.avg.txt b/tdvt/test/tool_test/exprtests/expected.setup.agg.avg.txt
@@ -0,0 +1,30 @@
+<results>
+  <test name='AVG([int0])'>
+    <sql>
+      SELECT AVG(&quot;Calcs&quot;.&quot;int0&quot;) AS &quot;TEMP(Test)(3952218057)(0)&quot;&#10;FROM &quot;Calcs&quot;&#10;HAVING (COUNT(1) &gt; 0)&#10;    </sql>
+    <query-time>
+      308.000&#10;    </query-time>
+    <table>
+      <schema>
+        <column>[cast_calcs.postgres].[TEMP(Test)(3952218057)(0)]</column>
+      </schema>
+      <tuple>
+        <value>6.1818182</value>
+      </tuple>
+    </table>
+  </test>
+  <test name='AVG([num4])'>
+    <sql>
+      SELECT AVG(&quot;Calcs&quot;.&quot;num4&quot;) AS &quot;TEMP(Test)(1371989636)(0)&quot;&#10;FROM &quot;Calcs&quot;&#10;HAVING (COUNT(1) &gt; 0)&#10;    </sql>
+    <query-time>
+      232.000&#10;    </query-time>
+    <table>
+      <schema>
+        <column>[cast_calcs.postgres].[TEMP(Test)(1371989636)(0)]</column>
+      </schema>
+      <tuple>
+        <value>2.0016667</value>
+      </tuple>
+    </table>
+  </test>
+</results>