From e5c89f3da3bdb71d35e70a488becf99bf82158ec Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Mon, 18 Nov 2024 14:27:52 -0500
Subject: [PATCH] feat(pageserver): drop disposable keys during gc-compaction
 (#9765)

close https://github.com/neondatabase/neon/issues/9552, close
https://github.com/neondatabase/neon/issues/8920, part of
https://github.com/neondatabase/neon/issues/9114

## Summary of changes

* Drop keys not belonging to this shard during gc-compaction to avoid
constructing history that might have been truncated during shard
compaction.
* Run gc-compaction at the end of shard compaction test.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
---
 pageserver/src/tenant/timeline/compaction.rs |  8 +++++++
 test_runner/regress/test_compaction.py       | 24 ++++++++++++++++++--
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index e6ef1aae2b7b..b30e380de592 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -2021,6 +2021,14 @@ impl Timeline {
             if cancel.is_cancelled() {
                 return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error
             }
+            if self.shard_identity.is_key_disposable(&key) {
+                // If this shard does not need to store this key, simply skip it.
+                //
+                // This is not handled in the filter iterator because shard is determined by hash.
+                // Therefore, it does not give us any performance benefit to do things like skip
+                // a whole layer file as handling key spaces (ranges).
+                continue;
+            }
             if !job_desc.compaction_key_range.contains(&key) {
                 if !desc.is_delta {
                     continue;
diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py
index 370df3c3792e..a02d0f6b988a 100644
--- a/test_runner/regress/test_compaction.py
+++ b/test_runner/regress/test_compaction.py
@@ -122,10 +122,19 @@ def test_pageserver_compaction_smoke(neon_env_builder: NeonEnvBuilder):
 
 
 @pytest.mark.parametrize(
-    "shard_count,stripe_size", [(None, None), (4, TINY_STRIPES), (4, LARGE_STRIPES)]
+    "shard_count,stripe_size,gc_compaction",
+    [
+        (None, None, False),
+        (4, TINY_STRIPES, False),
+        (4, LARGE_STRIPES, False),
+        (4, LARGE_STRIPES, True),
+    ],
 )
 def test_sharding_compaction(
-    neon_env_builder: NeonEnvBuilder, stripe_size: int, shard_count: Optional[int]
+    neon_env_builder: NeonEnvBuilder,
+    stripe_size: int,
+    shard_count: Optional[int],
+    gc_compaction: bool,
 ):
     """
     Use small stripes, small layers, and small compaction thresholds to exercise how compaction
@@ -217,6 +226,17 @@ def test_sharding_compaction(
     # Assert that everything is still readable
     workload.validate()
 
+    if gc_compaction:
+        # trigger gc compaction to get more coverage for that, piggyback on the existing workload
+        for shard in env.storage_controller.locate(tenant_id):
+            pageserver = env.get_pageserver(shard["node_id"])
+            tenant_shard_id = shard["shard_id"]
+            pageserver.http_client().timeline_compact(
+                tenant_shard_id,
+                timeline_id,
+                enhanced_gc_bottom_most_compaction=True,
+            )
+
 
 class CompactionAlgorithm(str, enum.Enum):
     LEGACY = "legacy"