diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs
index a5b452da83b2..a50707a1b89c 100644
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -313,20 +313,17 @@ pub struct MetadataHealthUpdateRequest {
 pub struct MetadataHealthUpdateResponse {}
 
 #[derive(Serialize, Deserialize, Debug)]
-
 pub struct MetadataHealthListUnhealthyResponse {
     pub unhealthy_tenant_shards: Vec<TenantShardId>,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
-
 pub struct MetadataHealthListOutdatedRequest {
     #[serde(with = "humantime_serde")]
     pub not_scrubbed_for: Duration,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
-
 pub struct MetadataHealthListOutdatedResponse {
     pub health_records: Vec<MetadataHealthRecord>,
 }
diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs
index e8513b31eb96..e755aaed195b 100644
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -500,7 +500,7 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
         StatusCode::OK,
         state
             .service
-            .node_configure(
+            .external_node_configure(
                 config_req.node_id,
                 config_req.availability.map(NodeAvailability::from),
                 config_req.scheduling,
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index ef4cd91efddd..d717924ae6ea 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -4912,6 +4912,26 @@ impl Service {
         Ok(())
     }
 
+    /// Wrapper around [`Self::node_configure`] which only allows changes while there is no ongoing
+    /// operation for the HTTP API.
+    pub(crate) async fn external_node_configure(
+        &self,
+        node_id: NodeId,
+        availability: Option<NodeAvailability>,
+        scheduling: Option<NodeSchedulingPolicy>,
+    ) -> Result<(), ApiError> {
+        {
+            let locked = self.inner.read().unwrap();
+            if let Some(op) = locked.ongoing_operation.as_ref().map(|op| op.operation) {
+                return Err(ApiError::PreconditionFailed(
+                    format!("Ongoing background operation forbids configuring: {op}").into(),
+                ));
+            }
+        }
+
+        self.node_configure(node_id, availability, scheduling).await
+    }
+
     pub(crate) async fn start_node_drain(
         self: &Arc<Self>,
         node_id: NodeId,
@@ -5017,14 +5037,14 @@
     }
 
     pub(crate) async fn cancel_node_drain(&self, node_id: NodeId) -> Result<(), ApiError> {
-        let (node_available, node_policy) = {
+        let node_available = {
             let locked = self.inner.read().unwrap();
             let nodes = &locked.nodes;
             let node = nodes.get(&node_id).ok_or(ApiError::NotFound(
                 anyhow::anyhow!("Node {} not registered", node_id).into(),
             ))?;
 
-            (node.is_available(), node.get_scheduling())
+            node.is_available()
         };
 
         if !node_available {
             return Err(ApiError::PreconditionFailed(
             ));
         }
 
-        if !matches!(node_policy, NodeSchedulingPolicy::Draining) {
-            return Err(ApiError::PreconditionFailed(
-                format!("Node {node_id} has no drain in progress").into(),
-            ));
-        }
-
         if let Some(op_handler) = self.inner.read().unwrap().ongoing_operation.as_ref() {
             if let Operation::Drain(drain) = op_handler.operation {
                 if drain.node_id == node_id {
@@ -5152,14 +5166,14 @@
     }
 
     pub(crate) async fn cancel_node_fill(&self, node_id: NodeId) -> Result<(), ApiError> {
-        let (node_available, node_policy) = {
+        let node_available = {
             let locked = self.inner.read().unwrap();
             let nodes = &locked.nodes;
             let node = nodes.get(&node_id).ok_or(ApiError::NotFound(
                 anyhow::anyhow!("Node {} not registered", node_id).into(),
             ))?;
 
-            (node.is_available(), node.get_scheduling())
+            node.is_available()
         };
 
         if !node_available {
             return Err(ApiError::PreconditionFailed(
             ));
         }
 
-        if !matches!(node_policy, NodeSchedulingPolicy::Filling) {
-            return Err(ApiError::PreconditionFailed(
-                format!("Node {node_id} has no fill in progress").into(),
-            ));
-        }
-
         if let Some(op_handler) = self.inner.read().unwrap().ongoing_operation.as_ref() {
             if let Operation::Fill(fill) = op_handler.operation {
                 if fill.node_id == node_id {
@@ -5982,7 +5990,7 @@ impl Service {
                 .await_waiters_remainder(waiters, SHORT_RECONCILE_TIMEOUT)
                 .await;
 
-            failpoint_support::sleep_millis_async!("sleepy-drain-loop");
+            failpoint_support::sleep_millis_async!("sleepy-drain-loop", &cancel);
         }
 
         while !waiters.is_empty() {
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 9b2557a165ec..7d98ff29234e 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -2091,3 +2091,47 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
         )
         == 0
     )
+
+
+def test_storage_controller_ps_restarted_during_drain(neon_env_builder: NeonEnvBuilder):
+    # single unsharded tenant, two locations
+    neon_env_builder.num_pageservers = 2
+
+    env = neon_env_builder.init_start()
+
+    env.storage_controller.tenant_policy_update(env.initial_tenant, {"placement": {"Attached": 1}})
+    env.storage_controller.reconcile_until_idle()
+
+    attached_id = int(env.storage_controller.locate(env.initial_tenant)[0]["node_id"])
+    attached = next((ps for ps in env.pageservers if ps.id == attached_id))
+
+    def attached_is_draining():
+        details = env.storage_controller.node_status(attached.id)
+        assert details["scheduling"] == "Draining"
+
+    env.storage_controller.configure_failpoints(("sleepy-drain-loop", "return(10000)"))
+    env.storage_controller.node_drain(attached.id)
+
+    wait_until(10, 0.5, attached_is_draining)
+
+    attached.restart()
+
+    # we are unable to reconfigure the node while the operation is still ongoing
+    with pytest.raises(
+        StorageControllerApiException,
+        match="Precondition failed: Ongoing background operation forbids configuring: drain.*",
+    ):
+        env.storage_controller.node_configure(attached.id, {"scheduling": "Pause"})
+    with pytest.raises(
+        StorageControllerApiException,
+        match="Precondition failed: Ongoing background operation forbids configuring: drain.*",
+    ):
+        env.storage_controller.node_configure(attached.id, {"availability": "Offline"})
+
+    env.storage_controller.cancel_node_drain(attached.id)
+
+    def reconfigure_node_again():
+        env.storage_controller.node_configure(attached.id, {"scheduling": "Pause"})
+
+    # allow for a small delay between actually having cancelled and being able to reconfigure again
+    wait_until(4, 0.5, reconfigure_node_again)