Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve vCenter refresh stability in tests by using runWithRetry #740

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 48 additions & 8 deletions govcd/tm_common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ package govcd
import (
"fmt"
"net/url"
"regexp"
"time"

"github.com/vmware/go-vcloud-director/v3/types/v56"
"github.com/vmware/go-vcloud-director/v3/util"
. "gopkg.in/check.v1"
)

Expand Down Expand Up @@ -56,19 +58,19 @@ func getOrCreateVCenter(vcd *TestVCD, check *C) (*VCenter, func()) {
printVerbose("# Waiting for listener status to become 'CONNECTED'\n")
err = waitForListenerStatusConnected(vc)
check.Assert(err, IsNil)
printVerbose("# Sleeping after vCenter is 'CONNECTED'\n")
time.Sleep(4 * time.Second) // TODO: TM: Re-evaluate need for sleep
// Refresh connected vCenter to be sure that all artifacts are loaded
printVerbose("# Refreshing vCenter %s\n", vc.VSphereVCenter.Url)
err = vc.RefreshVcenter()

// Sometimes the refresh fails with one of 'vCenterEntityBusyRegexp' errors
printVerbose("# Attempting vCenter refresh %s\n", vc.VSphereVCenter.Url)
err = runWithRetry(vc.RefreshVcenter, vCenterEntityBusyRegexp, maximumVcenterRetryTime)
check.Assert(err, IsNil)

printVerbose("# Refreshing Storage Profiles in vCenter %s\n", vc.VSphereVCenter.Url)
err = vc.RefreshStorageProfiles()
// Refresh storage policies
printVerbose("# Attempting storage profile refresh %s\n", vc.VSphereVCenter.Url)
err = runWithRetry(vc.RefreshStorageProfiles, vCenterEntityBusyRegexp, maximumVcenterRetryTime)
check.Assert(err, IsNil)

printVerbose("# Sleeping after vCenter refreshes\n")
time.Sleep(1 * time.Minute) // TODO: TM: Re-evaluate need for sleep
time.Sleep(10 * time.Second) // TODO: TM: Re-evaluate need for sleep
vCenterCreated := true

return vc, func() {
Expand Down Expand Up @@ -369,3 +371,41 @@ func createTmIpSpace(vcd *TestVCD, region *Region, check *C, nameSuffix, octet3
check.Assert(err, IsNil)
}
}

// vCenter task is sometimes unreliable and trying to refresh it immediately after it becomes
// connected causes a "BUSY_ENTITY" error (which has a few different messages)
var maximumVcenterRetryTime = 120 * time.Second // The maximum time a single operation will be retried before giving up
var vCenterEntityBusyRegexp = regexp.MustCompile(`(is currently busy|400|BUSY_ENTITY)`) // Regexp to match entity busy error
lvirbalas marked this conversation as resolved.
Show resolved Hide resolved

func runWithRetry(runOperation func() error, errRegexp *regexp.Regexp, duration time.Duration) error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We decided to pull this logic out of tests, and out of Terraform provider level https://github.com/vmware/terraform-provider-vcfa/pull/11/files#diff-f4455ddd79cb9237ff3f5c05d6d2c74431b01fa40dd231c43adcc48a00e419c0
We'll move to SDK level instead.

startTime := time.Now()
endTime := startTime.Add(duration)
util.Logger.Printf("[DEBUG] runWithRetry - running with retry for %f seconds if error contains '%s' ", duration.Seconds(), errRegexp)
count := 1
for {
err := runOperation()
util.Logger.Printf("[DEBUG] runWithRetry - ran attempt %d, got error: %s ", count, err)
// Operation had no error - it succeeded
if err == nil {
util.Logger.Printf("[DEBUG] runWithRetry - no error occurred after attempt %d, got error: %s ", count, err)
return nil
}
// If there is an error, but it doesn't match errRegexp - return it without retrying
if !errRegexp.MatchString(err.Error()) {
util.Logger.Printf("[DEBUG] runWithRetry - returning error after attempt %d, got error: %s ", count, err)
return err
}

// If time limit is exceeded - return error containing statistics and original error
if time.Now().After(endTime) {
util.Logger.Printf("[DEBUG] runWithRetry - exceeded time after attempt %d, got error: %s ", count, err)
return fmt.Errorf("error attempting to wait until error does not contain '%s' after %f seconds: %s", errRegexp, duration.Seconds(), err)
}

// Sleep and continue
util.Logger.Printf("[DEBUG] runWithRetry - sleeping after attempt %d, will retry", count)
// Sleep 2 seconds and attempt once more if the timeout is not exceeded
time.Sleep(2 * time.Second)
count++
}
}
7 changes: 6 additions & 1 deletion govcd/vsphere_vcenter_tm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ func (vcd *TestVCD) Test_VCenter(check *C) {
err = waitForListenerStatusConnected(v)
check.Assert(err, IsNil)

err = v.RefreshVcenter()
// Sometimes the refresh fails with one of 'vCenterEntityBusyRegexp' errors
err = runWithRetry(v.RefreshVcenter, vCenterEntityBusyRegexp, maximumVcenterRetryTime)
check.Assert(err, IsNil)

// Refresh storage policies
err = runWithRetry(v.RefreshStorageProfiles, vCenterEntityBusyRegexp, maximumVcenterRetryTime)
check.Assert(err, IsNil)

// Get By Name
Expand Down
Loading