Skip to content

Commit

Permalink
Fix cases of syncing different SHAs back to back
Browse files Browse the repository at this point in the history
Prior to this, it would fail if the 2nd SHA wasn't in the local repo.
Now it doesn't care what the local SHA for rev is, it only cares what is
checked out at HEAD.

Also deref tags on ls-remote

The short story: `ls-remote` for a tag gets us the SHA of the tag, but
`rev-parse HEAD` gets us the SHA of the commit to which that tag is
attached.  Those are never equal, so we detect "update needed" every
loop.

Now we ask `ls-remote` for the rev and the dereferenced rev.  If that
rev is a branch, the deref does nothing.  If that rev is a tag it
produces both results.  ls-remote does its own sort, so the deref (if
found) comes after the non-deref.  This means that, in both cases, the
last line is the one we want.
  • Loading branch information
thockin committed Feb 11, 2023
1 parent 39ab896 commit 3eb34e0
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 21 deletions.
91 changes: 70 additions & 21 deletions cmd/git-sync/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -747,23 +747,28 @@ func cleanupWorkTree(ctx context.Context, gitRoot, worktree string) error {
func addWorktreeAndSwap(ctx context.Context, repo, gitRoot, dest, branch, rev string, depth int, hash string, submoduleMode string) error {
log.V(0).Info("syncing git", "rev", rev, "hash", hash)

args := []string{"fetch", "-f", "--tags"}
if depth != 0 {
args = append(args, "--depth", strconv.Itoa(depth))
}
args = append(args, repo, branch)
// If we don't have this hash, we need to fetch it.
if _, err := revIsHash(ctx, hash, gitRoot); err != nil {
log.V(2).Info("can't resolve commit, will try fetch", "rev", rev, "hash", hash)

// Update from the remote.
if _, err := cmdRunner.Run(ctx, gitRoot, nil, *flGitCmd, args...); err != nil {
return err
}
args := []string{"fetch", "-f", "--tags"}
if depth != 0 {
args = append(args, "--depth", strconv.Itoa(depth))
}
args = append(args, repo, branch)

// With shallow fetches, it's possible to race with the upstream repo and
// end up NOT fetching the hash we wanted. If we can't resolve that hash
// to a commit we can just end early and leave it for the next sync period.
if _, err := revIsHash(ctx, hash, gitRoot); err != nil {
log.Error(err, "can't resolve commit, will retry", "rev", rev, "hash", hash)
return nil
// Update from the remote.
if _, err := cmdRunner.Run(ctx, gitRoot, nil, *flGitCmd, args...); err != nil {
return err
}

// With shallow fetches, it's possible to race with the upstream repo and
// end up NOT fetching the hash we wanted. If we can't resolve that hash
// to a commit we can just end early and leave it for the next sync period.
if _, err := revIsHash(ctx, hash, gitRoot); err != nil {
log.Error(err, "can't resolve commit, will retry", "rev", rev, "hash", hash)
return nil
}
}

// Make a worktree for this exact git hash.
Expand Down Expand Up @@ -1002,19 +1007,44 @@ func localHashForRev(ctx context.Context, rev, gitRoot string) (string, error) {
if err != nil {
return "", err
}
result := strings.Trim(string(output), "\n")
if result == rev {
// It appears to be a SHA, so we need to verify that we have it. We
// don't care what cat-file says, just whether it succeeds or fails.
_, err := cmdRunner.Run(ctx, gitRoot, nil, *flGitCmd, "cat-file", "-t", rev)
if err != nil {
// Indicate that we do not have a local hash for this hash.
return "", err
}
}
return strings.Trim(string(output), "\n"), nil
}

// remoteHashForRef returns the upstream hash for a given ref.
func remoteHashForRef(ctx context.Context, repo, ref, gitRoot string) (string, error) {
output, err := cmdRunner.Run(ctx, gitRoot, nil, *flGitCmd, "ls-remote", "-q", repo, ref)
// Fetch both the bare and dereferenced rev. git sorts the results and
// prints the dereferenced result, if present, after the bare result, so we
// always want the last result it produces.
output, err := cmdRunner.Run(ctx, gitRoot, nil, *flGitCmd, "ls-remote", "-q", repo, ref, ref+"^{}")
if err != nil {
return "", err
}
parts := strings.Split(string(output), "\t")
lines := strings.Split(string(output), "\n") // guaranteed to have at least 1 element
line := lastNonEmpty(lines)
parts := strings.Split(line, "\t") // guaranteed to have at least 1 element
return parts[0], nil
}

func lastNonEmpty(lines []string) string {
last := ""
for _, line := range lines {
if line != "" {
last = line
}
}
return last
}

func revIsHash(ctx context.Context, rev, gitRoot string) (bool, error) {
// If git doesn't identify rev as a commit, we're done.
output, err := cmdRunner.Run(ctx, gitRoot, nil, *flGitCmd, "cat-file", "-t", rev)
Expand Down Expand Up @@ -1075,15 +1105,20 @@ func syncRepo(ctx context.Context, repo, branch, rev string, depth int, gitRoot,
return true, hash, addWorktreeAndSwap(ctx, repo, gitRoot, dest, branch, rev, depth, hash, submoduleMode)
}

// getRevs returns the local and upstream hashes for rev.
// getRevs returns the current HEAD and upstream hash for rev. Normally the
// current HEAD is a previous or current version of rev, but if the app was
// started with one rev and then restarted with a different one, HEAD could be
// anything.
func getRevs(ctx context.Context, repo, localDir, branch, rev string) (string, string, error) {
// Ask git what the exact hash is for rev.
local, err := localHashForRev(ctx, rev, localDir)
// Find the currently synced HEAD.
local, err := localHashForRev(ctx, "HEAD", localDir)
if err != nil {
return "", "", err
}

// Build a ref string, depending on whether the user asked to track HEAD or a tag.
// Build a ref string, depending on whether the user asked to track HEAD or
// a tag. We can't really tell if it is a SHA yet, so we will catch that
// case later.
ref := ""
if rev == "HEAD" {
ref = "refs/heads/" + branch
Expand All @@ -1097,6 +1132,20 @@ func getRevs(ctx context.Context, repo, localDir, branch, rev string) (string, s
return "", "", err
}

// If we couldn't find a remote hash, it might have been a SHA literal.
if remote == "" {
// If git thinks it tastes like a SHA, we just return that and if it
// is wrong, we will fail later.
output, err := cmdRunner.Run(ctx, localDir, nil, *flGitCmd, "rev-parse", rev)
if err != nil {
return "", "", err
}
result := strings.Trim(string(output), "\n")
if result == rev {
remote = rev
}
}

return local, remote, nil
}

Expand Down
51 changes: 51 additions & 0 deletions test_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,57 @@ function e2e::sync_sha_once() {
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
#
##############################################
# Test sha-HEAD-sha syncing
##############################################
function e2e::sync_sha_head_sha() {
# First sync
echo "$FUNCNAME 1" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME 1"
REV=$(git -C "$REPO" rev-list -n1 HEAD)
echo "$FUNCNAME 2" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME 2"

# SHA
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev="$REV" \
--root="$ROOT" \
--dest="link" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME 1"

# HEAD
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev="HEAD" \
--root="$ROOT" \
--dest="link" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME 2"

# SHA
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev="$REV" \
--root="$ROOT" \
--dest="link" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME 1"
}

##############################################
# Test syncing after a crash
Expand Down

0 comments on commit 3eb34e0

Please sign in to comment.