Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-07-27 09:39:11 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-07-27 13:50:26 +0300
commit1ffa31f8b5f0dfe7f3e75762a092617e918ec99e (patch)
treee40b3da5186aece4bf2a4aa58325ef5e4e9c227f
parent2f772164e075c2fc9c56d475a035330267c3189a (diff)
ssh: Handle timeout waiting on packfile negotiation with proper errorspks-ssh-cancelled-packfile-negotiation
Before the introduction of 3c6bc2db7 (Limit the negotiation phase for certain Gitaly RPCs, 2019-10-15), SSHUploadPack and SSHUploadArchive had been vulnerable to a TOCTOU race: the client could open a session to the server and keep it up almost indefinitely without starting the packfile negotiation yet, so that access checks have already been performed but the client can still ask for an arbitrary set of objects to be sent to it. So even if the client's access got rejected meanwhile, it could still ask for arbitrary objects after the fact by starting the packfile negotiation at whatever point the client wants to. This TOCTOU race has been plugged by listening in on the negotiation and putting a limit on how long that phase may span. The underlying thought is that it's fine to keep the actual packfile streaming open for an arbitrary amount of time because at that point the objects have already been negotiated anyway. We have observed some flakiness in this area though in our CI pipelines, where the timeout would sometimes return a `codes.Canceled` instead of our currently-assumed `codes.Internal` error. But taking a deeper look it is actually surprising that we'd return `codes.Internal` in the first place: it's a well-defined error case and we should provide proper information to the client to distinguish the case where we timed out waiting for the packfile negotiation to complete. Instead, we just return an error that is indistinguishable from other errors. Fix this shortcoming by properly handling the case where we cancel the context ourselves due to the packfile negotiation timing out. We now return a proper error message as well as an error code to the client that makes it easy to see what's happening. Chances are that this will also fix the flakiness in our CI, but it's hard to tell. Changelog: fixed
-rw-r--r--internal/gitaly/service/ssh/upload_archive.go12
-rw-r--r--internal/gitaly/service/ssh/upload_archive_test.go2
-rw-r--r--internal/gitaly/service/ssh/upload_pack.go16
-rw-r--r--internal/gitaly/service/ssh/upload_pack_test.go2
4 files changed, 28 insertions, 4 deletions
diff --git a/internal/gitaly/service/ssh/upload_archive.go b/internal/gitaly/service/ssh/upload_archive.go
index 7fef9bb2d..3dc362a91 100644
--- a/internal/gitaly/service/ssh/upload_archive.go
+++ b/internal/gitaly/service/ssh/upload_archive.go
@@ -69,6 +69,18 @@ func (s *server) sshUploadArchive(stream gitalypb.SSHService_SSHUploadArchiveSer
go monitor.Monitor(ctx, pktline.PktFlush(), timeoutTicker, cancelCtx)
if err := cmd.Wait(); err != nil {
+ // When waiting for the packfile negotiation to end times out we'll cancel the local
+ // context, but not cancel the overall RPC's context. Our cancelhandler middleware
+ // thus cannot observe the fact that we're cancelling the context, and neither do we
+ // provide any valuable information to the caller that we do indeed kill the command
+ // because of our own internal timeout.
+ //
+ // We thus need to special-case the situation where we cancel our own context in
+ // order to provide that information and return a proper gRPC error code.
+ if ctx.Err() != nil && stream.Context().Err() == nil {
+ return helper.ErrDeadlineExceededf("waiting for packfile negotiation: %w", ctx.Err())
+ }
+
if status, ok := command.ExitStatus(err); ok {
if sendErr := stream.Send(&gitalypb.SSHUploadArchiveResponse{
ExitStatus: &gitalypb.ExitStatus{Value: int32(status)},
diff --git a/internal/gitaly/service/ssh/upload_archive_test.go b/internal/gitaly/service/ssh/upload_archive_test.go
index c0dbf593c..af2699a90 100644
--- a/internal/gitaly/service/ssh/upload_archive_test.go
+++ b/internal/gitaly/service/ssh/upload_archive_test.go
@@ -42,7 +42,7 @@ func TestFailedUploadArchiveRequestDueToTimeout(t *testing.T) {
// Because the client says nothing, the server would block. Because of
// the timeout, it won't block forever, and return with a non-zero exit
// code instead.
- requireFailedSSHStream(t, helper.ErrInternalf("SSHUploadPack: signal: terminated"), func() (int32, error) {
+ requireFailedSSHStream(t, helper.ErrDeadlineExceededf("waiting for packfile negotiation: context canceled"), func() (int32, error) {
resp, err := stream.Recv()
if err != nil {
return 0, err
diff --git a/internal/gitaly/service/ssh/upload_pack.go b/internal/gitaly/service/ssh/upload_pack.go
index d17fafcf5..e468c834c 100644
--- a/internal/gitaly/service/ssh/upload_pack.go
+++ b/internal/gitaly/service/ssh/upload_pack.go
@@ -72,8 +72,8 @@ type sshUploadPackRequest interface {
GetGitProtocol() string
}
-func (s *server) sshUploadPack(ctx context.Context, req sshUploadPackRequest, stdin io.Reader, stdout, stderr io.Writer) (int, error) {
- ctx, cancelCtx := context.WithCancel(ctx)
+func (s *server) sshUploadPack(rpcContext context.Context, req sshUploadPackRequest, stdin io.Reader, stdout, stderr io.Writer) (int, error) {
+ ctx, cancelCtx := context.WithCancel(rpcContext)
defer cancelCtx()
stdoutCounter := &helper.CountingWriter{W: stdout}
@@ -147,6 +147,18 @@ func (s *server) sshUploadPack(ctx context.Context, req sshUploadPackRequest, st
if err := cmd.Wait(); err != nil {
status, _ := command.ExitStatus(err)
+ // When waiting for the packfile negotiation to end times out we'll cancel the local
+ // context, but not cancel the overall RPC's context. Our cancelhandler middleware
+ // thus cannot observe the fact that we're cancelling the context, and neither do we
+ // provide any valuable information to the caller that we do indeed kill the command
+ // because of our own internal timeout.
+ //
+ // We thus need to special-case the situation where we cancel our own context in
+ // order to provide that information and return a proper gRPC error code.
+ if ctx.Err() != nil && rpcContext.Err() == nil {
+ return status, helper.ErrDeadlineExceededf("waiting for packfile negotiation: %w", ctx.Err())
+ }
+
// A common error case is that the client is terminating the request prematurely,
// e.g. by killing their git-fetch(1) process because it's taking too long. This is
// an expected failure, but we're not in a position to easily tell this error apart
diff --git a/internal/gitaly/service/ssh/upload_pack_test.go b/internal/gitaly/service/ssh/upload_pack_test.go
index e2041fd4f..f821fc128 100644
--- a/internal/gitaly/service/ssh/upload_pack_test.go
+++ b/internal/gitaly/service/ssh/upload_pack_test.go
@@ -120,7 +120,7 @@ func testUploadPackTimeout(t *testing.T, opts ...testcfg.Option) {
// Because the client says nothing, the server would block. Because of
// the timeout, it won't block forever, and return with a non-zero exit
// code instead.
- requireFailedSSHStream(t, helper.ErrInternalf("cmd wait: signal: terminated, stderr: \"\""), func() (int32, error) {
+ requireFailedSSHStream(t, helper.ErrDeadlineExceededf("waiting for packfile negotiation: context canceled"), func() (int32, error) {
resp, err := stream.Recv()
if err != nil {
return 0, err