blob: 740745c45f630ee82ad254c1c54ec8e140442d54 [file] [log] [blame]
/*
 *
 * Copyright 2018 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
18
19package health
20
21import (
22 "context"
23 "fmt"
24 "io"
25 "time"
26
27 "google.golang.org/grpc"
28 "google.golang.org/grpc/codes"
29 "google.golang.org/grpc/connectivity"
30 healthpb "google.golang.org/grpc/health/grpc_health_v1"
31 "google.golang.org/grpc/internal"
32 "google.golang.org/grpc/internal/backoff"
33 "google.golang.org/grpc/status"
34)
35
khenaidoo26721882021-08-11 17:42:52 -040036var (
37 backoffStrategy = backoff.DefaultExponential
38 backoffFunc = func(ctx context.Context, retries int) bool {
39 d := backoffStrategy.Backoff(retries)
40 timer := time.NewTimer(d)
41 select {
42 case <-timer.C:
43 return true
44 case <-ctx.Done():
45 timer.Stop()
46 return false
47 }
khenaidoo59ce9dd2019-11-11 13:05:32 -050048 }
khenaidoo26721882021-08-11 17:42:52 -040049)
khenaidoo59ce9dd2019-11-11 13:05:32 -050050
51func init() {
52 internal.HealthCheckFunc = clientHealthCheck
53}
54
// healthCheckMethod is the full method name passed to newStream when opening
// the health-check stream (the Watch method of grpc.health.v1.Health).
const healthCheckMethod = "/grpc.health.v1.Health/Watch"
56
57// This function implements the protocol defined at:
58// https://github.com/grpc/grpc/blob/master/doc/health-checking.md
Abhay Kumar40252eb2025-10-13 13:25:53 +000059func clientHealthCheck(ctx context.Context, newStream func(string) (any, error), setConnectivityState func(connectivity.State, error), service string) error {
khenaidoo59ce9dd2019-11-11 13:05:32 -050060 tryCnt := 0
61
62retryConnection:
63 for {
64 // Backs off if the connection has failed in some way without receiving a message in the previous retry.
65 if tryCnt > 0 && !backoffFunc(ctx, tryCnt-1) {
66 return nil
67 }
68 tryCnt++
69
70 if ctx.Err() != nil {
71 return nil
72 }
Abhay Kumar40252eb2025-10-13 13:25:53 +000073 setConnectivityState(connectivity.Connecting, nil)
khenaidoo59ce9dd2019-11-11 13:05:32 -050074 rawS, err := newStream(healthCheckMethod)
75 if err != nil {
76 continue retryConnection
77 }
78
79 s, ok := rawS.(grpc.ClientStream)
80 // Ideally, this should never happen. But if it happens, the server is marked as healthy for LBing purposes.
81 if !ok {
Abhay Kumar40252eb2025-10-13 13:25:53 +000082 setConnectivityState(connectivity.Ready, nil)
khenaidoo59ce9dd2019-11-11 13:05:32 -050083 return fmt.Errorf("newStream returned %v (type %T); want grpc.ClientStream", rawS, rawS)
84 }
85
86 if err = s.SendMsg(&healthpb.HealthCheckRequest{Service: service}); err != nil && err != io.EOF {
87 // Stream should have been closed, so we can safely continue to create a new stream.
88 continue retryConnection
89 }
90 s.CloseSend()
91
92 resp := new(healthpb.HealthCheckResponse)
93 for {
94 err = s.RecvMsg(resp)
95
96 // Reports healthy for the LBing purposes if health check is not implemented in the server.
97 if status.Code(err) == codes.Unimplemented {
Abhay Kumar40252eb2025-10-13 13:25:53 +000098 setConnectivityState(connectivity.Ready, nil)
khenaidoo59ce9dd2019-11-11 13:05:32 -050099 return err
100 }
101
102 // Reports unhealthy if server's Watch method gives an error other than UNIMPLEMENTED.
103 if err != nil {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000104 setConnectivityState(connectivity.TransientFailure, fmt.Errorf("connection active but received health check RPC error: %v", err))
khenaidoo59ce9dd2019-11-11 13:05:32 -0500105 continue retryConnection
106 }
107
khenaidoo26721882021-08-11 17:42:52 -0400108 // As a message has been received, removes the need for backoff for the next retry by resetting the try count.
khenaidoo59ce9dd2019-11-11 13:05:32 -0500109 tryCnt = 0
110 if resp.Status == healthpb.HealthCheckResponse_SERVING {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000111 setConnectivityState(connectivity.Ready, nil)
khenaidoo59ce9dd2019-11-11 13:05:32 -0500112 } else {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000113 setConnectivityState(connectivity.TransientFailure, fmt.Errorf("connection active but health check failed. status=%s", resp.Status))
khenaidoo59ce9dd2019-11-11 13:05:32 -0500114 }
115 }
116 }
117}