| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 1 | /* |
| 2 | * |
| 3 | * Copyright 2017 gRPC authors. |
| 4 | * |
| 5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | * you may not use this file except in compliance with the License. |
| 7 | * You may obtain a copy of the License at |
| 8 | * |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | * |
| 11 | * Unless required by applicable law or agreed to in writing, software |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | * See the License for the specific language governing permissions and |
| 15 | * limitations under the License. |
| 16 | * |
| 17 | */ |
| 18 | |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 19 | // Package health provides a service that exposes server's health and it must be |
| 20 | // imported to enable support for client-side health checks. |
| 21 | package health |
| 22 | |
| 23 | import ( |
| 24 | "context" |
| 25 | "sync" |
| 26 | |
| 27 | "google.golang.org/grpc/codes" |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 28 | healthgrpc "google.golang.org/grpc/health/grpc_health_v1" |
| 29 | healthpb "google.golang.org/grpc/health/grpc_health_v1" |
| 30 | "google.golang.org/grpc/status" |
| 31 | ) |
| 32 | |
// maxAllowedServices caps how many services a List call may report. List
// returns an error instead of a response once the number of tracked services
// goes above this value.
const maxAllowedServices = 100
| 39 | |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 40 | // Server implements `service Health`. |
| 41 | type Server struct { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 42 | healthgrpc.UnimplementedHealthServer |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 43 | mu sync.RWMutex |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 44 | // If shutdown is true, it's expected all serving status is NOT_SERVING, and |
| 45 | // will stay in NOT_SERVING. |
| 46 | shutdown bool |
| 47 | // statusMap stores the serving status of the services this Server monitors. |
| 48 | statusMap map[string]healthpb.HealthCheckResponse_ServingStatus |
| 49 | updates map[string]map[healthgrpc.Health_WatchServer]chan healthpb.HealthCheckResponse_ServingStatus |
| 50 | } |
| 51 | |
| 52 | // NewServer returns a new Server. |
| 53 | func NewServer() *Server { |
| 54 | return &Server{ |
| 55 | statusMap: map[string]healthpb.HealthCheckResponse_ServingStatus{"": healthpb.HealthCheckResponse_SERVING}, |
| 56 | updates: make(map[string]map[healthgrpc.Health_WatchServer]chan healthpb.HealthCheckResponse_ServingStatus), |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | // Check implements `service Health`. |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 61 | func (s *Server) Check(_ context.Context, in *healthpb.HealthCheckRequest) (*healthpb.HealthCheckResponse, error) { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 62 | s.mu.RLock() |
| 63 | defer s.mu.RUnlock() |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 64 | if servingStatus, ok := s.statusMap[in.Service]; ok { |
| 65 | return &healthpb.HealthCheckResponse{ |
| 66 | Status: servingStatus, |
| 67 | }, nil |
| 68 | } |
| 69 | return nil, status.Error(codes.NotFound, "unknown service") |
| 70 | } |
| 71 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 72 | // List implements `service Health`. |
| 73 | func (s *Server) List(_ context.Context, _ *healthpb.HealthListRequest) (*healthpb.HealthListResponse, error) { |
| 74 | s.mu.RLock() |
| 75 | defer s.mu.RUnlock() |
| 76 | |
| 77 | if len(s.statusMap) > maxAllowedServices { |
| 78 | return nil, status.Errorf(codes.ResourceExhausted, "server health list exceeds maximum capacity: %d", maxAllowedServices) |
| 79 | } |
| 80 | |
| 81 | statusMap := make(map[string]*healthpb.HealthCheckResponse, len(s.statusMap)) |
| 82 | for k, v := range s.statusMap { |
| 83 | statusMap[k] = &healthpb.HealthCheckResponse{Status: v} |
| 84 | } |
| 85 | |
| 86 | return &healthpb.HealthListResponse{Statuses: statusMap}, nil |
| 87 | } |
| 88 | |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 89 | // Watch implements `service Health`. |
| 90 | func (s *Server) Watch(in *healthpb.HealthCheckRequest, stream healthgrpc.Health_WatchServer) error { |
| 91 | service := in.Service |
| 92 | // update channel is used for getting service status updates. |
| 93 | update := make(chan healthpb.HealthCheckResponse_ServingStatus, 1) |
| 94 | s.mu.Lock() |
| 95 | // Puts the initial status to the channel. |
| 96 | if servingStatus, ok := s.statusMap[service]; ok { |
| 97 | update <- servingStatus |
| 98 | } else { |
| 99 | update <- healthpb.HealthCheckResponse_SERVICE_UNKNOWN |
| 100 | } |
| 101 | |
| 102 | // Registers the update channel to the correct place in the updates map. |
| 103 | if _, ok := s.updates[service]; !ok { |
| 104 | s.updates[service] = make(map[healthgrpc.Health_WatchServer]chan healthpb.HealthCheckResponse_ServingStatus) |
| 105 | } |
| 106 | s.updates[service][stream] = update |
| 107 | defer func() { |
| 108 | s.mu.Lock() |
| 109 | delete(s.updates[service], stream) |
| 110 | s.mu.Unlock() |
| 111 | }() |
| 112 | s.mu.Unlock() |
| 113 | |
| 114 | var lastSentStatus healthpb.HealthCheckResponse_ServingStatus = -1 |
| 115 | for { |
| 116 | select { |
| 117 | // Status updated. Sends the up-to-date status to the client. |
| 118 | case servingStatus := <-update: |
| 119 | if lastSentStatus == servingStatus { |
| 120 | continue |
| 121 | } |
| 122 | lastSentStatus = servingStatus |
| 123 | err := stream.Send(&healthpb.HealthCheckResponse{Status: servingStatus}) |
| 124 | if err != nil { |
| 125 | return status.Error(codes.Canceled, "Stream has ended.") |
| 126 | } |
| 127 | // Context done. Removes the update channel from the updates map. |
| 128 | case <-stream.Context().Done(): |
| 129 | return status.Error(codes.Canceled, "Stream has ended.") |
| 130 | } |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | // SetServingStatus is called when need to reset the serving status of a service |
| 135 | // or insert a new service entry into the statusMap. |
| 136 | func (s *Server) SetServingStatus(service string, servingStatus healthpb.HealthCheckResponse_ServingStatus) { |
| 137 | s.mu.Lock() |
| 138 | defer s.mu.Unlock() |
| 139 | if s.shutdown { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 140 | logger.Infof("health: status changing for %s to %v is ignored because health service is shutdown", service, servingStatus) |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 141 | return |
| 142 | } |
| 143 | |
| 144 | s.setServingStatusLocked(service, servingStatus) |
| 145 | } |
| 146 | |
| 147 | func (s *Server) setServingStatusLocked(service string, servingStatus healthpb.HealthCheckResponse_ServingStatus) { |
| 148 | s.statusMap[service] = servingStatus |
| 149 | for _, update := range s.updates[service] { |
| 150 | // Clears previous updates, that are not sent to the client, from the channel. |
| 151 | // This can happen if the client is not reading and the server gets flow control limited. |
| 152 | select { |
| 153 | case <-update: |
| 154 | default: |
| 155 | } |
| 156 | // Puts the most recent update to the channel. |
| 157 | update <- servingStatus |
| 158 | } |
| 159 | } |
| 160 | |
| 161 | // Shutdown sets all serving status to NOT_SERVING, and configures the server to |
| 162 | // ignore all future status changes. |
| 163 | // |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 164 | // This changes serving status for all services. To set status for a particular |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 165 | // services, call SetServingStatus(). |
| 166 | func (s *Server) Shutdown() { |
| 167 | s.mu.Lock() |
| 168 | defer s.mu.Unlock() |
| 169 | s.shutdown = true |
| 170 | for service := range s.statusMap { |
| 171 | s.setServingStatusLocked(service, healthpb.HealthCheckResponse_NOT_SERVING) |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | // Resume sets all serving status to SERVING, and configures the server to |
| 176 | // accept all future status changes. |
| 177 | // |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 178 | // This changes serving status for all services. To set status for a particular |
| khenaidoo | 59ce9dd | 2019-11-11 13:05:32 -0500 | [diff] [blame] | 179 | // services, call SetServingStatus(). |
| 180 | func (s *Server) Resume() { |
| 181 | s.mu.Lock() |
| 182 | defer s.mu.Unlock() |
| 183 | s.shutdown = false |
| 184 | for service := range s.statusMap { |
| 185 | s.setServingStatusLocked(service, healthpb.HealthCheckResponse_SERVING) |
| 186 | } |
| 187 | } |