[VOL-5486] Fix deprecated versions
Change-Id: I3e03ea246020547ae75fa92ce8cf5cbba7e8f3bb
Signed-off-by: Abhay Kumar <abhay.kumar@radisys.com>
diff --git a/vendor/google.golang.org/grpc/clientconn.go b/vendor/google.golang.org/grpc/clientconn.go
index 4414ba8..a3c315f 100644
--- a/vendor/google.golang.org/grpc/clientconn.go
+++ b/vendor/google.golang.org/grpc/clientconn.go
@@ -23,8 +23,8 @@
"errors"
"fmt"
"math"
- "net"
- "reflect"
+ "net/url"
+ "slices"
"strings"
"sync"
"sync/atomic"
@@ -32,13 +32,15 @@
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/balancer/base"
+ "google.golang.org/grpc/balancer/pickfirst"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/connectivity"
- "google.golang.org/grpc/credentials"
- "google.golang.org/grpc/grpclog"
- "google.golang.org/grpc/internal/backoff"
+ "google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/channelz"
"google.golang.org/grpc/internal/grpcsync"
+ "google.golang.org/grpc/internal/idle"
+ iresolver "google.golang.org/grpc/internal/resolver"
+ "google.golang.org/grpc/internal/stats"
"google.golang.org/grpc/internal/transport"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/resolver"
@@ -46,15 +48,14 @@
"google.golang.org/grpc/status"
_ "google.golang.org/grpc/balancer/roundrobin" // To register roundrobin.
- _ "google.golang.org/grpc/internal/resolver/dns" // To register dns resolver.
_ "google.golang.org/grpc/internal/resolver/passthrough" // To register passthrough resolver.
+ _ "google.golang.org/grpc/internal/resolver/unix" // To register unix resolver.
+ _ "google.golang.org/grpc/resolver/dns" // To register dns resolver.
)
const (
// minimum time to give a connection to complete
minConnectTimeout = 20 * time.Second
- // must match grpclbName in grpclb/grpclb.go
- grpclbName = "grpclb"
)
var (
@@ -68,11 +69,14 @@
errConnDrain = errors.New("grpc: the connection is drained")
// errConnClosing indicates that the connection is closing.
errConnClosing = errors.New("grpc: the connection is closing")
- // errBalancerClosed indicates that the balancer is closed.
- errBalancerClosed = errors.New("grpc: balancer is closed")
+ // errConnIdling indicates the connection is being closed as the channel
+ // is moving to an idle mode due to inactivity.
+ errConnIdling = errors.New("grpc: the connection is closing due to channel idleness")
// invalidDefaultServiceConfigErrPrefix is used to prefix the json parsing error for the default
// service config.
invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid"
+ // PickFirstBalancerName is the name of the pick_first balancer.
+ PickFirstBalancerName = pickfirst.Name
)
// The following errors are returned from Dial and DialContext
@@ -80,17 +84,17 @@
// errNoTransportSecurity indicates that there is no transport security
// being set for ClientConn. Users should either set one or explicitly
// call WithInsecure DialOption to disable security.
- errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")
+ errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithTransportCredentials(insecure.NewCredentials()) explicitly or set credentials)")
// errTransportCredsAndBundle indicates that creds bundle is used together
// with other individual Transport Credentials.
errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")
- // errTransportCredentialsMissing indicates that users want to transmit security
- // information (e.g., OAuth2 token) which requires secure connection on an insecure
- // connection.
+ // errNoTransportCredsInBundle indicated that the configured creds bundle
+ // returned a transport credentials which was nil.
+ errNoTransportCredsInBundle = errors.New("grpc: credentials.Bundle must return non-nil transport credentials")
+ // errTransportCredentialsMissing indicates that users want to transmit
+ // security information (e.g., OAuth2 token) which requires secure
+ // connection on an insecure connection.
errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
- // errCredentialsConflict indicates that grpc.WithTransportCredentials()
- // and grpc.WithInsecure() are both called for a connection.
- errCredentialsConflict = errors.New("grpc: transport credentials are set for an insecure connection (grpc.WithTransportCredentials() and grpc.WithInsecure() are both called)")
)
const (
@@ -101,114 +105,168 @@
defaultReadBufSize = 32 * 1024
)
-// Dial creates a client connection to the given target.
-func Dial(target string, opts ...DialOption) (*ClientConn, error) {
- return DialContext(context.Background(), target, opts...)
+type defaultConfigSelector struct {
+ sc *ServiceConfig
}
-// DialContext creates a client connection to the given target. By default, it's
-// a non-blocking dial (the function won't wait for connections to be
-// established, and connecting happens in the background). To make it a blocking
-// dial, use WithBlock() dial option.
-//
-// In the non-blocking case, the ctx does not act against the connection. It
-// only controls the setup steps.
-//
-// In the blocking case, ctx can be used to cancel or expire the pending
-// connection. Once this function returns, the cancellation and expiration of
-// ctx will be noop. Users should call ClientConn.Close to terminate all the
-// pending operations after this function returns.
+func (dcs *defaultConfigSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*iresolver.RPCConfig, error) {
+ return &iresolver.RPCConfig{
+ Context: rpcInfo.Context,
+ MethodConfig: getMethodConfig(dcs.sc, rpcInfo.Method),
+ }, nil
+}
+
+// NewClient creates a new gRPC "channel" for the target URI provided. No I/O
+// is performed. Use of the ClientConn for RPCs will automatically cause it to
+// connect. The Connect method may be called to manually create a connection,
+// but for most users this should be unnecessary.
//
// The target name syntax is defined in
-// https://github.com/grpc/grpc/blob/master/doc/naming.md.
-// e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
-func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
+// https://github.com/grpc/grpc/blob/master/doc/naming.md. E.g. to use the dns
+// name resolver, a "dns:///" prefix may be applied to the target. The default
+// name resolver will be used if no scheme is detected, or if the parsed scheme
+// is not a registered name resolver. The default resolver is "dns" but can be
+// overridden using the resolver package's SetDefaultScheme.
+//
+// Examples:
+//
+// - "foo.googleapis.com:8080"
+// - "dns:///foo.googleapis.com:8080"
+// - "dns:///foo.googleapis.com"
+// - "dns:///10.0.0.213:8080"
+// - "dns:///%5B2001:db8:85a3:8d3:1319:8a2e:370:7348%5D:443"
+// - "dns://8.8.8.8/foo.googleapis.com:8080"
+// - "dns://8.8.8.8/foo.googleapis.com"
+// - "zookeeper://zk.example.com:9900/example_service"
+//
+// The DialOptions returned by WithBlock, WithTimeout,
+// WithReturnConnectionError, and FailOnNonTempDialError are ignored by this
+// function.
+func NewClient(target string, opts ...DialOption) (conn *ClientConn, err error) {
cc := &ClientConn{
- target: target,
- csMgr: &connectivityStateManager{},
- conns: make(map[*addrConn]struct{}),
- dopts: defaultDialOptions(),
- blockingpicker: newPickerWrapper(),
- czData: new(channelzData),
- firstResolveEvent: grpcsync.NewEvent(),
+ target: target,
+ conns: make(map[*addrConn]struct{}),
+ dopts: defaultDialOptions(),
}
+
cc.retryThrottler.Store((*retryThrottler)(nil))
+ cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil})
cc.ctx, cc.cancel = context.WithCancel(context.Background())
+ // Apply dial options.
+ disableGlobalOpts := false
+ for _, opt := range opts {
+ if _, ok := opt.(*disableGlobalDialOptions); ok {
+ disableGlobalOpts = true
+ break
+ }
+ }
+
+ if !disableGlobalOpts {
+ for _, opt := range globalDialOptions {
+ opt.apply(&cc.dopts)
+ }
+ }
+
for _, opt := range opts {
opt.apply(&cc.dopts)
}
+ // Determine the resolver to use.
+ if err := cc.initParsedTargetAndResolverBuilder(); err != nil {
+ return nil, err
+ }
+
+ for _, opt := range globalPerTargetDialOptions {
+ opt.DialOptionForTarget(cc.parsedTarget.URL).apply(&cc.dopts)
+ }
+
chainUnaryClientInterceptors(cc)
chainStreamClientInterceptors(cc)
+ if err := cc.validateTransportCredentials(); err != nil {
+ return nil, err
+ }
+
+ if cc.dopts.defaultServiceConfigRawJSON != nil {
+ scpr := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON, cc.dopts.maxCallAttempts)
+ if scpr.Err != nil {
+ return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, scpr.Err)
+ }
+ cc.dopts.defaultServiceConfig, _ = scpr.Config.(*ServiceConfig)
+ }
+ cc.keepaliveParams = cc.dopts.copts.KeepaliveParams
+
+ if err = cc.initAuthority(); err != nil {
+ return nil, err
+ }
+
+ // Register ClientConn with channelz. Note that this is only done after
+ // channel creation cannot fail.
+ cc.channelzRegistration(target)
+ channelz.Infof(logger, cc.channelz, "parsed dial target is: %#v", cc.parsedTarget)
+ channelz.Infof(logger, cc.channelz, "Channel authority set to %q", cc.authority)
+
+ cc.csMgr = newConnectivityStateManager(cc.ctx, cc.channelz)
+ cc.pickerWrapper = newPickerWrapper()
+
+ cc.metricsRecorderList = stats.NewMetricsRecorderList(cc.dopts.copts.StatsHandlers)
+
+ cc.initIdleStateLocked() // Safe to call without the lock, since nothing else has a reference to cc.
+ cc.idlenessMgr = idle.NewManager((*idler)(cc), cc.dopts.idleTimeout)
+
+ return cc, nil
+}
+
+// Dial calls DialContext(context.Background(), target, opts...).
+//
+// Deprecated: use NewClient instead. Will be supported throughout 1.x.
+func Dial(target string, opts ...DialOption) (*ClientConn, error) {
+ return DialContext(context.Background(), target, opts...)
+}
+
+// DialContext calls NewClient and then exits idle mode. If WithBlock(true) is
+// used, it calls Connect and WaitForStateChange until either the context
+// expires or the state of the ClientConn is Ready.
+//
+// One subtle difference between NewClient and Dial and DialContext is that the
+// former uses "dns" as the default name resolver, while the latter use
+// "passthrough" for backward compatibility. This distinction should not matter
+// to most users, but could matter to legacy users that specify a custom dialer
+// and expect it to receive the target string directly.
+//
+// Deprecated: use NewClient instead. Will be supported throughout 1.x.
+func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
+ // At the end of this method, we kick the channel out of idle, rather than
+ // waiting for the first rpc.
+ //
+ // WithLocalDNSResolution dial option in `grpc.Dial` ensures that it
+ // preserves behavior: when default scheme passthrough is used, skip
+ // hostname resolution, when "dns" is used for resolution, perform
+ // resolution on the client.
+ opts = append([]DialOption{withDefaultScheme("passthrough"), WithLocalDNSResolution()}, opts...)
+ cc, err := NewClient(target, opts...)
+ if err != nil {
+ return nil, err
+ }
+
+ // We start the channel off in idle mode, but kick it out of idle now,
+ // instead of waiting for the first RPC. This is the legacy behavior of
+ // Dial.
defer func() {
if err != nil {
cc.Close()
}
}()
- if channelz.IsOn() {
- if cc.dopts.channelzParentID != 0 {
- cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
- channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
- Desc: "Channel Created",
- Severity: channelz.CtINFO,
- Parent: &channelz.TraceEventDesc{
- Desc: fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID),
- Severity: channelz.CtINFO,
- },
- })
- } else {
- cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, 0, target)
- channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
- Desc: "Channel Created",
- Severity: channelz.CtINFO,
- })
- }
- cc.csMgr.channelzID = cc.channelzID
+ // This creates the name resolver, load balancer, etc.
+ if err := cc.idlenessMgr.ExitIdleMode(); err != nil {
+ return nil, err
}
- if !cc.dopts.insecure {
- if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
- return nil, errNoTransportSecurity
- }
- if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
- return nil, errTransportCredsAndBundle
- }
- } else {
- if cc.dopts.copts.TransportCredentials != nil || cc.dopts.copts.CredsBundle != nil {
- return nil, errCredentialsConflict
- }
- for _, cd := range cc.dopts.copts.PerRPCCredentials {
- if cd.RequireTransportSecurity() {
- return nil, errTransportCredentialsMissing
- }
- }
- }
-
- if cc.dopts.defaultServiceConfigRawJSON != nil {
- scpr := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON)
- if scpr.Err != nil {
- return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, scpr.Err)
- }
- cc.dopts.defaultServiceConfig, _ = scpr.Config.(*ServiceConfig)
- }
- cc.mkp = cc.dopts.copts.KeepaliveParams
-
- if cc.dopts.copts.Dialer == nil {
- cc.dopts.copts.Dialer = newProxyDialer(
- func(ctx context.Context, addr string) (net.Conn, error) {
- network, addr := parseDialTarget(addr)
- return (&net.Dialer{}).DialContext(ctx, network, addr)
- },
- )
- }
-
- if cc.dopts.copts.UserAgent != "" {
- cc.dopts.copts.UserAgent += " " + grpcUA
- } else {
- cc.dopts.copts.UserAgent = grpcUA
+ // Return now for non-blocking dials.
+ if !cc.dopts.block {
+ return cc, nil
}
if cc.dopts.timeout > 0 {
@@ -219,116 +277,186 @@
defer func() {
select {
case <-ctx.Done():
- conn, err = nil, ctx.Err()
+ switch {
+ case ctx.Err() == err:
+ conn = nil
+ case err == nil || !cc.dopts.returnLastError:
+ conn, err = nil, ctx.Err()
+ default:
+ conn, err = nil, fmt.Errorf("%v: %v", ctx.Err(), err)
+ }
default:
}
}()
- scSet := false
- if cc.dopts.scChan != nil {
- // Try to get an initial service config.
- select {
- case sc, ok := <-cc.dopts.scChan:
- if ok {
- cc.sc = &sc
- scSet = true
- }
- default:
+ // A blocking dial blocks until the clientConn is ready.
+ for {
+ s := cc.GetState()
+ if s == connectivity.Idle {
+ cc.Connect()
}
- }
- if cc.dopts.bs == nil {
- cc.dopts.bs = backoff.DefaultExponential
- }
- if cc.dopts.resolverBuilder == nil {
- // Only try to parse target when resolver builder is not already set.
- cc.parsedTarget = parseTarget(cc.target)
- grpclog.Infof("parsed scheme: %q", cc.parsedTarget.Scheme)
- cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
- if cc.dopts.resolverBuilder == nil {
- // If resolver builder is still nil, the parsed target's scheme is
- // not registered. Fallback to default resolver and set Endpoint to
- // the original target.
- grpclog.Infof("scheme %q not registered, fallback to default scheme", cc.parsedTarget.Scheme)
- cc.parsedTarget = resolver.Target{
- Scheme: resolver.GetDefaultScheme(),
- Endpoint: target,
+ if s == connectivity.Ready {
+ return cc, nil
+ } else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
+ if err = cc.connectionError(); err != nil {
+ terr, ok := err.(interface {
+ Temporary() bool
+ })
+ if ok && !terr.Temporary() {
+ return nil, err
+ }
}
- cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
}
- } else {
- cc.parsedTarget = resolver.Target{Endpoint: target}
- }
- creds := cc.dopts.copts.TransportCredentials
- if creds != nil && creds.Info().ServerName != "" {
- cc.authority = creds.Info().ServerName
- } else if cc.dopts.insecure && cc.dopts.authority != "" {
- cc.authority = cc.dopts.authority
- } else {
- // Use endpoint from "scheme://authority/endpoint" as the default
- // authority for ClientConn.
- cc.authority = cc.parsedTarget.Endpoint
- }
-
- if cc.dopts.scChan != nil && !scSet {
- // Blocking wait for the initial service config.
- select {
- case sc, ok := <-cc.dopts.scChan:
- if ok {
- cc.sc = &sc
+ if !cc.WaitForStateChange(ctx, s) {
+ // ctx got timeout or canceled.
+ if err = cc.connectionError(); err != nil && cc.dopts.returnLastError {
+ return nil, err
}
- case <-ctx.Done():
return nil, ctx.Err()
}
}
- if cc.dopts.scChan != nil {
- go cc.scWatcher()
- }
+}
- var credsClone credentials.TransportCredentials
- if creds := cc.dopts.copts.TransportCredentials; creds != nil {
- credsClone = creds.Clone()
+// addTraceEvent is a helper method to add a trace event on the channel. If the
+// channel is a nested one, the same event is also added on the parent channel.
+func (cc *ClientConn) addTraceEvent(msg string) {
+ ted := &channelz.TraceEvent{
+ Desc: fmt.Sprintf("Channel %s", msg),
+ Severity: channelz.CtInfo,
}
- cc.balancerBuildOpts = balancer.BuildOptions{
- DialCreds: credsClone,
- CredsBundle: cc.dopts.copts.CredsBundle,
- Dialer: cc.dopts.copts.Dialer,
- ChannelzParentID: cc.channelzID,
- Target: cc.parsedTarget,
+ if cc.dopts.channelzParent != nil {
+ ted.Parent = &channelz.TraceEvent{
+ Desc: fmt.Sprintf("Nested channel(id:%d) %s", cc.channelz.ID, msg),
+ Severity: channelz.CtInfo,
+ }
}
+ channelz.AddTraceEvent(logger, cc.channelz, 0, ted)
+}
- // Build the resolver.
- rWrapper, err := newCCResolverWrapper(cc)
- if err != nil {
- return nil, fmt.Errorf("failed to build resolver: %v", err)
- }
+type idler ClientConn
+func (i *idler) EnterIdleMode() {
+ (*ClientConn)(i).enterIdleMode()
+}
+
+func (i *idler) ExitIdleMode() error {
+ return (*ClientConn)(i).exitIdleMode()
+}
+
+// exitIdleMode moves the channel out of idle mode by recreating the name
+// resolver and load balancer. This should never be called directly; use
+// cc.idlenessMgr.ExitIdleMode instead.
+func (cc *ClientConn) exitIdleMode() (err error) {
cc.mu.Lock()
- cc.resolverWrapper = rWrapper
+ if cc.conns == nil {
+ cc.mu.Unlock()
+ return errConnClosing
+ }
cc.mu.Unlock()
- // A blocking dial blocks until the clientConn is ready.
- if cc.dopts.block {
- for {
- s := cc.GetState()
- if s == connectivity.Ready {
- break
- } else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
- if err = cc.blockingpicker.connectionError(); err != nil {
- terr, ok := err.(interface {
- Temporary() bool
- })
- if ok && !terr.Temporary() {
- return nil, err
- }
- }
- }
- if !cc.WaitForStateChange(ctx, s) {
- // ctx got timeout or canceled.
- return nil, ctx.Err()
+
+ // This needs to be called without cc.mu because this builds a new resolver
+ // which might update state or report error inline, which would then need to
+ // acquire cc.mu.
+ if err := cc.resolverWrapper.start(); err != nil {
+ return err
+ }
+
+ cc.addTraceEvent("exiting idle mode")
+ return nil
+}
+
+// initIdleStateLocked initializes common state to how it should be while idle.
+func (cc *ClientConn) initIdleStateLocked() {
+ cc.resolverWrapper = newCCResolverWrapper(cc)
+ cc.balancerWrapper = newCCBalancerWrapper(cc)
+ cc.firstResolveEvent = grpcsync.NewEvent()
+ // cc.conns == nil is a proxy for the ClientConn being closed. So, instead
+ // of setting it to nil here, we recreate the map. This also means that we
+ // don't have to do this when exiting idle mode.
+ cc.conns = make(map[*addrConn]struct{})
+}
+
+// enterIdleMode puts the channel in idle mode, and as part of it shuts down the
+// name resolver, load balancer, and any subchannels. This should never be
+// called directly; use cc.idlenessMgr.EnterIdleMode instead.
+func (cc *ClientConn) enterIdleMode() {
+ cc.mu.Lock()
+
+ if cc.conns == nil {
+ cc.mu.Unlock()
+ return
+ }
+
+ conns := cc.conns
+
+ rWrapper := cc.resolverWrapper
+ rWrapper.close()
+ cc.pickerWrapper.reset()
+ bWrapper := cc.balancerWrapper
+ bWrapper.close()
+ cc.csMgr.updateState(connectivity.Idle)
+ cc.addTraceEvent("entering idle mode")
+
+ cc.initIdleStateLocked()
+
+ cc.mu.Unlock()
+
+ // Block until the name resolver and LB policy are closed.
+ <-rWrapper.serializer.Done()
+ <-bWrapper.serializer.Done()
+
+ // Close all subchannels after the LB policy is closed.
+ for ac := range conns {
+ ac.tearDown(errConnIdling)
+ }
+}
+
+// validateTransportCredentials performs a series of checks on the configured
+// transport credentials. It returns a non-nil error if any of these conditions
+// are met:
+// - no transport creds and no creds bundle is configured
+// - both transport creds and creds bundle are configured
+// - creds bundle is configured, but it lacks a transport credentials
+// - insecure transport creds configured alongside call creds that require
+// transport level security
+//
+// If none of the above conditions are met, the configured credentials are
+// deemed valid and a nil error is returned.
+func (cc *ClientConn) validateTransportCredentials() error {
+ if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
+ return errNoTransportSecurity
+ }
+ if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
+ return errTransportCredsAndBundle
+ }
+ if cc.dopts.copts.CredsBundle != nil && cc.dopts.copts.CredsBundle.TransportCredentials() == nil {
+ return errNoTransportCredsInBundle
+ }
+ transportCreds := cc.dopts.copts.TransportCredentials
+ if transportCreds == nil {
+ transportCreds = cc.dopts.copts.CredsBundle.TransportCredentials()
+ }
+ if transportCreds.Info().SecurityProtocol == "insecure" {
+ for _, cd := range cc.dopts.copts.PerRPCCredentials {
+ if cd.RequireTransportSecurity() {
+ return errTransportCredentialsMissing
}
}
}
+ return nil
+}
- return cc, nil
+// channelzRegistration registers the newly created ClientConn with channelz and
+// stores the returned identifier in `cc.channelz`. A channelz trace event is
+// emitted for ClientConn creation. If the newly created ClientConn is a nested
+// one, i.e a valid parent ClientConn ID is specified via a dial option, the
+// trace event is also added to the parent.
+//
+// Doesn't grab cc.mu as this method is expected to be called only at Dial time.
+func (cc *ClientConn) channelzRegistration(target string) {
+ parentChannel, _ := cc.dopts.channelzParent.(*channelz.Channel)
+ cc.channelz = channelz.RegisterChannel(parentChannel, target)
+ cc.addTraceEvent(fmt.Sprintf("created for target %q", target))
}
// chainUnaryClientInterceptors chains all unary client interceptors into one.
@@ -345,7 +473,7 @@
} else if len(interceptors) == 1 {
chainedInt = interceptors[0]
} else {
- chainedInt = func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error {
+ chainedInt = func(ctx context.Context, method string, req, reply any, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error {
return interceptors[0](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, 0, invoker), opts...)
}
}
@@ -357,7 +485,7 @@
if curr == len(interceptors)-1 {
return finalInvoker
}
- return func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, opts ...CallOption) error {
+ return func(ctx context.Context, method string, req, reply any, cc *ClientConn, opts ...CallOption) error {
return interceptors[curr+1](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, curr+1, finalInvoker), opts...)
}
}
@@ -393,13 +521,27 @@
}
}
+// newConnectivityStateManager creates an connectivityStateManager with
+// the specified channel.
+func newConnectivityStateManager(ctx context.Context, channel *channelz.Channel) *connectivityStateManager {
+ return &connectivityStateManager{
+ channelz: channel,
+ pubSub: grpcsync.NewPubSub(ctx),
+ }
+}
+
// connectivityStateManager keeps the connectivity.State of ClientConn.
// This struct will eventually be exported so the balancers can access it.
+//
+// TODO: If possible, get rid of the `connectivityStateManager` type, and
+// provide this functionality using the `PubSub`, to avoid keeping track of
+// the connectivity state at two places.
type connectivityStateManager struct {
mu sync.Mutex
state connectivity.State
notifyChan chan struct{}
- channelzID int64
+ channelz *channelz.Channel
+ pubSub *grpcsync.PubSub
}
// updateState updates the connectivity.State of ClientConn.
@@ -415,12 +557,10 @@
return
}
csm.state = state
- if channelz.IsOn() {
- channelz.AddTraceEvent(csm.channelzID, &channelz.TraceEventDesc{
- Desc: fmt.Sprintf("Channel Connectivity change to %v", state),
- Severity: channelz.CtINFO,
- })
- }
+ csm.channelz.ChannelMetrics.State.Store(&state)
+ csm.pubSub.Publish(state)
+
+ channelz.Infof(logger, csm.channelz, "Channel Connectivity change to %v", state)
if csm.notifyChan != nil {
// There are other goroutines waiting on this channel.
close(csm.notifyChan)
@@ -443,6 +583,20 @@
return csm.notifyChan
}
+// ClientConnInterface defines the functions clients need to perform unary and
+// streaming RPCs. It is implemented by *ClientConn, and is only intended to
+// be referenced by generated code.
+type ClientConnInterface interface {
+ // Invoke performs a unary RPC and returns after the response is received
+ // into reply.
+ Invoke(ctx context.Context, method string, args any, reply any, opts ...CallOption) error
+ // NewStream begins a streaming RPC.
+ NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error)
+}
+
+// Assert *ClientConn implements ClientConnInterface.
+var _ ClientConnInterface = (*ClientConn)(nil)
+
// ClientConn represents a virtual connection to a conceptual endpoint, to
// perform RPCs.
//
@@ -456,37 +610,47 @@
// handshakes. It also handles errors on established connections by
// re-resolving the name and reconnecting.
type ClientConn struct {
- ctx context.Context
- cancel context.CancelFunc
+ ctx context.Context // Initialized using the background context at dial time.
+ cancel context.CancelFunc // Cancelled on close.
- target string
- parsedTarget resolver.Target
- authority string
- dopts dialOptions
- csMgr *connectivityStateManager
+ // The following are initialized at dial time, and are read-only after that.
+ target string // User's dial target.
+ parsedTarget resolver.Target // See initParsedTargetAndResolverBuilder().
+ authority string // See initAuthority().
+ dopts dialOptions // Default and user specified dial options.
+ channelz *channelz.Channel // Channelz object.
+ resolverBuilder resolver.Builder // See initParsedTargetAndResolverBuilder().
+ idlenessMgr *idle.Manager
+ metricsRecorderList *stats.MetricsRecorderList
- balancerBuildOpts balancer.BuildOptions
- blockingpicker *pickerWrapper
+ // The following provide their own synchronization, and therefore don't
+ // require cc.mu to be held to access them.
+ csMgr *connectivityStateManager
+ pickerWrapper *pickerWrapper
+ safeConfigSelector iresolver.SafeConfigSelector
+ retryThrottler atomic.Value // Updated from service config.
+ // mu protects the following fields.
+ // TODO: split mu so the same mutex isn't used for everything.
mu sync.RWMutex
- resolverWrapper *ccResolverWrapper
- sc *ServiceConfig
- conns map[*addrConn]struct{}
- // Keepalive parameter can be updated if a GoAway is received.
- mkp keepalive.ClientParameters
- curBalancerName string
- balancerWrapper *ccBalancerWrapper
- retryThrottler atomic.Value
-
+ resolverWrapper *ccResolverWrapper // Always recreated whenever entering idle to simplify Close.
+ balancerWrapper *ccBalancerWrapper // Always recreated whenever entering idle to simplify Close.
+ sc *ServiceConfig // Latest service config received from the resolver.
+ conns map[*addrConn]struct{} // Set to nil on close.
+ keepaliveParams keepalive.ClientParameters // May be updated upon receipt of a GoAway.
+ // firstResolveEvent is used to track whether the name resolver sent us at
+ // least one update. RPCs block on this event. May be accessed without mu
+ // if we know we cannot be asked to enter idle mode while accessing it (e.g.
+ // when the idle manager has already been closed, or if we are already
+ // entering idle mode).
firstResolveEvent *grpcsync.Event
- channelzID int64 // channelz unique identification number
- czData *channelzData
+ lceMu sync.Mutex // protects lastConnectionError
+ lastConnectionError error
}
// WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
// ctx expires. A true value is returned in former case and false in latter.
-// This is an EXPERIMENTAL API.
func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool {
ch := cc.csMgr.getNotifyChan()
if cc.csMgr.getState() != sourceState {
@@ -501,73 +665,92 @@
}
// GetState returns the connectivity.State of ClientConn.
-// This is an EXPERIMENTAL API.
func (cc *ClientConn) GetState() connectivity.State {
return cc.csMgr.getState()
}
-func (cc *ClientConn) scWatcher() {
- for {
- select {
- case sc, ok := <-cc.dopts.scChan:
- if !ok {
- return
- }
- cc.mu.Lock()
- // TODO: load balance policy runtime change is ignored.
- // We may revisit this decision in the future.
- cc.sc = &sc
- cc.mu.Unlock()
- case <-cc.ctx.Done():
- return
- }
+// Connect causes all subchannels in the ClientConn to attempt to connect if
+// the channel is idle. Does not wait for the connection attempts to begin
+// before returning.
+//
+// # Experimental
+//
+// Notice: This API is EXPERIMENTAL and may be changed or removed in a later
+// release.
+func (cc *ClientConn) Connect() {
+ if err := cc.idlenessMgr.ExitIdleMode(); err != nil {
+ cc.addTraceEvent(err.Error())
+ return
}
+ // If the ClientConn was not in idle mode, we need to call ExitIdle on the
+ // LB policy so that connections can be created.
+ cc.mu.Lock()
+ cc.balancerWrapper.exitIdle()
+ cc.mu.Unlock()
}
-// waitForResolvedAddrs blocks until the resolver has provided addresses or the
-// context expires. Returns nil unless the context expires first; otherwise
-// returns a status error based on the context.
-func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error {
+// waitForResolvedAddrs blocks until the resolver provides addresses or the
+// context expires, whichever happens first.
+//
+// Error is nil unless the context expires first; otherwise returns a status
+// error based on the context.
+//
+// The returned boolean indicates whether it did block or not. If the
+// resolution has already happened once before, it returns false without
+// blocking. Otherwise, it wait for the resolution and return true if
+// resolution has succeeded or return false along with error if resolution has
+// failed.
+func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) (bool, error) {
// This is on the RPC path, so we use a fast path to avoid the
// more-expensive "select" below after the resolver has returned once.
if cc.firstResolveEvent.HasFired() {
- return nil
+ return false, nil
}
+ internal.NewStreamWaitingForResolver()
select {
case <-cc.firstResolveEvent.Done():
- return nil
+ return true, nil
case <-ctx.Done():
- return status.FromContextError(ctx.Err()).Err()
+ return false, status.FromContextError(ctx.Err()).Err()
case <-cc.ctx.Done():
- return ErrClientConnClosing
+ return false, ErrClientConnClosing
}
}
var emptyServiceConfig *ServiceConfig
func init() {
- cfg := parseServiceConfig("{}")
+ cfg := parseServiceConfig("{}", defaultMaxCallAttempts)
if cfg.Err != nil {
panic(fmt.Sprintf("impossible error parsing empty service config: %v", cfg.Err))
}
emptyServiceConfig = cfg.Config.(*ServiceConfig)
+
+ internal.SubscribeToConnectivityStateChanges = func(cc *ClientConn, s grpcsync.Subscriber) func() {
+ return cc.csMgr.pubSub.Subscribe(s)
+ }
+ internal.EnterIdleModeForTesting = func(cc *ClientConn) {
+ cc.idlenessMgr.EnterIdleModeForTesting()
+ }
+ internal.ExitIdleModeForTesting = func(cc *ClientConn) error {
+ return cc.idlenessMgr.ExitIdleMode()
+ }
}
-func (cc *ClientConn) maybeApplyDefaultServiceConfig(addrs []resolver.Address) {
+func (cc *ClientConn) maybeApplyDefaultServiceConfig() {
if cc.sc != nil {
- cc.applyServiceConfigAndBalancer(cc.sc, addrs)
+ cc.applyServiceConfigAndBalancer(cc.sc, nil)
return
}
if cc.dopts.defaultServiceConfig != nil {
- cc.applyServiceConfigAndBalancer(cc.dopts.defaultServiceConfig, addrs)
+ cc.applyServiceConfigAndBalancer(cc.dopts.defaultServiceConfig, &defaultConfigSelector{cc.dopts.defaultServiceConfig})
} else {
- cc.applyServiceConfigAndBalancer(emptyServiceConfig, addrs)
+ cc.applyServiceConfigAndBalancer(emptyServiceConfig, &defaultConfigSelector{emptyServiceConfig})
}
}
-func (cc *ClientConn) updateResolverState(s resolver.State, err error) error {
+func (cc *ClientConn) updateResolverStateAndUnlock(s resolver.State, err error) error {
defer cc.firstResolveEvent.Fire()
- cc.mu.Lock()
// Check if the ClientConn is already closed. Some fields (e.g.
// balancerWrapper) are set to nil when closing the ClientConn, and could
// cause nil pointer panic if we don't have this check.
@@ -580,11 +763,9 @@
// May need to apply the initial service config in case the resolver
// doesn't support service configs, or doesn't provide a service config
// with the new addresses.
- cc.maybeApplyDefaultServiceConfig(nil)
+ cc.maybeApplyDefaultServiceConfig()
- if cc.balancerWrapper != nil {
- cc.balancerWrapper.resolverError(err)
- }
+ cc.balancerWrapper.resolverError(err)
// No addresses are valid with err set; return early.
cc.mu.Unlock()
@@ -592,49 +773,40 @@
}
var ret error
- if cc.dopts.disableServiceConfig || s.ServiceConfig == nil {
- cc.maybeApplyDefaultServiceConfig(s.Addresses)
+ if cc.dopts.disableServiceConfig {
+ channelz.Infof(logger, cc.channelz, "ignoring service config from resolver (%v) and applying the default because service config is disabled", s.ServiceConfig)
+ cc.maybeApplyDefaultServiceConfig()
+ } else if s.ServiceConfig == nil {
+ cc.maybeApplyDefaultServiceConfig()
// TODO: do we need to apply a failing LB policy if there is no
// default, per the error handling design?
} else {
if sc, ok := s.ServiceConfig.Config.(*ServiceConfig); s.ServiceConfig.Err == nil && ok {
- cc.applyServiceConfigAndBalancer(sc, s.Addresses)
+ configSelector := iresolver.GetConfigSelector(s)
+ if configSelector != nil {
+ if len(s.ServiceConfig.Config.(*ServiceConfig).Methods) != 0 {
+ channelz.Infof(logger, cc.channelz, "method configs in service config will be ignored due to presence of config selector")
+ }
+ } else {
+ configSelector = &defaultConfigSelector{sc}
+ }
+ cc.applyServiceConfigAndBalancer(sc, configSelector)
} else {
ret = balancer.ErrBadResolverState
- if cc.balancerWrapper == nil {
- var err error
- if s.ServiceConfig.Err != nil {
- err = status.Errorf(codes.Unavailable, "error parsing service config: %v", s.ServiceConfig.Err)
- } else {
- err = status.Errorf(codes.Unavailable, "illegal service config type: %T", s.ServiceConfig.Config)
- }
- cc.blockingpicker.updatePicker(base.NewErrPicker(err))
- cc.csMgr.updateState(connectivity.TransientFailure)
+ if cc.sc == nil {
+ // Apply the failing LB only if we haven't received valid service config
+ // from the name resolver in the past.
+ cc.applyFailingLBLocked(s.ServiceConfig)
cc.mu.Unlock()
return ret
}
}
}
- var balCfg serviceconfig.LoadBalancingConfig
- if cc.dopts.balancerBuilder == nil && cc.sc != nil && cc.sc.lbConfig != nil {
- balCfg = cc.sc.lbConfig.cfg
- }
-
- cbn := cc.curBalancerName
+ balCfg := cc.sc.lbConfig
bw := cc.balancerWrapper
cc.mu.Unlock()
- if cbn != grpclbName {
- // Filter any grpclb addresses since we don't have the grpclb balancer.
- for i := 0; i < len(s.Addresses); {
- if s.Addresses[i].Type == resolver.GRPCLB {
- copy(s.Addresses[i:], s.Addresses[i+1:])
- s.Addresses = s.Addresses[:len(s.Addresses)-1]
- continue
- }
- i++
- }
- }
+
uccsErr := bw.updateClientConnState(&balancer.ClientConnState{ResolverState: s, BalancerConfig: balCfg})
if ret == nil {
ret = uccsErr // prefer ErrBadResolver state since any other error is
@@ -643,95 +815,65 @@
return ret
}
-// switchBalancer starts the switching from current balancer to the balancer
-// with the given name.
-//
-// It will NOT send the current address list to the new balancer. If needed,
-// caller of this function should send address list to the new balancer after
-// this function returns.
-//
-// Caller must hold cc.mu.
-func (cc *ClientConn) switchBalancer(name string) {
- if strings.EqualFold(cc.curBalancerName, name) {
- return
+// applyFailingLBLocked is akin to configuring an LB policy on the channel which
+// always fails RPCs. Here, an actual LB policy is not configured, but an always
+// erroring picker is configured, which returns errors with information about
+// what was invalid in the received service config. A config selector with no
+// service config is configured, and the connectivity state of the channel is
+// set to TransientFailure.
+func (cc *ClientConn) applyFailingLBLocked(sc *serviceconfig.ParseResult) {
+ var err error
+ if sc.Err != nil {
+ err = status.Errorf(codes.Unavailable, "error parsing service config: %v", sc.Err)
+ } else {
+ err = status.Errorf(codes.Unavailable, "illegal service config type: %T", sc.Config)
}
-
- grpclog.Infof("ClientConn switching balancer to %q", name)
- if cc.dopts.balancerBuilder != nil {
- grpclog.Infoln("ignoring balancer switching: Balancer DialOption used instead")
- return
- }
- if cc.balancerWrapper != nil {
- cc.balancerWrapper.close()
- }
-
- builder := balancer.Get(name)
- if channelz.IsOn() {
- if builder == nil {
- channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
- Desc: fmt.Sprintf("Channel switches to new LB policy %q due to fallback from invalid balancer name", PickFirstBalancerName),
- Severity: channelz.CtWarning,
- })
- } else {
- channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
- Desc: fmt.Sprintf("Channel switches to new LB policy %q", name),
- Severity: channelz.CtINFO,
- })
- }
- }
- if builder == nil {
- grpclog.Infof("failed to get balancer builder for: %v, using pick_first instead", name)
- builder = newPickfirstBuilder()
- }
-
- cc.curBalancerName = builder.Name()
- cc.balancerWrapper = newCCBalancerWrapper(cc, builder, cc.balancerBuildOpts)
+ cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil})
+ cc.pickerWrapper.updatePicker(base.NewErrPicker(err))
+ cc.csMgr.updateState(connectivity.TransientFailure)
}
-func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State) {
- cc.mu.Lock()
- if cc.conns == nil {
- cc.mu.Unlock()
- return
- }
- // TODO(bar switching) send updates to all balancer wrappers when balancer
- // gracefully switching is supported.
- cc.balancerWrapper.handleSubConnStateChange(sc, s)
- cc.mu.Unlock()
+// Makes a copy of the input addresses slice. Addresses are passed during
+// subconn creation and address update operations.
+func copyAddresses(in []resolver.Address) []resolver.Address {
+ out := make([]resolver.Address, len(in))
+ copy(out, in)
+ return out
}
-// newAddrConn creates an addrConn for addrs and adds it to cc.conns.
+// newAddrConnLocked creates an addrConn for addrs and adds it to cc.conns.
//
// Caller needs to make sure len(addrs) > 0.
-func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
+func (cc *ClientConn) newAddrConnLocked(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
+ if cc.conns == nil {
+ return nil, ErrClientConnClosing
+ }
+
ac := &addrConn{
+ state: connectivity.Idle,
cc: cc,
- addrs: addrs,
+ addrs: copyAddresses(addrs),
scopts: opts,
dopts: cc.dopts,
- czData: new(channelzData),
+ channelz: channelz.RegisterSubChannel(cc.channelz, ""),
resetBackoff: make(chan struct{}),
}
ac.ctx, ac.cancel = context.WithCancel(cc.ctx)
+ // Start with our address set to the first address; this may be updated if
+ // we connect to different addresses.
+ ac.channelz.ChannelMetrics.Target.Store(&addrs[0].Addr)
+
+ channelz.AddTraceEvent(logger, ac.channelz, 0, &channelz.TraceEvent{
+ Desc: "Subchannel created",
+ Severity: channelz.CtInfo,
+ Parent: &channelz.TraceEvent{
+ Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelz.ID),
+ Severity: channelz.CtInfo,
+ },
+ })
+
// Track ac in cc. This needs to be done before any getTransport(...) is called.
- cc.mu.Lock()
- if cc.conns == nil {
- cc.mu.Unlock()
- return nil, ErrClientConnClosing
- }
- if channelz.IsOn() {
- ac.channelzID = channelz.RegisterSubChannel(ac, cc.channelzID, "")
- channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
- Desc: "Subchannel Created",
- Severity: channelz.CtINFO,
- Parent: &channelz.TraceEventDesc{
- Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID),
- Severity: channelz.CtINFO,
- },
- })
- }
cc.conns[ac] = struct{}{}
- cc.mu.Unlock()
return ac, nil
}
@@ -748,34 +890,33 @@
ac.tearDown(err)
}
-func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric {
- return &channelz.ChannelInternalMetric{
- State: cc.GetState(),
- Target: cc.target,
- CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted),
- CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded),
- CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed),
- LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)),
- }
-}
-
// Target returns the target string of the ClientConn.
-// This is an EXPERIMENTAL API.
func (cc *ClientConn) Target() string {
return cc.target
}
+// CanonicalTarget returns the canonical target string used when creating cc.
+//
+// This always has the form "<scheme>://[authority]/<endpoint>". For example:
+//
+// - "dns:///example.com:42"
+// - "dns://8.8.8.8/example.com:42"
+// - "unix:///path/to/socket"
+func (cc *ClientConn) CanonicalTarget() string {
+ return cc.parsedTarget.String()
+}
+
func (cc *ClientConn) incrCallsStarted() {
- atomic.AddInt64(&cc.czData.callsStarted, 1)
- atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano())
+ cc.channelz.ChannelMetrics.CallsStarted.Add(1)
+ cc.channelz.ChannelMetrics.LastCallStartedTimestamp.Store(time.Now().UnixNano())
}
func (cc *ClientConn) incrCallsSucceeded() {
- atomic.AddInt64(&cc.czData.callsSucceeded, 1)
+ cc.channelz.ChannelMetrics.CallsSucceeded.Add(1)
}
func (cc *ClientConn) incrCallsFailed() {
- atomic.AddInt64(&cc.czData.callsFailed, 1)
+ cc.channelz.ChannelMetrics.CallsFailed.Add(1)
}
// connect starts creating a transport.
@@ -784,89 +925,146 @@
func (ac *addrConn) connect() error {
ac.mu.Lock()
if ac.state == connectivity.Shutdown {
+ if logger.V(2) {
+ logger.Infof("connect called on shutdown addrConn; ignoring.")
+ }
ac.mu.Unlock()
return errConnClosing
}
if ac.state != connectivity.Idle {
+ if logger.V(2) {
+ logger.Infof("connect called on addrConn in non-idle state (%v); ignoring.", ac.state)
+ }
ac.mu.Unlock()
return nil
}
- // Update connectivity state within the lock to prevent subsequent or
- // concurrent calls from resetting the transport more than once.
- ac.updateConnectivityState(connectivity.Connecting)
- ac.mu.Unlock()
- // Start a goroutine connecting to the server asynchronously.
- go ac.resetTransport()
+ ac.resetTransportAndUnlock()
return nil
}
-// tryUpdateAddrs tries to update ac.addrs with the new addresses list.
-//
-// If ac is Connecting, it returns false. The caller should tear down the ac and
-// create a new one. Note that the backoff will be reset when this happens.
-//
-// If ac is TransientFailure, it updates ac.addrs and returns true. The updated
-// addresses will be picked up by retry in the next iteration after backoff.
-//
-// If ac is Shutdown or Idle, it updates ac.addrs and returns true.
-//
-// If ac is Ready, it checks whether current connected address of ac is in the
-// new addrs list.
-// - If true, it updates ac.addrs and returns true. The ac will keep using
-// the existing connection.
-// - If false, it does nothing and returns false.
-func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool {
+// equalAddressIgnoringBalAttributes returns true is a and b are considered equal.
+// This is different from the Equal method on the resolver.Address type which
+// considers all fields to determine equality. Here, we only consider fields
+// that are meaningful to the subConn.
+func equalAddressIgnoringBalAttributes(a, b *resolver.Address) bool {
+ return a.Addr == b.Addr && a.ServerName == b.ServerName &&
+ a.Attributes.Equal(b.Attributes) &&
+ a.Metadata == b.Metadata
+}
+
+func equalAddressesIgnoringBalAttributes(a, b []resolver.Address) bool {
+ return slices.EqualFunc(a, b, func(a, b resolver.Address) bool { return equalAddressIgnoringBalAttributes(&a, &b) })
+}
+
+// updateAddrs updates ac.addrs with the new addresses list and handles active
+// connections or connection attempts.
+func (ac *addrConn) updateAddrs(addrs []resolver.Address) {
+ addrs = copyAddresses(addrs)
+ limit := len(addrs)
+ if limit > 5 {
+ limit = 5
+ }
+ channelz.Infof(logger, ac.channelz, "addrConn: updateAddrs addrs (%d of %d): %v", limit, len(addrs), addrs[:limit])
+
ac.mu.Lock()
- defer ac.mu.Unlock()
- grpclog.Infof("addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs)
+ if equalAddressesIgnoringBalAttributes(ac.addrs, addrs) {
+ ac.mu.Unlock()
+ return
+ }
+
+ ac.addrs = addrs
+
if ac.state == connectivity.Shutdown ||
ac.state == connectivity.TransientFailure ||
ac.state == connectivity.Idle {
- ac.addrs = addrs
- return true
+ // We were not connecting, so do nothing but update the addresses.
+ ac.mu.Unlock()
+ return
}
- if ac.state == connectivity.Connecting {
- return false
- }
-
- // ac.state is Ready, try to find the connected address.
- var curAddrFound bool
- for _, a := range addrs {
- if reflect.DeepEqual(ac.curAddr, a) {
- curAddrFound = true
- break
+ if ac.state == connectivity.Ready {
+ // Try to find the connected address.
+ for _, a := range addrs {
+ a.ServerName = ac.cc.getServerName(a)
+ if equalAddressIgnoringBalAttributes(&a, &ac.curAddr) {
+ // We are connected to a valid address, so do nothing but
+ // update the addresses.
+ ac.mu.Unlock()
+ return
+ }
}
}
- grpclog.Infof("addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound)
- if curAddrFound {
- ac.addrs = addrs
+
+ // We are either connected to the wrong address or currently connecting.
+ // Stop the current iteration and restart.
+
+ ac.cancel()
+ ac.ctx, ac.cancel = context.WithCancel(ac.cc.ctx)
+
+ // We have to defer here because GracefulClose => onClose, which requires
+ // locking ac.mu.
+ if ac.transport != nil {
+ defer ac.transport.GracefulClose()
+ ac.transport = nil
}
- return curAddrFound
+ if len(addrs) == 0 {
+ ac.updateConnectivityState(connectivity.Idle, nil)
+ }
+
+ // Since we were connecting/connected, we should start a new connection
+ // attempt.
+ go ac.resetTransportAndUnlock()
+}
+
+// getServerName determines the serverName to be used in the connection
+// handshake. The default value for the serverName is the authority on the
+// ClientConn, which either comes from the user's dial target or through an
+// authority override specified using the WithAuthority dial option. Name
+// resolvers can specify a per-address override for the serverName through the
+// resolver.Address.ServerName field which is used only if the WithAuthority
+// dial option was not used. The rationale is that per-address authority
+// overrides specified by the name resolver can represent a security risk, while
+// an override specified by the user is more dependable since they probably know
+// what they are doing.
+func (cc *ClientConn) getServerName(addr resolver.Address) string {
+ if cc.dopts.authority != "" {
+ return cc.dopts.authority
+ }
+ if addr.ServerName != "" {
+ return addr.ServerName
+ }
+ return cc.authority
+}
+
+func getMethodConfig(sc *ServiceConfig, method string) MethodConfig {
+ if sc == nil {
+ return MethodConfig{}
+ }
+ if m, ok := sc.Methods[method]; ok {
+ return m
+ }
+ i := strings.LastIndex(method, "/")
+ if m, ok := sc.Methods[method[:i+1]]; ok {
+ return m
+ }
+ return sc.Methods[""]
}
// GetMethodConfig gets the method config of the input method.
// If there's an exact match for input method (i.e. /service/method), we return
// the corresponding MethodConfig.
-// If there isn't an exact match for the input method, we look for the default config
-// under the service (i.e /service/). If there is a default MethodConfig for
-// the service, we return it.
+// If there isn't an exact match for the input method, we look for the service's default
+// config under the service (i.e /service/) and then for the default for all services (empty string).
+//
+// If there is a default MethodConfig for the service, we return it.
// Otherwise, we return an empty MethodConfig.
func (cc *ClientConn) GetMethodConfig(method string) MethodConfig {
// TODO: Avoid the locking here.
cc.mu.RLock()
defer cc.mu.RUnlock()
- if cc.sc == nil {
- return MethodConfig{}
- }
- m, ok := cc.sc.Methods[method]
- if !ok {
- i := strings.LastIndex(method, "/")
- m = cc.sc.Methods[method[:i+1]]
- }
- return m
+ return getMethodConfig(cc.sc, method)
}
func (cc *ClientConn) healthCheckConfig() *healthCheckConfig {
@@ -878,22 +1076,15 @@
return cc.sc.healthCheckConfig
}
-func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) {
- t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickOptions{
- FullMethodName: method,
- })
- if err != nil {
- return nil, nil, toRPCErr(err)
- }
- return t, done, nil
-}
-
-func (cc *ClientConn) applyServiceConfigAndBalancer(sc *ServiceConfig, addrs []resolver.Address) {
+func (cc *ClientConn) applyServiceConfigAndBalancer(sc *ServiceConfig, configSelector iresolver.ConfigSelector) {
if sc == nil {
// should never reach here.
return
}
cc.sc = sc
+ if configSelector != nil {
+ cc.safeConfigSelector.UpdateConfigSelector(configSelector)
+ }
if cc.sc.retryThrottling != nil {
newThrottler := &retryThrottler{
@@ -906,46 +1097,16 @@
} else {
cc.retryThrottler.Store((*retryThrottler)(nil))
}
-
- if cc.dopts.balancerBuilder == nil {
- // Only look at balancer types and switch balancer if balancer dial
- // option is not set.
- var newBalancerName string
- if cc.sc != nil && cc.sc.lbConfig != nil {
- newBalancerName = cc.sc.lbConfig.name
- } else {
- var isGRPCLB bool
- for _, a := range addrs {
- if a.Type == resolver.GRPCLB {
- isGRPCLB = true
- break
- }
- }
- if isGRPCLB {
- newBalancerName = grpclbName
- } else if cc.sc != nil && cc.sc.LB != nil {
- newBalancerName = *cc.sc.LB
- } else {
- newBalancerName = PickFirstBalancerName
- }
- }
- cc.switchBalancer(newBalancerName)
- } else if cc.balancerWrapper == nil {
- // Balancer dial option was set, and this is the first time handling
- // resolved addresses. Build a balancer with dopts.balancerBuilder.
- cc.curBalancerName = cc.dopts.balancerBuilder.Name()
- cc.balancerWrapper = newCCBalancerWrapper(cc, cc.dopts.balancerBuilder, cc.balancerBuildOpts)
- }
}
-func (cc *ClientConn) resolveNow(o resolver.ResolveNowOption) {
+func (cc *ClientConn) resolveNow(o resolver.ResolveNowOptions) {
cc.mu.RLock()
- r := cc.resolverWrapper
+ cc.resolverWrapper.resolveNow(o)
cc.mu.RUnlock()
- if r == nil {
- return
- }
- go r.resolveNow(o)
+}
+
+func (cc *ClientConn) resolveNowLocked(o resolver.ResolveNowOptions) {
+ cc.resolverWrapper.resolveNow(o)
}
// ResetConnectBackoff wakes up all subchannels in transient failure and causes
@@ -957,7 +1118,10 @@
// However, if a previously unavailable network becomes available, this may be
// used to trigger an immediate reconnect.
//
-// This API is EXPERIMENTAL.
+// # Experimental
+//
+// Notice: This API is EXPERIMENTAL and may be changed or removed in a
+// later release.
func (cc *ClientConn) ResetConnectBackoff() {
cc.mu.Lock()
conns := cc.conns
@@ -969,51 +1133,52 @@
// Close tears down the ClientConn and all underlying connections.
func (cc *ClientConn) Close() error {
- defer cc.cancel()
+ defer func() {
+ cc.cancel()
+ <-cc.csMgr.pubSub.Done()
+ }()
+
+ // Prevent calls to enter/exit idle immediately, and ensure we are not
+ // currently entering/exiting idle mode.
+ cc.idlenessMgr.Close()
cc.mu.Lock()
if cc.conns == nil {
cc.mu.Unlock()
return ErrClientConnClosing
}
+
conns := cc.conns
cc.conns = nil
cc.csMgr.updateState(connectivity.Shutdown)
- rWrapper := cc.resolverWrapper
- cc.resolverWrapper = nil
- bWrapper := cc.balancerWrapper
- cc.balancerWrapper = nil
+ // We can safely unlock and continue to access all fields now as
+ // cc.conns==nil, preventing any further operations on cc.
cc.mu.Unlock()
- cc.blockingpicker.close()
+ cc.resolverWrapper.close()
+ // The order of closing matters here since the balancer wrapper assumes the
+ // picker is closed before it is closed.
+ cc.pickerWrapper.close()
+ cc.balancerWrapper.close()
- if rWrapper != nil {
- rWrapper.close()
- }
- if bWrapper != nil {
- bWrapper.close()
- }
-
+ <-cc.resolverWrapper.serializer.Done()
+ <-cc.balancerWrapper.serializer.Done()
+ var wg sync.WaitGroup
for ac := range conns {
- ac.tearDown(ErrClientConnClosing)
+ wg.Add(1)
+ go func(ac *addrConn) {
+ defer wg.Done()
+ ac.tearDown(ErrClientConnClosing)
+ }(ac)
}
- if channelz.IsOn() {
- ted := &channelz.TraceEventDesc{
- Desc: "Channel Deleted",
- Severity: channelz.CtINFO,
- }
- if cc.dopts.channelzParentID != 0 {
- ted.Parent = &channelz.TraceEventDesc{
- Desc: fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID),
- Severity: channelz.CtINFO,
- }
- }
- channelz.AddTraceEvent(cc.channelzID, ted)
- // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
- // the entity being deleted, and thus prevent it from being deleted right away.
- channelz.RemoveEntry(cc.channelzID)
- }
+ wg.Wait()
+ cc.addTraceEvent("deleted")
+ // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add
+ // trace reference to the entity being deleted, and thus prevent it from being
+ // deleted right away.
+ channelz.RemoveEntry(cc.channelz.ID)
+
return nil
}
@@ -1024,7 +1189,7 @@
cc *ClientConn
dopts dialOptions
- acbw balancer.SubConn
+ acbw *acBalancerWrapper
scopts balancer.NewSubConnOptions
// transport is set when there's a viable transport (note: ac state may not be READY as LB channel
@@ -1033,6 +1198,10 @@
// is received, transport is closed, ac has been torn down).
transport transport.ClientTransport // The current transport.
+ // This mutex is used on the RPC path, so its usage should be minimized as
+ // much as possible.
+ // TODO: Find a lock-free way to retrieve the transport and state from the
+ // addrConn.
mu sync.Mutex
curAddr resolver.Address // The current address.
addrs []resolver.Address // All addresses that the resolver resolved to.
@@ -1043,151 +1212,128 @@
backoffIdx int // Needs to be stateful for resetConnectBackoff.
resetBackoff chan struct{}
- channelzID int64 // channelz unique identification number.
- czData *channelzData
+ channelz *channelz.SubChannel
}
// Note: this requires a lock on ac.mu.
-func (ac *addrConn) updateConnectivityState(s connectivity.State) {
+func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error) {
if ac.state == s {
return
}
-
- updateMsg := fmt.Sprintf("Subchannel Connectivity change to %v", s)
ac.state = s
- if channelz.IsOn() {
- channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
- Desc: updateMsg,
- Severity: channelz.CtINFO,
- })
+ ac.channelz.ChannelMetrics.State.Store(&s)
+ if lastErr == nil {
+ channelz.Infof(logger, ac.channelz, "Subchannel Connectivity change to %v", s)
+ } else {
+ channelz.Infof(logger, ac.channelz, "Subchannel Connectivity change to %v, last error: %s", s, lastErr)
}
- ac.cc.handleSubConnStateChange(ac.acbw, s)
+ ac.acbw.updateState(s, ac.curAddr, lastErr)
}
// adjustParams updates parameters used to create transports upon
// receiving a GoAway.
func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
- switch r {
- case transport.GoAwayTooManyPings:
+ if r == transport.GoAwayTooManyPings {
v := 2 * ac.dopts.copts.KeepaliveParams.Time
ac.cc.mu.Lock()
- if v > ac.cc.mkp.Time {
- ac.cc.mkp.Time = v
+ if v > ac.cc.keepaliveParams.Time {
+ ac.cc.keepaliveParams.Time = v
}
ac.cc.mu.Unlock()
}
}
-func (ac *addrConn) resetTransport() {
- for i := 0; ; i++ {
- if i > 0 {
- ac.cc.resolveNow(resolver.ResolveNowOption{})
- }
-
- ac.mu.Lock()
- if ac.state == connectivity.Shutdown {
- ac.mu.Unlock()
- return
- }
-
- addrs := ac.addrs
- backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
- // This will be the duration that dial gets to finish.
- dialDuration := minConnectTimeout
- if ac.dopts.minConnectTimeout != nil {
- dialDuration = ac.dopts.minConnectTimeout()
- }
-
- if dialDuration < backoffFor {
- // Give dial more time as we keep failing to connect.
- dialDuration = backoffFor
- }
- // We can potentially spend all the time trying the first address, and
- // if the server accepts the connection and then hangs, the following
- // addresses will never be tried.
- //
- // The spec doesn't mention what should be done for multiple addresses.
- // https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm
- connectDeadline := time.Now().Add(dialDuration)
-
- ac.updateConnectivityState(connectivity.Connecting)
- ac.transport = nil
+// resetTransportAndUnlock unconditionally connects the addrConn.
+//
+// ac.mu must be held by the caller, and this function will guarantee it is released.
+func (ac *addrConn) resetTransportAndUnlock() {
+ acCtx := ac.ctx
+ if acCtx.Err() != nil {
ac.mu.Unlock()
-
- newTr, addr, reconnect, err := ac.tryAllAddrs(addrs, connectDeadline)
- if err != nil {
- // After exhausting all addresses, the addrConn enters
- // TRANSIENT_FAILURE.
- ac.mu.Lock()
- if ac.state == connectivity.Shutdown {
- ac.mu.Unlock()
- return
- }
- ac.updateConnectivityState(connectivity.TransientFailure)
-
- // Backoff.
- b := ac.resetBackoff
- ac.mu.Unlock()
-
- timer := time.NewTimer(backoffFor)
- select {
- case <-timer.C:
- ac.mu.Lock()
- ac.backoffIdx++
- ac.mu.Unlock()
- case <-b:
- timer.Stop()
- case <-ac.ctx.Done():
- timer.Stop()
- return
- }
- continue
- }
-
- ac.mu.Lock()
- if ac.state == connectivity.Shutdown {
- ac.mu.Unlock()
- newTr.Close()
- return
- }
- ac.curAddr = addr
- ac.transport = newTr
- ac.backoffIdx = 0
-
- hctx, hcancel := context.WithCancel(ac.ctx)
- ac.startHealthCheck(hctx)
- ac.mu.Unlock()
-
- // Block until the created transport is down. And when this happens,
- // we restart from the top of the addr list.
- <-reconnect.Done()
- hcancel()
- // restart connecting - the top of the loop will set state to
- // CONNECTING. This is against the current connectivity semantics doc,
- // however it allows for graceful behavior for RPCs not yet dispatched
- // - unfortunate timing would otherwise lead to the RPC failing even
- // though the TRANSIENT_FAILURE state (called for by the doc) would be
- // instantaneous.
- //
- // Ideally we should transition to Idle here and block until there is
- // RPC activity that leads to the balancer requesting a reconnect of
- // the associated SubConn.
+ return
}
+
+ addrs := ac.addrs
+ backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
+ // This will be the duration that dial gets to finish.
+ dialDuration := minConnectTimeout
+ if ac.dopts.minConnectTimeout != nil {
+ dialDuration = ac.dopts.minConnectTimeout()
+ }
+
+ if dialDuration < backoffFor {
+ // Give dial more time as we keep failing to connect.
+ dialDuration = backoffFor
+ }
+ // We can potentially spend all the time trying the first address, and
+ // if the server accepts the connection and then hangs, the following
+ // addresses will never be tried.
+ //
+ // The spec doesn't mention what should be done for multiple addresses.
+ // https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm
+ connectDeadline := time.Now().Add(dialDuration)
+
+ ac.updateConnectivityState(connectivity.Connecting, nil)
+ ac.mu.Unlock()
+
+ if err := ac.tryAllAddrs(acCtx, addrs, connectDeadline); err != nil {
+ // TODO: #7534 - Move re-resolution requests into the pick_first LB policy
+ // to ensure one resolution request per pass instead of per subconn failure.
+ ac.cc.resolveNow(resolver.ResolveNowOptions{})
+ ac.mu.Lock()
+ if acCtx.Err() != nil {
+ // addrConn was torn down.
+ ac.mu.Unlock()
+ return
+ }
+ // After exhausting all addresses, the addrConn enters
+ // TRANSIENT_FAILURE.
+ ac.updateConnectivityState(connectivity.TransientFailure, err)
+
+ // Backoff.
+ b := ac.resetBackoff
+ ac.mu.Unlock()
+
+ timer := time.NewTimer(backoffFor)
+ select {
+ case <-timer.C:
+ ac.mu.Lock()
+ ac.backoffIdx++
+ ac.mu.Unlock()
+ case <-b:
+ timer.Stop()
+ case <-acCtx.Done():
+ timer.Stop()
+ return
+ }
+
+ ac.mu.Lock()
+ if acCtx.Err() == nil {
+ ac.updateConnectivityState(connectivity.Idle, err)
+ }
+ ac.mu.Unlock()
+ return
+ }
+ // Success; reset backoff.
+ ac.mu.Lock()
+ ac.backoffIdx = 0
+ ac.mu.Unlock()
}
-// tryAllAddrs tries to creates a connection to the addresses, and stop when at the
-// first successful one. It returns the transport, the address and a Event in
-// the successful case. The Event fires when the returned transport disconnects.
-func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.Time) (transport.ClientTransport, resolver.Address, *grpcsync.Event, error) {
+// tryAllAddrs tries to create a connection to the addresses, and stop when at
+// the first successful one. It returns an error if no address was successfully
+// connected, or updates ac appropriately with the new transport.
+func (ac *addrConn) tryAllAddrs(ctx context.Context, addrs []resolver.Address, connectDeadline time.Time) error {
+ var firstConnErr error
for _, addr := range addrs {
- ac.mu.Lock()
- if ac.state == connectivity.Shutdown {
- ac.mu.Unlock()
- return nil, resolver.Address{}, nil, errConnClosing
+ ac.channelz.ChannelMetrics.Target.Store(&addr.Addr)
+ if ctx.Err() != nil {
+ return errConnClosing
}
+ ac.mu.Lock()
ac.cc.mu.RLock()
- ac.dopts.copts.KeepaliveParams = ac.cc.mkp
+ ac.dopts.copts.KeepaliveParams = ac.cc.keepaliveParams
ac.cc.mu.RUnlock()
copts := ac.dopts.copts
@@ -1196,108 +1342,103 @@
}
ac.mu.Unlock()
- if channelz.IsOn() {
- channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
- Desc: fmt.Sprintf("Subchannel picks a new address %q to connect", addr.Addr),
- Severity: channelz.CtINFO,
- })
- }
+ channelz.Infof(logger, ac.channelz, "Subchannel picks a new address %q to connect", addr.Addr)
- newTr, reconnect, err := ac.createTransport(addr, copts, connectDeadline)
+ err := ac.createTransport(ctx, addr, copts, connectDeadline)
if err == nil {
- return newTr, addr, reconnect, nil
+ return nil
}
- ac.cc.blockingpicker.updateConnectionError(err)
+ if firstConnErr == nil {
+ firstConnErr = err
+ }
+ ac.cc.updateConnectionError(err)
}
// Couldn't connect to any address.
- return nil, resolver.Address{}, nil, fmt.Errorf("couldn't connect to any address")
+ return firstConnErr
}
-// createTransport creates a connection to addr. It returns the transport and a
-// Event in the successful case. The Event fires when the returned transport
-// disconnects.
-func (ac *addrConn) createTransport(addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) (transport.ClientTransport, *grpcsync.Event, error) {
- prefaceReceived := make(chan struct{})
- onCloseCalled := make(chan struct{})
- reconnect := grpcsync.NewEvent()
+// createTransport creates a connection to addr. It returns an error if the
+// address was not successfully connected, or updates ac appropriately with the
+// new transport.
+func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) error {
+ addr.ServerName = ac.cc.getServerName(addr)
+ hctx, hcancel := context.WithCancel(ctx)
- authority := ac.cc.authority
- // addr.ServerName takes precedent over ClientConn authority, if present.
- if addr.ServerName != "" {
- authority = addr.ServerName
- }
-
- target := transport.TargetInfo{
- Addr: addr.Addr,
- Metadata: addr.Metadata,
- Authority: authority,
- }
-
- once := sync.Once{}
- onGoAway := func(r transport.GoAwayReason) {
+ onClose := func(r transport.GoAwayReason) {
ac.mu.Lock()
+ defer ac.mu.Unlock()
+ // adjust params based on GoAwayReason
ac.adjustParams(r)
- once.Do(func() {
- if ac.state == connectivity.Ready {
- // Prevent this SubConn from being used for new RPCs by setting its
- // state to Connecting.
- //
- // TODO: this should be Idle when grpc-go properly supports it.
- ac.updateConnectivityState(connectivity.Connecting)
- }
- })
- ac.mu.Unlock()
- reconnect.Fire()
+ if ctx.Err() != nil {
+ // Already shut down or connection attempt canceled. tearDown() or
+ // updateAddrs() already cleared the transport and canceled hctx
+ // via ac.ctx, and we expected this connection to be closed, so do
+ // nothing here.
+ return
+ }
+ hcancel()
+ if ac.transport == nil {
+ // We're still connecting to this address, which could error. Do
+ // not update the connectivity state or resolve; these will happen
+ // at the end of the tryAllAddrs connection loop in the event of an
+ // error.
+ return
+ }
+ ac.transport = nil
+ // Refresh the name resolver on any connection loss.
+ ac.cc.resolveNow(resolver.ResolveNowOptions{})
+ // Always go idle and wait for the LB policy to initiate a new
+ // connection attempt.
+ ac.updateConnectivityState(connectivity.Idle, nil)
}
- onClose := func() {
- ac.mu.Lock()
- once.Do(func() {
- if ac.state == connectivity.Ready {
- // Prevent this SubConn from being used for new RPCs by setting its
- // state to Connecting.
- //
- // TODO: this should be Idle when grpc-go properly supports it.
- ac.updateConnectivityState(connectivity.Connecting)
- }
- })
- ac.mu.Unlock()
- close(onCloseCalled)
- reconnect.Fire()
- }
-
- onPrefaceReceipt := func() {
- close(prefaceReceived)
- }
-
- connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline)
+ connectCtx, cancel := context.WithDeadline(ctx, connectDeadline)
defer cancel()
- if channelz.IsOn() {
- copts.ChannelzParentID = ac.channelzID
- }
+ copts.ChannelzParent = ac.channelz
- newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, target, copts, onPrefaceReceipt, onGoAway, onClose)
+ newTr, err := transport.NewHTTP2Client(connectCtx, ac.cc.ctx, addr, copts, onClose)
if err != nil {
+ if logger.V(2) {
+ logger.Infof("Creating new client transport to %q: %v", addr, err)
+ }
// newTr is either nil, or closed.
- grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v. Err :%v. Reconnecting...", addr, err)
- return nil, nil, err
+ hcancel()
+ channelz.Warningf(logger, ac.channelz, "grpc: addrConn.createTransport failed to connect to %s. Err: %v", addr, err)
+ return err
}
- select {
- case <-time.After(connectDeadline.Sub(time.Now())):
- // We didn't get the preface in time.
- newTr.Close()
- grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v: didn't receive server preface in time. Reconnecting...", addr)
- return nil, nil, errors.New("timed out waiting for server handshake")
- case <-prefaceReceived:
- // We got the preface - huzzah! things are good.
- case <-onCloseCalled:
- // The transport has already closed - noop.
- return nil, nil, errors.New("connection closed")
- // TODO(deklerk) this should bail on ac.ctx.Done(). Add a test and fix.
+ ac.mu.Lock()
+ defer ac.mu.Unlock()
+ if ctx.Err() != nil {
+ // This can happen if the subConn was removed while in `Connecting`
+ // state. tearDown() would have set the state to `Shutdown`, but
+ // would not have closed the transport since ac.transport would not
+ // have been set at that point.
+ //
+ // We run this in a goroutine because newTr.Close() calls onClose()
+ // inline, which requires locking ac.mu.
+ //
+ // The error we pass to Close() is immaterial since there are no open
+ // streams at this point, so no trailers with error details will be sent
+ // out. We just need to pass a non-nil error.
+ //
+ // This can also happen when updateAddrs is called during a connection
+ // attempt.
+ go newTr.Close(transport.ErrConnClosing)
+ return nil
}
- return newTr, reconnect, nil
+ if hctx.Err() != nil {
+ // onClose was already called for this connection, but the connection
+ // was successfully established first. Consider it a success and set
+ // the new state to Idle.
+ ac.updateConnectivityState(connectivity.Idle, nil)
+ return nil
+ }
+ ac.curAddr = addr
+ ac.transport = newTr
+ ac.startHealthCheck(hctx) // Will set state to READY if appropriate.
+ return nil
}
// startHealthCheck starts the health checking stream (RPC) to watch the health
@@ -1305,7 +1446,7 @@
//
// LB channel health checking is enabled when all requirements below are met:
// 1. it is not disabled by the user with the WithDisableHealthCheck DialOption
-// 2. internal.HealthCheckFunc is set by importing the grpc/healthcheck package
+// 2. internal.HealthCheckFunc is set by importing the grpc/health package
// 3. a service config with non-empty healthCheckConfig field is provided
// 4. the load balancer requests it
//
@@ -1316,7 +1457,7 @@
var healthcheckManagingState bool
defer func() {
if !healthcheckManagingState {
- ac.updateConnectivityState(connectivity.Ready)
+ ac.updateConnectivityState(connectivity.Ready, nil)
}
}()
@@ -1330,12 +1471,12 @@
if !ac.scopts.HealthCheckEnabled {
return
}
- healthCheckFunc := ac.cc.dopts.healthCheckFunc
+ healthCheckFunc := internal.HealthCheckFunc
if healthCheckFunc == nil {
// The health package is not imported to set health check function.
//
// TODO: add a link to the health check doc in the error message.
- grpclog.Error("Health check is requested but health check function is not set.")
+ channelz.Error(logger, ac.channelz, "Health check is requested but health check function is not set.")
return
}
@@ -1343,7 +1484,7 @@
// Set up the health check helper functions.
currentTr := ac.transport
- newStream := func(method string) (interface{}, error) {
+ newStream := func(method string) (any, error) {
ac.mu.Lock()
if ac.transport != currentTr {
ac.mu.Unlock()
@@ -1352,28 +1493,22 @@
ac.mu.Unlock()
return newNonRetryClientStream(ctx, &StreamDesc{ServerStreams: true}, method, currentTr, ac)
}
- setConnectivityState := func(s connectivity.State) {
+ setConnectivityState := func(s connectivity.State, lastErr error) {
ac.mu.Lock()
defer ac.mu.Unlock()
if ac.transport != currentTr {
return
}
- ac.updateConnectivityState(s)
+ ac.updateConnectivityState(s, lastErr)
}
// Start the health checking stream.
go func() {
- err := ac.cc.dopts.healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName)
+ err := healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName)
if err != nil {
if status.Code(err) == codes.Unimplemented {
- if channelz.IsOn() {
- channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
- Desc: "Subchannel health check is unimplemented at server side, thus health check is disabled",
- Severity: channelz.CtError,
- })
- }
- grpclog.Error("Subchannel health check is unimplemented at server side, thus health check is disabled")
+ channelz.Error(logger, ac.channelz, "Subchannel health check is unimplemented at server side, thus health check is disabled")
} else {
- grpclog.Errorf("HealthCheckFunc exits with unexpected error %v", err)
+ channelz.Errorf(logger, ac.channelz, "Health checking failed: %v", err)
}
}
}()
@@ -1387,33 +1522,20 @@
ac.mu.Unlock()
}
-// getReadyTransport returns the transport if ac's state is READY.
-// Otherwise it returns nil, false.
-// If ac's state is IDLE, it will trigger ac to connect.
-func (ac *addrConn) getReadyTransport() (transport.ClientTransport, bool) {
+// getReadyTransport returns the transport if ac's state is READY or nil if not.
+func (ac *addrConn) getReadyTransport() transport.ClientTransport {
ac.mu.Lock()
- if ac.state == connectivity.Ready && ac.transport != nil {
- t := ac.transport
- ac.mu.Unlock()
- return t, true
+ defer ac.mu.Unlock()
+ if ac.state == connectivity.Ready {
+ return ac.transport
}
- var idle bool
- if ac.state == connectivity.Idle {
- idle = true
- }
- ac.mu.Unlock()
- // Trigger idle ac to connect.
- if idle {
- ac.connect()
- }
- return nil, false
+ return nil
}
// tearDown starts to tear down the addrConn.
-// TODO(zhaoq): Make this synchronous to avoid unbounded memory consumption in
-// some edge cases (e.g., the caller opens and closes many addrConn's in a
-// tight loop.
-// tearDown doesn't remove ac from ac.cc.conns.
+//
+// Note that tearDown doesn't remove ac from ac.cc.conns, so the addrConn struct
+// will leak. In most cases, call cc.removeAddrConn() instead.
func (ac *addrConn) tearDown(err error) {
ac.mu.Lock()
if ac.state == connectivity.Shutdown {
@@ -1424,68 +1546,48 @@
ac.transport = nil
// We have to set the state to Shutdown before anything else to prevent races
// between setting the state and logic that waits on context cancellation / etc.
- ac.updateConnectivityState(connectivity.Shutdown)
+ ac.updateConnectivityState(connectivity.Shutdown, nil)
ac.cancel()
ac.curAddr = resolver.Address{}
- if err == errConnDrain && curTr != nil {
- // GracefulClose(...) may be executed multiple times when
- // i) receiving multiple GoAway frames from the server; or
- // ii) there are concurrent name resolver/Balancer triggered
- // address removal and GoAway.
- // We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu.
- ac.mu.Unlock()
- curTr.GracefulClose()
- ac.mu.Lock()
- }
- if channelz.IsOn() {
- channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
- Desc: "Subchannel Deleted",
- Severity: channelz.CtINFO,
- Parent: &channelz.TraceEventDesc{
- Desc: fmt.Sprintf("Subchanel(id:%d) deleted", ac.channelzID),
- Severity: channelz.CtINFO,
- },
- })
- // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
- // the entity being deleted, and thus prevent it from being deleted right away.
- channelz.RemoveEntry(ac.channelzID)
- }
+
+ channelz.AddTraceEvent(logger, ac.channelz, 0, &channelz.TraceEvent{
+ Desc: "Subchannel deleted",
+ Severity: channelz.CtInfo,
+ Parent: &channelz.TraceEvent{
+ Desc: fmt.Sprintf("Subchannel(id:%d) deleted", ac.channelz.ID),
+ Severity: channelz.CtInfo,
+ },
+ })
+ // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add
+ // trace reference to the entity being deleted, and thus prevent it from
+ // being deleted right away.
+ channelz.RemoveEntry(ac.channelz.ID)
ac.mu.Unlock()
-}
-func (ac *addrConn) getState() connectivity.State {
- ac.mu.Lock()
- defer ac.mu.Unlock()
- return ac.state
-}
-
-func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric {
- ac.mu.Lock()
- addr := ac.curAddr.Addr
- ac.mu.Unlock()
- return &channelz.ChannelInternalMetric{
- State: ac.getState(),
- Target: addr,
- CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted),
- CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded),
- CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed),
- LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)),
+ // We have to release the lock before the call to GracefulClose/Close here
+ // because both of them call onClose(), which requires locking ac.mu.
+ if curTr != nil {
+ if err == errConnDrain {
+ // Close the transport gracefully when the subConn is being shutdown.
+ //
+ // GracefulClose() may be executed multiple times if:
+ // - multiple GoAway frames are received from the server
+ // - there are concurrent name resolver or balancer triggered
+ // address removal and GoAway
+ curTr.GracefulClose()
+ } else {
+ // Hard close the transport when the channel is entering idle or is
+ // being shutdown. In the case where the channel is being shutdown,
+ // closing of transports is also taken care of by cancellation of cc.ctx.
+ // But in the case where the channel is entering idle, we need to
+ // explicitly close the transports here. Instead of distinguishing
+ // between these two cases, it is simpler to close the transport
+ // unconditionally here.
+ curTr.Close(err)
+ }
}
}
-func (ac *addrConn) incrCallsStarted() {
- atomic.AddInt64(&ac.czData.callsStarted, 1)
- atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano())
-}
-
-func (ac *addrConn) incrCallsSucceeded() {
- atomic.AddInt64(&ac.czData.callsSucceeded, 1)
-}
-
-func (ac *addrConn) incrCallsFailed() {
- atomic.AddInt64(&ac.czData.callsFailed, 1)
-}
-
type retryThrottler struct {
max float64
thresh float64
@@ -1523,12 +1625,17 @@
}
}
-type channelzChannel struct {
- cc *ClientConn
+func (ac *addrConn) incrCallsStarted() {
+ ac.channelz.ChannelMetrics.CallsStarted.Add(1)
+ ac.channelz.ChannelMetrics.LastCallStartedTimestamp.Store(time.Now().UnixNano())
}
-func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric {
- return c.cc.channelzMetric()
+func (ac *addrConn) incrCallsSucceeded() {
+ ac.channelz.ChannelMetrics.CallsSucceeded.Add(1)
+}
+
+func (ac *addrConn) incrCallsFailed() {
+ ac.channelz.ChannelMetrics.CallsFailed.Add(1)
}
// ErrClientConnTimeout indicates that the ClientConn cannot establish the
@@ -1537,3 +1644,189 @@
// Deprecated: This error is never returned by grpc and should not be
// referenced by users.
var ErrClientConnTimeout = errors.New("grpc: timed out when dialing")
+
+// getResolver finds the scheme in the cc's resolvers or the global registry.
+// scheme should always be lowercase (typically by virtue of url.Parse()
+// performing proper RFC3986 behavior).
+func (cc *ClientConn) getResolver(scheme string) resolver.Builder {
+ for _, rb := range cc.dopts.resolvers {
+ if scheme == rb.Scheme() {
+ return rb
+ }
+ }
+ return resolver.Get(scheme)
+}
+
+func (cc *ClientConn) updateConnectionError(err error) {
+ cc.lceMu.Lock()
+ cc.lastConnectionError = err
+ cc.lceMu.Unlock()
+}
+
+func (cc *ClientConn) connectionError() error {
+ cc.lceMu.Lock()
+ defer cc.lceMu.Unlock()
+ return cc.lastConnectionError
+}
+
+// initParsedTargetAndResolverBuilder parses the user's dial target and stores
+// the parsed target in `cc.parsedTarget`.
+//
+// The resolver to use is determined based on the scheme in the parsed target
+// and the same is stored in `cc.resolverBuilder`.
+//
+// Doesn't grab cc.mu as this method is expected to be called only at Dial time.
+func (cc *ClientConn) initParsedTargetAndResolverBuilder() error {
+ logger.Infof("original dial target is: %q", cc.target)
+
+ var rb resolver.Builder
+ parsedTarget, err := parseTarget(cc.target)
+ if err == nil {
+ rb = cc.getResolver(parsedTarget.URL.Scheme)
+ if rb != nil {
+ cc.parsedTarget = parsedTarget
+ cc.resolverBuilder = rb
+ return nil
+ }
+ }
+
+ // We are here because the user's dial target did not contain a scheme or
+ // specified an unregistered scheme. We should fallback to the default
+ // scheme, except when a custom dialer is specified in which case, we should
+ // always use passthrough scheme. For either case, we need to respect any overridden
+ // global defaults set by the user.
+ defScheme := cc.dopts.defaultScheme
+ if internal.UserSetDefaultScheme {
+ defScheme = resolver.GetDefaultScheme()
+ }
+
+ canonicalTarget := defScheme + ":///" + cc.target
+
+ parsedTarget, err = parseTarget(canonicalTarget)
+ if err != nil {
+ return err
+ }
+ rb = cc.getResolver(parsedTarget.URL.Scheme)
+ if rb == nil {
+ return fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.URL.Scheme)
+ }
+ cc.parsedTarget = parsedTarget
+ cc.resolverBuilder = rb
+ return nil
+}
+
+// parseTarget uses RFC 3986 semantics to parse the given target into a
+// resolver.Target struct containing url. Query params are stripped from the
+// endpoint.
+func parseTarget(target string) (resolver.Target, error) {
+ u, err := url.Parse(target)
+ if err != nil {
+ return resolver.Target{}, err
+ }
+
+ return resolver.Target{URL: *u}, nil
+}
+
+// encodeAuthority escapes the authority string based on valid chars defined in
+// https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.
+func encodeAuthority(authority string) string {
+ const upperhex = "0123456789ABCDEF"
+
+ // Return for characters that must be escaped as per
+ // Valid chars are mentioned here:
+ // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
+ shouldEscape := func(c byte) bool {
+ // Alphanum are always allowed.
+ if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+ return false
+ }
+ switch c {
+ case '-', '_', '.', '~': // Unreserved characters
+ return false
+ case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // Subdelim characters
+ return false
+ case ':', '[', ']', '@': // Authority related delimiters
+ return false
+ }
+ // Everything else must be escaped.
+ return true
+ }
+
+ hexCount := 0
+ for i := 0; i < len(authority); i++ {
+ c := authority[i]
+ if shouldEscape(c) {
+ hexCount++
+ }
+ }
+
+ if hexCount == 0 {
+ return authority
+ }
+
+ required := len(authority) + 2*hexCount
+ t := make([]byte, required)
+
+ j := 0
+ // This logic is a barebones version of escape in the go net/url library.
+ for i := 0; i < len(authority); i++ {
+ switch c := authority[i]; {
+ case shouldEscape(c):
+ t[j] = '%'
+ t[j+1] = upperhex[c>>4]
+ t[j+2] = upperhex[c&15]
+ j += 3
+ default:
+ t[j] = authority[i]
+ j++
+ }
+ }
+ return string(t)
+}
+
+// Determine channel authority. The order of precedence is as follows:
+// - user specified authority override using `WithAuthority` dial option
+// - creds' notion of server name for the authentication handshake
+// - endpoint from dial target of the form "scheme://[authority]/endpoint"
+//
+// Stores the determined authority in `cc.authority`.
+//
+// Returns a non-nil error if the authority returned by the transport
+// credentials do not match the authority configured through the dial option.
+//
+// Doesn't grab cc.mu as this method is expected to be called only at Dial time.
+func (cc *ClientConn) initAuthority() error {
+ dopts := cc.dopts
+ // Historically, we had two options for users to specify the serverName or
+ // authority for a channel. One was through the transport credentials
+ // (either in its constructor, or through the OverrideServerName() method).
+ // The other option (for cases where WithInsecure() dial option was used)
+ // was to use the WithAuthority() dial option.
+ //
+ // A few things have changed since:
+ // - `insecure` package with an implementation of the `TransportCredentials`
+ // interface for the insecure case
+ // - WithAuthority() dial option support for secure credentials
+ authorityFromCreds := ""
+ if creds := dopts.copts.TransportCredentials; creds != nil && creds.Info().ServerName != "" {
+ authorityFromCreds = creds.Info().ServerName
+ }
+ authorityFromDialOption := dopts.authority
+ if (authorityFromCreds != "" && authorityFromDialOption != "") && authorityFromCreds != authorityFromDialOption {
+ return fmt.Errorf("ClientConn's authority from transport creds %q and dial option %q don't match", authorityFromCreds, authorityFromDialOption)
+ }
+
+ endpoint := cc.parsedTarget.Endpoint()
+ if authorityFromDialOption != "" {
+ cc.authority = authorityFromDialOption
+ } else if authorityFromCreds != "" {
+ cc.authority = authorityFromCreds
+ } else if auth, ok := cc.resolverBuilder.(resolver.AuthorityOverrider); ok {
+ cc.authority = auth.OverrideAuthority(cc.parsedTarget)
+ } else if strings.HasPrefix(endpoint, ":") {
+ cc.authority = "localhost" + encodeAuthority(endpoint)
+ } else {
+ cc.authority = encodeAuthority(endpoint)
+ }
+ return nil
+}