[VOL-5486] Upgrade library versions
Change-Id: I8b4e88699e03f44ee13e467867f45ae3f0a63c4b
Signed-off-by: Abhay Kumar <abhay.kumar@radisys.com>
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/adapters.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/adapters.go
new file mode 100644
index 0000000..bc9790b
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/adapters.go
@@ -0,0 +1,89 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "context"
+
+ "github.com/coreos/go-semver/semver"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/membershippb"
+ "go.etcd.io/etcd/api/v3/version"
+ serverversion "go.etcd.io/etcd/server/v3/etcdserver/version"
+ "go.etcd.io/etcd/server/v3/storage/schema"
+)
+
+// serverVersionAdapter implements the interface Server defined in package
+// go.etcd.io/etcd/server/v3/etcdserver/version, and it's needed by Monitor
+// in the same package.
+type serverVersionAdapter struct {
+ *EtcdServer
+}
+
+func NewServerVersionAdapter(s *EtcdServer) serverversion.Server {
+ return &serverVersionAdapter{
+ EtcdServer: s,
+ }
+}
+
+var _ serverversion.Server = (*serverVersionAdapter)(nil)
+
+func (s *serverVersionAdapter) UpdateClusterVersion(version string) {
+ s.GoAttach(func() { s.updateClusterVersionV3(version) })
+}
+
+func (s *serverVersionAdapter) LinearizableReadNotify(ctx context.Context) error {
+ return s.linearizableReadNotify(ctx)
+}
+
+func (s *serverVersionAdapter) DowngradeEnable(ctx context.Context, targetVersion *semver.Version) error {
+ raftRequest := membershippb.DowngradeInfoSetRequest{Enabled: true, Ver: targetVersion.String()}
+ _, err := s.raftRequest(ctx, pb.InternalRaftRequest{DowngradeInfoSet: &raftRequest})
+ return err
+}
+
+func (s *serverVersionAdapter) DowngradeCancel(ctx context.Context) error {
+ raftRequest := membershippb.DowngradeInfoSetRequest{Enabled: false}
+ _, err := s.raftRequest(ctx, pb.InternalRaftRequest{DowngradeInfoSet: &raftRequest})
+ return err
+}
+
+func (s *serverVersionAdapter) GetClusterVersion() *semver.Version {
+ return s.cluster.Version()
+}
+
+func (s *serverVersionAdapter) GetDowngradeInfo() *serverversion.DowngradeInfo {
+ return s.cluster.DowngradeInfo()
+}
+
+func (s *serverVersionAdapter) GetMembersVersions() map[string]*version.Versions {
+ return getMembersVersions(s.lg, s.cluster, s.MemberID(), s.peerRt, s.Cfg.ReqTimeout())
+}
+
+func (s *serverVersionAdapter) GetStorageVersion() *semver.Version {
+ return s.StorageVersion()
+}
+
+func (s *serverVersionAdapter) UpdateStorageVersion(target semver.Version) error {
+ // `applySnapshot` sets a new backend instance, so we need to acquire the bemu lock.
+ s.bemu.RLock()
+ defer s.bemu.RUnlock()
+
+ tx := s.be.BatchTx()
+ tx.LockOutsideApply()
+ defer tx.Unlock()
+ return schema.UnsafeMigrate(s.lg, tx, s.r.storage, target)
+}
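Note on usage (not part of the patch itself): the adapter is consumed by the version
monitor as a serverversion.Server. A minimal sketch using only APIs defined in this
file; the function name and wiring are illustrative:

    // illustrative wiring: expose an EtcdServer as a serverversion.Server
    func versionServer(s *etcdserver.EtcdServer) serverversion.Server {
        sv := etcdserver.NewServerVersionAdapter(s)
        if v := sv.GetClusterVersion(); v != nil {
            _ = v.String() // cluster-wide minimum major.minor version, e.g. "3.6.0"
        }
        return sv
    }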
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/capability.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/capability.go
new file mode 100644
index 0000000..cf535ec
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/capability.go
@@ -0,0 +1,96 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package api
+
+import (
+ "sync"
+
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/version"
+ serverversion "go.etcd.io/etcd/server/v3/etcdserver/version"
+)
+
+type Capability string
+
+const (
+ AuthCapability Capability = "auth"
+ V3rpcCapability Capability = "v3rpc"
+)
+
+var (
+ // capabilityMaps is a static map of version to capability map.
+ capabilityMaps = map[string]map[Capability]bool{
+ "3.0.0": {AuthCapability: true, V3rpcCapability: true},
+ "3.1.0": {AuthCapability: true, V3rpcCapability: true},
+ "3.2.0": {AuthCapability: true, V3rpcCapability: true},
+ "3.3.0": {AuthCapability: true, V3rpcCapability: true},
+ "3.4.0": {AuthCapability: true, V3rpcCapability: true},
+ "3.5.0": {AuthCapability: true, V3rpcCapability: true},
+ "3.6.0": {AuthCapability: true, V3rpcCapability: true},
+ }
+
+ enableMapMu sync.RWMutex
+ // enabledMap points to a map in capabilityMaps
+ enabledMap map[Capability]bool
+
+ curVersion *semver.Version
+)
+
+func init() {
+ enabledMap = map[Capability]bool{
+ AuthCapability: true,
+ V3rpcCapability: true,
+ }
+}
+
+// UpdateCapability updates the enabledMap when the cluster version increases.
+func UpdateCapability(lg *zap.Logger, v *semver.Version) {
+ if v == nil {
+ // recovered, but the cluster version was never set; nothing to update
+ return
+ }
+ enableMapMu.Lock()
+ if curVersion != nil && !serverversion.IsValidClusterVersionChange(curVersion, v) {
+ enableMapMu.Unlock()
+ return
+ }
+ curVersion = v
+ enabledMap = capabilityMaps[curVersion.String()]
+ enableMapMu.Unlock()
+
+ if lg != nil {
+ lg.Info(
+ "enabled capabilities for version",
+ zap.String("cluster-version", version.Cluster(v.String())),
+ )
+ }
+}
+
+func IsCapabilityEnabled(c Capability) bool {
+ enableMapMu.RLock()
+ defer enableMapMu.RUnlock()
+ if enabledMap == nil {
+ return false
+ }
+ return enabledMap[c]
+}
+
+func EnableCapability(c Capability) {
+ enableMapMu.Lock()
+ defer enableMapMu.Unlock()
+ enabledMap[c] = true
+}
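Note on usage: UpdateCapability is normally driven by cluster-version changes; a
minimal sketch, with the direct call and the logger being illustrative:

    // illustrative: switch to the 3.6.0 row of capabilityMaps, then gate a feature
    api.UpdateCapability(lg, semver.New("3.6.0"))
    if api.IsCapabilityEnabled(api.AuthCapability) {
        // safe to serve auth RPCs against this cluster version
    }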
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/cluster.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/cluster.go
new file mode 100644
index 0000000..f05997d
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/cluster.go
@@ -0,0 +1,38 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package api
+
+import (
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+
+ "github.com/coreos/go-semver/semver"
+)
+
+// Cluster is an interface representing a collection of members in one etcd cluster.
+type Cluster interface {
+ // ID returns the cluster ID
+ ID() types.ID
+ // ClientURLs returns an aggregate set of all URLs on which this
+ // cluster is listening for client requests
+ ClientURLs() []string
+ // Members returns a slice of members sorted by their ID
+ Members() []*membership.Member
+ // Member retrieves a particular member based on ID, or nil if the
+ // member does not exist in the cluster
+ Member(id types.ID) *membership.Member
+ // Version is the cluster-wide minimum major.minor version.
+ Version() *semver.Version
+}
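Note on usage: callers that only need read access to membership take this interface
instead of a concrete cluster type. A minimal, illustrative sketch:

    // illustrative consumer of the read-only membership view
    func clientEndpoints(c api.Cluster) []string {
        return c.ClientURLs() // aggregate, lexicographically sorted
    }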
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/doc.go
new file mode 100644
index 0000000..f44881b
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package api manages the capabilities and features that are exposed to clients by the etcd cluster.
+package api
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/debug.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/debug.go
new file mode 100644
index 0000000..ab7feee
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/debug.go
@@ -0,0 +1,47 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdhttp
+
+import (
+ "expvar"
+ "fmt"
+ "net/http"
+)
+
+const (
+ varsPath = "/debug/vars"
+)
+
+func HandleDebug(mux *http.ServeMux) {
+ mux.HandleFunc(varsPath, serveVars)
+}
+
+func serveVars(w http.ResponseWriter, r *http.Request) {
+ if !allowMethod(w, r, "GET") {
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json; charset=utf-8")
+ fmt.Fprint(w, "{\n")
+ first := true
+ expvar.Do(func(kv expvar.KeyValue) {
+ if !first {
+ fmt.Fprint(w, ",\n")
+ }
+ first = false
+ fmt.Fprintf(w, "%q: %s", kv.Key, kv.Value)
+ })
+ fmt.Fprint(w, "\n}\n")
+}
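Note on usage: a minimal sketch; the mux wiring is illustrative:

    mux := http.NewServeMux()
    etcdhttp.HandleDebug(mux) // GET /debug/vars now serves expvar state as JSON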
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/doc.go
new file mode 100644
index 0000000..a03b626
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package etcdhttp implements HTTP transportation layer for etcdserver.
+package etcdhttp
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/health.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/health.go
new file mode 100644
index 0000000..26ed4ca
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/health.go
@@ -0,0 +1,447 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file defines the http endpoints for etcd health checks.
+// The endpoints include /livez, /readyz and /health.
+
+package etcdhttp
+
+import (
+ "bytes"
+ "context"
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "path"
+ "strings"
+
+ "github.com/prometheus/client_golang/prometheus"
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/config"
+ "go.etcd.io/raft/v3"
+)
+
+const (
+ PathHealth = "/health"
+ PathProxyHealth = "/proxy/health"
+ HealthStatusSuccess string = "success"
+ HealthStatusError string = "error"
+ checkTypeLivez = "livez"
+ checkTypeReadyz = "readyz"
+ checkTypeHealth = "health"
+)
+
+type ServerHealth interface {
+ Alarms() []*pb.AlarmMember
+ Leader() types.ID
+ Range(context.Context, *pb.RangeRequest) (*pb.RangeResponse, error)
+ Config() config.ServerConfig
+ AuthStore() auth.AuthStore
+ IsLearner() bool
+}
+
+// HandleHealth registers metrics and health handlers. It checks health by using a v3 range
+// request and its corresponding timeout.
+func HandleHealth(lg *zap.Logger, mux *http.ServeMux, srv ServerHealth) {
+ mux.Handle(PathHealth, NewHealthHandler(lg, func(ctx context.Context, excludedAlarms StringSet, serializable bool) Health {
+ if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" {
+ return h
+ }
+ if h := checkLeader(lg, srv, serializable); h.Health != "true" {
+ return h
+ }
+ return checkAPI(ctx, lg, srv, serializable)
+ }))
+
+ installLivezEndpoints(lg, mux, srv)
+ installReadyzEndpoints(lg, mux, srv)
+}
+
+// NewHealthHandler handles '/health' requests.
+func NewHealthHandler(lg *zap.Logger, hfunc func(ctx context.Context, excludedAlarms StringSet, Serializable bool) Health) http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ w.Header().Set("Allow", http.MethodGet)
+ http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
+ lg.Warn("/health error", zap.Int("status-code", http.StatusMethodNotAllowed))
+ return
+ }
+ excludedAlarms := getQuerySet(r, "exclude")
+ // Passing the query parameter "serializable=true" ensures that the
+ // health of the local etcd is checked vs the health of the cluster.
+ // This is useful for probes attempting to validate the liveness of
+ // the etcd process vs readiness of the cluster to serve requests.
+ serializableFlag := getSerializableFlag(r)
+ h := hfunc(r.Context(), excludedAlarms, serializableFlag)
+ defer func() {
+ if h.Health == "true" {
+ healthSuccess.Inc()
+ } else {
+ healthFailed.Inc()
+ }
+ }()
+ d, _ := json.Marshal(h)
+ if h.Health != "true" {
+ http.Error(w, string(d), http.StatusServiceUnavailable)
+ lg.Warn("/health error", zap.String("output", string(d)), zap.Int("status-code", http.StatusServiceUnavailable))
+ return
+ }
+ w.WriteHeader(http.StatusOK)
+ w.Write(d)
+ lg.Debug("/health OK", zap.Int("status-code", http.StatusOK))
+ }
+}
+
+var (
+ healthSuccess = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "health_success",
+ Help: "The total number of successful health checks",
+ })
+ healthFailed = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "health_failures",
+ Help: "The total number of failed health checks",
+ })
+ healthCheckGauge = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "healthcheck",
+ Help: "The result of each kind of healthcheck.",
+ },
+ []string{"type", "name"},
+ )
+ healthCheckCounter = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "healthchecks_total",
+ Help: "The total number of each kind of healthcheck.",
+ },
+ []string{"type", "name", "status"},
+ )
+)
+
+func init() {
+ prometheus.MustRegister(healthSuccess)
+ prometheus.MustRegister(healthFailed)
+ prometheus.MustRegister(healthCheckGauge)
+ prometheus.MustRegister(healthCheckCounter)
+}
+
+// Health defines etcd server health status.
+// TODO: remove manual parsing in etcdctl cluster-health
+type Health struct {
+ Health string `json:"health"`
+ Reason string `json:"reason"`
+}
+
+// HealthStatus is used in new /readyz or /livez health checks instead of the Health struct.
+type HealthStatus struct {
+ Reason string `json:"reason"`
+ Status string `json:"status"`
+}
+
+func getQuerySet(r *http.Request, query string) StringSet {
+ querySet := make(map[string]struct{})
+ qs, found := r.URL.Query()[query]
+ if found {
+ for _, q := range qs {
+ if len(q) == 0 {
+ continue
+ }
+ querySet[q] = struct{}{}
+ }
+ }
+ return querySet
+}
+
+func getSerializableFlag(r *http.Request) bool {
+ return r.URL.Query().Get("serializable") == "true"
+}
+
+// TODO: etcdserver.ErrNoLeader in health API
+
+func checkAlarms(lg *zap.Logger, srv ServerHealth, excludedAlarms StringSet) Health {
+ h := Health{Health: "true"}
+
+ for _, v := range srv.Alarms() {
+ alarmName := v.Alarm.String()
+ if _, found := excludedAlarms[alarmName]; found {
+ lg.Debug("/health excluded alarm", zap.String("alarm", v.String()))
+ continue
+ }
+
+ h.Health = "false"
+ switch v.Alarm {
+ case pb.AlarmType_NOSPACE:
+ h.Reason = "ALARM NOSPACE"
+ case pb.AlarmType_CORRUPT:
+ h.Reason = "ALARM CORRUPT"
+ default:
+ h.Reason = "ALARM UNKNOWN"
+ }
+ lg.Warn("serving /health false due to an alarm", zap.String("alarm", v.String()))
+ return h
+ }
+
+ return h
+}
+
+func checkLeader(lg *zap.Logger, srv ServerHealth, serializable bool) Health {
+ h := Health{Health: "true"}
+ if !serializable && (uint64(srv.Leader()) == raft.None) {
+ h.Health = "false"
+ h.Reason = "RAFT NO LEADER"
+ lg.Warn("serving /health false; no leader")
+ }
+ return h
+}
+
+func checkAPI(ctx context.Context, lg *zap.Logger, srv ServerHealth, serializable bool) Health {
+ h := Health{Health: "true"}
+ cfg := srv.Config()
+ ctx = srv.AuthStore().WithRoot(ctx)
+ cctx, cancel := context.WithTimeout(ctx, cfg.ReqTimeout())
+ _, err := srv.Range(cctx, &pb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable})
+ cancel()
+ if err != nil {
+ h.Health = "false"
+ h.Reason = fmt.Sprintf("RANGE ERROR:%s", err)
+ lg.Warn("serving /health false; Range fails", zap.Error(err))
+ return h
+ }
+ lg.Debug("serving /health true")
+ return h
+}
+
+type HealthCheck func(ctx context.Context) error
+
+type CheckRegistry struct {
+ checkType string
+ checks map[string]HealthCheck
+}
+
+func installLivezEndpoints(lg *zap.Logger, mux *http.ServeMux, server ServerHealth) {
+ reg := CheckRegistry{checkType: checkTypeLivez, checks: make(map[string]HealthCheck)}
+ reg.Register("serializable_read", readCheck(server, true /* serializable */))
+ reg.InstallHTTPEndpoints(lg, mux)
+}
+
+func installReadyzEndpoints(lg *zap.Logger, mux *http.ServeMux, server ServerHealth) {
+ reg := CheckRegistry{checkType: checkTypeReadyz, checks: make(map[string]HealthCheck)}
+ reg.Register("data_corruption", activeAlarmCheck(server, pb.AlarmType_CORRUPT))
+ // serializable_read checks if local read is ok.
+ // linearizable_read checks if there is consensus in the cluster.
+ // Having both serializable_read and linearizable_read helps isolate the cause of problems if there is a read failure.
+ reg.Register("serializable_read", readCheck(server, true))
+ // the linearizable_read check will be replaced by a read_index check in 3.6
+ reg.Register("linearizable_read", readCheck(server, false))
+ // check if local is learner
+ reg.Register("non_learner", learnerCheck(server))
+ reg.InstallHTTPEndpoints(lg, mux)
+}
+
+func (reg *CheckRegistry) Register(name string, check HealthCheck) {
+ reg.checks[name] = check
+}
+
+func (reg *CheckRegistry) RootPath() string {
+ return "/" + reg.checkType
+}
+
+// InstallHttpEndpoints installs the http handlers for the health checks.
+//
+// Deprecated: Please use (*CheckRegistry) InstallHTTPEndpoints instead.
+//
+//revive:disable-next-line:var-naming
+func (reg *CheckRegistry) InstallHttpEndpoints(lg *zap.Logger, mux *http.ServeMux) {
+ reg.InstallHTTPEndpoints(lg, mux)
+}
+
+func (reg *CheckRegistry) InstallHTTPEndpoints(lg *zap.Logger, mux *http.ServeMux) {
+ checkNames := make([]string, 0, len(reg.checks))
+ for k := range reg.checks {
+ checkNames = append(checkNames, k)
+ }
+
+ // installs the http handler for the root path.
+ reg.installRootHTTPEndpoint(lg, mux, checkNames...)
+ for _, checkName := range checkNames {
+ // installs the http handler for the individual check sub path.
+ subpath := path.Join(reg.RootPath(), checkName)
+ check := checkName
+ mux.Handle(subpath, newHealthHandler(subpath, lg, func(r *http.Request) HealthStatus {
+ return reg.runHealthChecks(r.Context(), check)
+ }))
+ }
+}
+
+func (reg *CheckRegistry) runHealthChecks(ctx context.Context, checkNames ...string) HealthStatus {
+ h := HealthStatus{Status: HealthStatusSuccess}
+ var individualCheckOutput bytes.Buffer
+ for _, checkName := range checkNames {
+ check, found := reg.checks[checkName]
+ if !found {
+ panic(fmt.Errorf("Health check: %s not registered", checkName))
+ }
+ if err := check(ctx); err != nil {
+ fmt.Fprintf(&individualCheckOutput, "[-]%s failed: %v\n", checkName, err)
+ h.Status = HealthStatusError
+ recordMetrics(reg.checkType, checkName, HealthStatusError)
+ } else {
+ fmt.Fprintf(&individualCheckOutput, "[+]%s ok\n", checkName)
+ recordMetrics(reg.checkType, checkName, HealthStatusSuccess)
+ }
+ }
+ h.Reason = individualCheckOutput.String()
+ return h
+}
+
+// installRootHTTPEndpoint installs the http handler for the root path.
+func (reg *CheckRegistry) installRootHTTPEndpoint(lg *zap.Logger, mux *http.ServeMux, checks ...string) {
+ hfunc := func(r *http.Request) HealthStatus {
+ // extracts the health check names to be excluded from the query param
+ excluded := getQuerySet(r, "exclude")
+
+ filteredCheckNames := filterCheckList(lg, listToStringSet(checks), excluded)
+ h := reg.runHealthChecks(r.Context(), filteredCheckNames...)
+ return h
+ }
+ mux.Handle(reg.RootPath(), newHealthHandler(reg.RootPath(), lg, hfunc))
+}
+
+// newHealthHandler generates an http.HandlerFunc for a health check function hfunc.
+func newHealthHandler(path string, lg *zap.Logger, hfunc func(*http.Request) HealthStatus) http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ w.Header().Set("Allow", http.MethodGet)
+ http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
+ lg.Warn("Health request error", zap.String("path", path), zap.Int("status-code", http.StatusMethodNotAllowed))
+ return
+ }
+ h := hfunc(r)
+ // Always returns detailed reason for failed checks.
+ if h.Status == HealthStatusError {
+ http.Error(w, h.Reason, http.StatusServiceUnavailable)
+ lg.Error("Health check error", zap.String("path", path), zap.String("reason", h.Reason), zap.Int("status-code", http.StatusServiceUnavailable))
+ return
+ }
+ w.Header().Set("Content-Type", "text/plain; charset=utf-8")
+ w.Header().Set("X-Content-Type-Options", "nosniff")
+ // Only writes detailed reason for verbose requests.
+ if _, found := r.URL.Query()["verbose"]; found {
+ fmt.Fprint(w, h.Reason)
+ }
+ fmt.Fprint(w, "ok\n")
+ lg.Debug("Health check OK", zap.String("path", path), zap.String("reason", h.Reason), zap.Int("status-code", http.StatusOK))
+ }
+}
+
+func filterCheckList(lg *zap.Logger, checks StringSet, excluded StringSet) []string {
+ filteredList := []string{}
+ for chk := range checks {
+ if _, found := excluded[chk]; found {
+ delete(excluded, chk)
+ continue
+ }
+ filteredList = append(filteredList, chk)
+ }
+ if len(excluded) > 0 {
+ // For version compatibility, excluding non-existent checks does not fail the request.
+ lg.Warn("some health checks cannot be excluded", zap.String("missing-health-checks", formatQuoted(excluded.List()...)))
+ }
+ return filteredList
+}
+
+// formatQuoted returns a formatted string of the health check names,
+// preserving the order passed in.
+func formatQuoted(names ...string) string {
+ quoted := make([]string, 0, len(names))
+ for _, name := range names {
+ quoted = append(quoted, fmt.Sprintf("%q", name))
+ }
+ return strings.Join(quoted, ",")
+}
+
+type StringSet map[string]struct{}
+
+func (s StringSet) List() []string {
+ keys := make([]string, 0, len(s))
+ for k := range s {
+ keys = append(keys, k)
+ }
+ return keys
+}
+
+func listToStringSet(list []string) StringSet {
+ set := make(map[string]struct{})
+ for _, s := range list {
+ set[s] = struct{}{}
+ }
+ return set
+}
+
+func recordMetrics(checkType, name string, status string) {
+ val := 0.0
+ if status == HealthStatusSuccess {
+ val = 1.0
+ }
+ healthCheckGauge.With(prometheus.Labels{
+ "type": checkType,
+ "name": name,
+ }).Set(val)
+ healthCheckCounter.With(prometheus.Labels{
+ "type": checkType,
+ "name": name,
+ "status": status,
+ }).Inc()
+}
+
+// activeAlarmCheck checks if a specific alarm type is active in the server.
+func activeAlarmCheck(srv ServerHealth, at pb.AlarmType) func(context.Context) error {
+ return func(ctx context.Context) error {
+ as := srv.Alarms()
+ for _, v := range as {
+ if v.Alarm == at {
+ return fmt.Errorf("alarm activated: %s", at.String())
+ }
+ }
+ return nil
+ }
+}
+
+func readCheck(srv ServerHealth, serializable bool) func(ctx context.Context) error {
+ return func(ctx context.Context) error {
+ ctx = srv.AuthStore().WithRoot(ctx)
+ _, err := srv.Range(ctx, &pb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable})
+ return err
+ }
+}
+
+func learnerCheck(srv ServerHealth) func(ctx context.Context) error {
+ return func(ctx context.Context) error {
+ if srv.IsLearner() {
+ return fmt.Errorf("not supported for learner")
+ }
+ return nil
+ }
+}
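Note on usage: /livez carries a single serializable_read check, while /readyz adds
data_corruption, linearizable_read and non_learner. CheckRegistry's fields are
unexported, so extra checks can only be registered from inside this package; a
minimal, illustrative sketch:

    // illustrative, package-internal registration of a custom readiness check
    reg := CheckRegistry{checkType: checkTypeReadyz, checks: make(map[string]HealthCheck)}
    reg.Register("ping", func(ctx context.Context) error { return nil })
    reg.InstallHTTPEndpoints(lg, mux)
    // GET /readyz              -> runs every registered check, "ok" on success
    // GET /readyz/ping         -> runs only the "ping" check
    // GET /readyz?verbose=true -> adds the per-check [+]/[-] report to the body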
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/metrics.go
new file mode 100644
index 0000000..bf7d4a4
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/metrics.go
@@ -0,0 +1,31 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdhttp
+
+import (
+ "net/http"
+
+ "github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+const (
+ PathMetrics = "/metrics"
+ PathProxyMetrics = "/proxy/metrics"
+)
+
+// HandleMetrics registers prometheus handler on '/metrics'.
+func HandleMetrics(mux *http.ServeMux) {
+ mux.Handle(PathMetrics, promhttp.Handler())
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/peer.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/peer.go
new file mode 100644
index 0000000..de5948d
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/peer.go
@@ -0,0 +1,165 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdhttp
+
+import (
+ "encoding/json"
+ errorspkg "errors"
+ "fmt"
+ "net/http"
+ "strconv"
+ "strings"
+
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/etcdserver/api"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+ "go.etcd.io/etcd/server/v3/lease/leasehttp"
+)
+
+const (
+ peerMembersPath = "/members"
+ peerMemberPromotePrefix = "/members/promote/"
+)
+
+// NewPeerHandler generates an http.Handler to handle etcd peer requests.
+func NewPeerHandler(lg *zap.Logger, s etcdserver.ServerPeerV2) http.Handler {
+ return newPeerHandler(lg, s, s.RaftHandler(), s.LeaseHandler(), s.HashKVHandler(), s.DowngradeEnabledHandler())
+}
+
+func newPeerHandler(
+ lg *zap.Logger,
+ s etcdserver.Server,
+ raftHandler http.Handler,
+ leaseHandler http.Handler,
+ hashKVHandler http.Handler,
+ downgradeEnabledHandler http.Handler,
+) http.Handler {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ peerMembersHandler := newPeerMembersHandler(lg, s.Cluster())
+ peerMemberPromoteHandler := newPeerMemberPromoteHandler(lg, s)
+
+ mux := http.NewServeMux()
+ mux.HandleFunc("/", http.NotFound)
+ mux.Handle(rafthttp.RaftPrefix, raftHandler)
+ mux.Handle(rafthttp.RaftPrefix+"/", raftHandler)
+ mux.Handle(peerMembersPath, peerMembersHandler)
+ mux.Handle(peerMemberPromotePrefix, peerMemberPromoteHandler)
+ if leaseHandler != nil {
+ mux.Handle(leasehttp.LeasePrefix, leaseHandler)
+ mux.Handle(leasehttp.LeaseInternalPrefix, leaseHandler)
+ }
+ if downgradeEnabledHandler != nil {
+ mux.Handle(etcdserver.DowngradeEnabledPath, downgradeEnabledHandler)
+ }
+ if hashKVHandler != nil {
+ mux.Handle(etcdserver.PeerHashKVPath, hashKVHandler)
+ }
+ mux.HandleFunc(versionPath, versionHandler(s, serveVersion))
+ return mux
+}
+
+func newPeerMembersHandler(lg *zap.Logger, cluster api.Cluster) http.Handler {
+ return &peerMembersHandler{
+ lg: lg,
+ cluster: cluster,
+ }
+}
+
+type peerMembersHandler struct {
+ lg *zap.Logger
+ cluster api.Cluster
+}
+
+func newPeerMemberPromoteHandler(lg *zap.Logger, s etcdserver.Server) http.Handler {
+ return &peerMemberPromoteHandler{
+ lg: lg,
+ cluster: s.Cluster(),
+ server: s,
+ }
+}
+
+type peerMemberPromoteHandler struct {
+ lg *zap.Logger
+ cluster api.Cluster
+ server etcdserver.Server
+}
+
+func (h *peerMembersHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ if !allowMethod(w, r, "GET") {
+ return
+ }
+ w.Header().Set("X-Etcd-Cluster-ID", h.cluster.ID().String())
+
+ if r.URL.Path != peerMembersPath {
+ http.Error(w, "bad path", http.StatusBadRequest)
+ return
+ }
+ ms := h.cluster.Members()
+ w.Header().Set("Content-Type", "application/json")
+ if err := json.NewEncoder(w).Encode(ms); err != nil {
+ h.lg.Warn("failed to encode membership members", zap.Error(err))
+ }
+}
+
+func (h *peerMemberPromoteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ if !allowMethod(w, r, "POST") {
+ return
+ }
+ w.Header().Set("X-Etcd-Cluster-ID", h.cluster.ID().String())
+
+ if !strings.HasPrefix(r.URL.Path, peerMemberPromotePrefix) {
+ http.Error(w, "bad path", http.StatusBadRequest)
+ return
+ }
+ idStr := strings.TrimPrefix(r.URL.Path, peerMemberPromotePrefix)
+ id, err := strconv.ParseUint(idStr, 10, 64)
+ if err != nil {
+ http.Error(w, fmt.Sprintf("member %s not found in cluster", idStr), http.StatusNotFound)
+ return
+ }
+
+ resp, err := h.server.PromoteMember(r.Context(), id)
+ if err != nil {
+ switch {
+ case errorspkg.Is(err, membership.ErrIDNotFound):
+ http.Error(w, err.Error(), http.StatusNotFound)
+ case errorspkg.Is(err, membership.ErrMemberNotLearner):
+ http.Error(w, err.Error(), http.StatusPreconditionFailed)
+ case errorspkg.Is(err, errors.ErrLearnerNotReady):
+ http.Error(w, err.Error(), http.StatusPreconditionFailed)
+ default:
+ writeError(h.lg, w, r, err)
+ }
+ h.lg.Warn(
+ "failed to promote a member",
+ zap.String("member-id", types.ID(id).String()),
+ zap.Error(err),
+ )
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(http.StatusOK)
+ if err := json.NewEncoder(w).Encode(resp); err != nil {
+ h.lg.Warn("failed to encode members response", zap.Error(err))
+ }
+}
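Note on usage: a wiring sketch; the server and listener variables are illustrative:

    // illustrative: mount the peer handler on a peer listener
    h := etcdhttp.NewPeerHandler(lg, s) // s implements etcdserver.ServerPeerV2
    srv := &http.Server{Handler: h}
    go srv.Serve(peerListener) // peerListener: net.Listener bound to a peer URL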
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types/errors.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types/errors.go
new file mode 100644
index 0000000..79e366f
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types/errors.go
@@ -0,0 +1,51 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package httptypes
+
+import (
+ "encoding/json"
+ "fmt"
+ "net/http"
+)
+
+type HTTPError struct {
+ Message string `json:"message"`
+ // Code is the HTTP status code
+ Code int `json:"-"`
+}
+
+func (e HTTPError) Error() string {
+ return e.Message
+}
+
+func (e HTTPError) WriteTo(w http.ResponseWriter) error {
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(e.Code)
+ b, err := json.Marshal(e)
+ if err != nil {
+ panic(fmt.Sprintf("failed to marshal HTTPError: %v", err))
+ }
+ if _, err := w.Write(b); err != nil {
+ return err
+ }
+ return nil
+}
+
+func NewHTTPError(code int, m string) *HTTPError {
+ return &HTTPError{
+ Message: m,
+ Code: code,
+ }
+}
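Note on usage: a minimal, illustrative sketch:

    // illustrative: render a structured error to the client
    herr := httptypes.NewHTTPError(http.StatusNotFound, "member not found")
    if err := herr.WriteTo(w); err != nil {
        // the ResponseWriter failed; nothing else to do
    }
    // -> HTTP 404 with body {"message":"member not found"}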
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/utils.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/utils.go
new file mode 100644
index 0000000..082fa5a
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/utils.go
@@ -0,0 +1,99 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdhttp
+
+import (
+ errorspkg "errors"
+ "net/http"
+
+ "go.uber.org/zap"
+
+ httptypes "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2error"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+)
+
+func allowMethod(w http.ResponseWriter, r *http.Request, m string) bool {
+ if m == r.Method {
+ return true
+ }
+ w.Header().Set("Allow", m)
+ http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
+ return false
+}
+
+// writeError logs and writes the given error to the ResponseWriter.
+// If the error is an etcd error type, it is rendered to the ResponseWriter;
+// otherwise it is reported as a StatusInternalServerError.
+func writeError(lg *zap.Logger, w http.ResponseWriter, r *http.Request, err error) {
+ if err == nil {
+ return
+ }
+ var v2Err *v2error.Error
+ var httpErr *httptypes.HTTPError
+ switch {
+ case errorspkg.As(err, &v2Err):
+ v2Err.WriteTo(w)
+
+ case errorspkg.As(err, &httpErr):
+ if et := httpErr.WriteTo(w); et != nil {
+ if lg != nil {
+ lg.Debug(
+ "failed to write v2 HTTP error",
+ zap.String("remote-addr", r.RemoteAddr),
+ zap.String("internal-server-error", httpErr.Error()),
+ zap.Error(et),
+ )
+ }
+ }
+
+ default:
+ switch {
+ case
+ errorspkg.Is(err, errors.ErrTimeoutDueToLeaderFail),
+ errorspkg.Is(err, errors.ErrTimeoutDueToConnectionLost),
+ errorspkg.Is(err, errors.ErrNotEnoughStartedMembers),
+ errorspkg.Is(err, errors.ErrUnhealthy):
+ if lg != nil {
+ lg.Warn(
+ "v2 response error",
+ zap.String("remote-addr", r.RemoteAddr),
+ zap.String("internal-server-error", err.Error()),
+ )
+ }
+
+ default:
+ if lg != nil {
+ lg.Warn(
+ "unexpected v2 response error",
+ zap.String("remote-addr", r.RemoteAddr),
+ zap.String("internal-server-error", err.Error()),
+ )
+ }
+ }
+
+ herr := httptypes.NewHTTPError(http.StatusInternalServerError, "Internal Server Error")
+ if et := herr.WriteTo(w); et != nil {
+ if lg != nil {
+ lg.Debug(
+ "failed to write v2 HTTP error",
+ zap.String("remote-addr", r.RemoteAddr),
+ zap.String("internal-server-error", err.Error()),
+ zap.Error(et),
+ )
+ }
+ }
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/version.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/version.go
new file mode 100644
index 0000000..8090703
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/version.go
@@ -0,0 +1,65 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdhttp
+
+import (
+ "encoding/json"
+ "fmt"
+ "net/http"
+
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+)
+
+const (
+ versionPath = "/version"
+)
+
+func HandleVersion(mux *http.ServeMux, server etcdserver.Server) {
+ mux.HandleFunc(versionPath, versionHandler(server, serveVersion))
+}
+
+func versionHandler(server etcdserver.Server, fn func(http.ResponseWriter, *http.Request, string, string)) http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ clusterVersion := server.ClusterVersion()
+ storageVersion := server.StorageVersion()
+ clusterVersionStr, storageVersionStr := "not_decided", "unknown"
+ if clusterVersion != nil {
+ clusterVersionStr = clusterVersion.String()
+ }
+ if storageVersion != nil {
+ storageVersionStr = storageVersion.String()
+ }
+ fn(w, r, clusterVersionStr, storageVersionStr)
+ }
+}
+
+func serveVersion(w http.ResponseWriter, r *http.Request, clusterV, storageV string) {
+ if !allowMethod(w, r, "GET") {
+ return
+ }
+ vs := version.Versions{
+ Server: version.Version,
+ Cluster: clusterV,
+ Storage: storageV,
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ b, err := json.Marshal(&vs)
+ if err != nil {
+ panic(fmt.Sprintf("cannot marshal versions to json (%v)", err))
+ }
+ w.Write(b)
+}
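Note on usage: the handler marshals version.Versions from
go.etcd.io/etcd/api/v3/version; the values below are illustrative:

    GET /version
    {"etcdserver":"3.6.0","etcdcluster":"3.6.0","storage":"3.6.0"}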
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/cluster.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/cluster.go
new file mode 100644
index 0000000..299e613
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/cluster.go
@@ -0,0 +1,1012 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package membership
+
+import (
+ "context"
+ "crypto/sha1"
+ "encoding/binary"
+ "encoding/json"
+ "fmt"
+ "sort"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/coreos/go-semver/semver"
+ "github.com/prometheus/client_golang/prometheus"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/pkg/v3/netutil"
+ "go.etcd.io/etcd/pkg/v3/notify"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
+ serverversion "go.etcd.io/etcd/server/v3/etcdserver/version"
+ "go.etcd.io/raft/v3"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+// RaftCluster is a list of Members that belong to the same raft cluster
+type RaftCluster struct {
+ lg *zap.Logger
+
+ localID types.ID
+ cid types.ID
+
+ v2store v2store.Store
+ be MembershipBackend
+
+ sync.Mutex // guards the fields below
+ version *semver.Version
+ members map[types.ID]*Member
+ // removed contains the ids of removed members in the cluster.
+ // removed ids cannot be reused.
+ removed map[types.ID]bool
+
+ downgradeInfo *serverversion.DowngradeInfo
+ maxLearners int
+ versionChanged *notify.Notifier
+}
+
+// ConfigChangeContext represents a context for confChange.
+type ConfigChangeContext struct {
+ Member
+ // IsPromote indicates if the config change is for promoting a learner member.
+ // This flag is needed because both adding a new member and promoting a learner member
+ // use the same config change type 'ConfChangeAddNode'.
+ IsPromote bool `json:"isPromote"`
+}
+
+type ShouldApplyV3 bool
+
+const (
+ ApplyBoth = ShouldApplyV3(true)
+ ApplyV2storeOnly = ShouldApplyV3(false)
+)
+
+// NewClusterFromURLsMap creates a new raft cluster using the provided urls map. Currently, it does not
+// support creating a cluster with raft learner members.
+func NewClusterFromURLsMap(lg *zap.Logger, token string, urlsmap types.URLsMap, opts ...ClusterOption) (*RaftCluster, error) {
+ c := NewCluster(lg, opts...)
+ for name, urls := range urlsmap {
+ m := NewMember(name, urls, token, nil)
+ if _, ok := c.members[m.ID]; ok {
+ return nil, fmt.Errorf("member exists with identical ID %v", m)
+ }
+ if uint64(m.ID) == raft.None {
+ return nil, fmt.Errorf("cannot use %x as member id", raft.None)
+ }
+ c.members[m.ID] = m
+ }
+ c.genID()
+ return c, nil
+}
+
+func NewClusterFromMembers(lg *zap.Logger, id types.ID, membs []*Member, opts ...ClusterOption) *RaftCluster {
+ c := NewCluster(lg, opts...)
+ c.cid = id
+ for _, m := range membs {
+ c.members[m.ID] = m
+ }
+ return c
+}
+
+func NewCluster(lg *zap.Logger, opts ...ClusterOption) *RaftCluster {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ clOpts := newClusterOpts(opts...)
+
+ return &RaftCluster{
+ lg: lg,
+ members: make(map[types.ID]*Member),
+ removed: make(map[types.ID]bool),
+ downgradeInfo: &serverversion.DowngradeInfo{Enabled: false},
+ maxLearners: clOpts.maxLearners,
+ }
+}
+
+func (c *RaftCluster) ID() types.ID { return c.cid }
+
+func (c *RaftCluster) Members() []*Member {
+ c.Lock()
+ defer c.Unlock()
+ var ms MembersByID
+ for _, m := range c.members {
+ ms = append(ms, m.Clone())
+ }
+ sort.Sort(ms)
+ return ms
+}
+
+func (c *RaftCluster) Member(id types.ID) *Member {
+ c.Lock()
+ defer c.Unlock()
+ return c.members[id].Clone()
+}
+
+func (c *RaftCluster) VotingMembers() []*Member {
+ c.Lock()
+ defer c.Unlock()
+ var ms MembersByID
+ for _, m := range c.members {
+ if !m.IsLearner {
+ ms = append(ms, m.Clone())
+ }
+ }
+ sort.Sort(ms)
+ return ms
+}
+
+// MemberByName returns a Member with the given name if it exists.
+// If more than one member has the given name, it will panic.
+func (c *RaftCluster) MemberByName(name string) *Member {
+ c.Lock()
+ defer c.Unlock()
+ var memb *Member
+ for _, m := range c.members {
+ if m.Name == name {
+ if memb != nil {
+ c.lg.Panic("two member with same name found", zap.String("name", name))
+ }
+ memb = m
+ }
+ }
+ return memb.Clone()
+}
+
+func (c *RaftCluster) MemberIDs() []types.ID {
+ c.Lock()
+ defer c.Unlock()
+ var ids []types.ID
+ for _, m := range c.members {
+ ids = append(ids, m.ID)
+ }
+ sort.Sort(types.IDSlice(ids))
+ return ids
+}
+
+func (c *RaftCluster) IsIDRemoved(id types.ID) bool {
+ c.Lock()
+ defer c.Unlock()
+ return c.removed[id]
+}
+
+// PeerURLs returns a list of all peer addresses.
+// The returned list is sorted in ascending lexicographical order.
+func (c *RaftCluster) PeerURLs() []string {
+ c.Lock()
+ defer c.Unlock()
+ urls := make([]string, 0)
+ for _, p := range c.members {
+ urls = append(urls, p.PeerURLs...)
+ }
+ sort.Strings(urls)
+ return urls
+}
+
+// ClientURLs returns a list of all client addresses.
+// The returned list is sorted in ascending lexicographical order.
+func (c *RaftCluster) ClientURLs() []string {
+ c.Lock()
+ defer c.Unlock()
+ urls := make([]string, 0)
+ for _, p := range c.members {
+ urls = append(urls, p.ClientURLs...)
+ }
+ sort.Strings(urls)
+ return urls
+}
+
+func (c *RaftCluster) String() string {
+ c.Lock()
+ defer c.Unlock()
+ b := &strings.Builder{}
+ fmt.Fprintf(b, "{ClusterID:%s ", c.cid)
+ var ms []string
+ for _, m := range c.members {
+ ms = append(ms, fmt.Sprintf("%+v", m))
+ }
+ fmt.Fprintf(b, "Members:[%s] ", strings.Join(ms, " "))
+ var ids []string
+ for id := range c.removed {
+ ids = append(ids, id.String())
+ }
+ fmt.Fprintf(b, "RemovedMemberIDs:[%s]}", strings.Join(ids, " "))
+ return b.String()
+}
+
+func (c *RaftCluster) genID() {
+ mIDs := c.MemberIDs()
+ b := make([]byte, 8*len(mIDs))
+ for i, id := range mIDs {
+ binary.BigEndian.PutUint64(b[8*i:], uint64(id))
+ }
+ hash := sha1.Sum(b)
+ c.cid = types.ID(binary.BigEndian.Uint64(hash[:8]))
+}
+
+func (c *RaftCluster) SetID(localID, cid types.ID) {
+ c.localID = localID
+ c.cid = cid
+ c.buildMembershipMetric()
+}
+
+func (c *RaftCluster) SetStore(st v2store.Store) { c.v2store = st }
+
+func (c *RaftCluster) SetBackend(be MembershipBackend) {
+ c.be = be
+ c.be.MustCreateBackendBuckets()
+}
+
+func (c *RaftCluster) SetVersionChangedNotifier(n *notify.Notifier) {
+ c.versionChanged = n
+}
+
+func (c *RaftCluster) UnsafeLoad() {
+ if c.be != nil {
+ c.version = c.be.ClusterVersionFromBackend()
+ c.members, c.removed = c.be.MustReadMembersFromBackend()
+ } else {
+ c.version = clusterVersionFromStore(c.lg, c.v2store)
+ c.members, c.removed = membersFromStore(c.lg, c.v2store)
+ }
+
+ if c.be != nil {
+ c.downgradeInfo = c.be.DowngradeInfoFromBackend()
+ }
+}
+
+func (c *RaftCluster) Recover(onSet func(*zap.Logger, *semver.Version)) {
+ c.Lock()
+ defer c.Unlock()
+
+ c.UnsafeLoad()
+
+ c.buildMembershipMetric()
+
+ sv := semver.Must(semver.NewVersion(version.Version))
+ if c.downgradeInfo != nil && c.downgradeInfo.Enabled {
+ c.lg.Info(
+ "cluster is downgrading to target version",
+ zap.String("target-cluster-version", c.downgradeInfo.TargetVersion),
+ zap.String("current-server-version", sv.String()),
+ )
+ }
+ serverversion.MustDetectDowngrade(c.lg, sv, c.version)
+ onSet(c.lg, c.version)
+
+ for _, m := range c.members {
+ if c.localID == m.ID {
+ setIsLearnerMetric(m)
+ }
+
+ c.lg.Info(
+ "recovered/added member from store",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("recovered-remote-peer-id", m.ID.String()),
+ zap.Strings("recovered-remote-peer-urls", m.PeerURLs),
+ zap.Bool("recovered-remote-peer-is-learner", m.IsLearner),
+ )
+ }
+ if c.version != nil {
+ c.lg.Info(
+ "set cluster version from store",
+ zap.String("cluster-version", version.Cluster(c.version.String())),
+ )
+ }
+}
+
+// ValidateConfigurationChange takes a proposed ConfChange and
+// ensures that it is still valid.
+func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange, shouldApplyV3 ShouldApplyV3) error {
+ var membersMap map[types.ID]*Member
+ var removedMap map[types.ID]bool
+
+ if shouldApplyV3 {
+ membersMap, removedMap = c.be.MustReadMembersFromBackend()
+ } else {
+ membersMap, removedMap = membersFromStore(c.lg, c.v2store)
+ }
+
+ id := types.ID(cc.NodeID)
+ if removedMap[id] {
+ return ErrIDRemoved
+ }
+ switch cc.Type {
+ case raftpb.ConfChangeAddNode, raftpb.ConfChangeAddLearnerNode:
+ confChangeContext := new(ConfigChangeContext)
+ if err := json.Unmarshal(cc.Context, confChangeContext); err != nil {
+ c.lg.Panic("failed to unmarshal confChangeContext", zap.Error(err))
+ }
+
+ if confChangeContext.IsPromote { // promoting a learner member to voting member
+ if membersMap[id] == nil {
+ return ErrIDNotFound
+ }
+ if !membersMap[id].IsLearner {
+ return ErrMemberNotLearner
+ }
+ } else { // adding a new member
+ if membersMap[id] != nil {
+ return ErrIDExists
+ }
+
+ var members []*Member
+ urls := make(map[string]bool)
+ for _, m := range membersMap {
+ members = append(members, m)
+ for _, u := range m.PeerURLs {
+ urls[u] = true
+ }
+ }
+ for _, u := range confChangeContext.Member.PeerURLs {
+ if urls[u] {
+ return ErrPeerURLexists
+ }
+ }
+
+ if confChangeContext.Member.RaftAttributes.IsLearner && cc.Type == raftpb.ConfChangeAddLearnerNode { // the new member is a learner
+ scaleUpLearners := true
+ if err := ValidateMaxLearnerConfig(c.maxLearners, members, scaleUpLearners); err != nil {
+ return err
+ }
+ }
+ }
+ case raftpb.ConfChangeRemoveNode:
+ if membersMap[id] == nil {
+ return ErrIDNotFound
+ }
+
+ case raftpb.ConfChangeUpdateNode:
+ if membersMap[id] == nil {
+ return ErrIDNotFound
+ }
+ urls := make(map[string]bool)
+ for _, m := range membersMap {
+ if m.ID == id {
+ continue
+ }
+ for _, u := range m.PeerURLs {
+ urls[u] = true
+ }
+ }
+ m := new(Member)
+ if err := json.Unmarshal(cc.Context, m); err != nil {
+ c.lg.Panic("failed to unmarshal member", zap.Error(err))
+ }
+ for _, u := range m.PeerURLs {
+ if urls[u] {
+ return ErrPeerURLexists
+ }
+ }
+
+ default:
+ c.lg.Panic("unknown ConfChange type", zap.String("type", cc.Type.String()))
+ }
+ return nil
+}
+
+// AddMember adds a new Member into the cluster, and saves the given member's
+// raftAttributes into the store. The given member should have empty attributes.
+// A Member with a matching id must not exist.
+func (c *RaftCluster) AddMember(m *Member, shouldApplyV3 ShouldApplyV3) {
+ c.Lock()
+ defer c.Unlock()
+ if c.v2store != nil {
+ mustSaveMemberToStore(c.lg, c.v2store, m)
+ }
+
+ if m.ID == c.localID {
+ setIsLearnerMetric(m)
+ }
+
+ if c.be != nil && shouldApplyV3 {
+ c.be.MustSaveMemberToBackend(m)
+
+ c.members[m.ID] = m
+ c.updateMembershipMetric(m.ID, true)
+
+ c.lg.Info(
+ "added member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("added-peer-id", m.ID.String()),
+ zap.Strings("added-peer-peer-urls", m.PeerURLs),
+ zap.Bool("added-peer-is-learner", m.IsLearner),
+ )
+ } else {
+ c.lg.Info(
+ "ignore already added member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("added-peer-id", m.ID.String()),
+ zap.Strings("added-peer-peer-urls", m.PeerURLs),
+ zap.Bool("added-peer-is-learner", m.IsLearner))
+ }
+}
+
+// RemoveMember removes a member from the store.
+// The given id MUST exist, or the function panics.
+func (c *RaftCluster) RemoveMember(id types.ID, shouldApplyV3 ShouldApplyV3) {
+ c.Lock()
+ defer c.Unlock()
+ if c.v2store != nil {
+ mustDeleteMemberFromStore(c.lg, c.v2store, id)
+ }
+ if c.be != nil && shouldApplyV3 {
+ c.be.MustDeleteMemberFromBackend(id)
+
+ m, ok := c.members[id]
+ delete(c.members, id)
+ c.removed[id] = true
+ c.updateMembershipMetric(id, false)
+
+ if ok {
+ c.lg.Info(
+ "removed member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("removed-remote-peer-id", id.String()),
+ zap.Strings("removed-remote-peer-urls", m.PeerURLs),
+ zap.Bool("removed-remote-peer-is-learner", m.IsLearner),
+ )
+ } else {
+ c.lg.Warn(
+ "skipped removing already removed member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("removed-remote-peer-id", id.String()),
+ )
+ }
+ } else {
+ c.lg.Info(
+ "ignore already removed member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("removed-remote-peer-id", id.String()),
+ )
+ }
+}
+
+func (c *RaftCluster) UpdateAttributes(id types.ID, attr Attributes, shouldApplyV3 ShouldApplyV3) {
+ c.Lock()
+ defer c.Unlock()
+
+ if m, ok := c.members[id]; ok {
+ m.Attributes = attr
+ if c.v2store != nil {
+ mustUpdateMemberAttrInStore(c.lg, c.v2store, m)
+ }
+ if c.be != nil && shouldApplyV3 {
+ c.be.MustSaveMemberToBackend(m)
+ }
+ return
+ }
+
+ _, ok := c.removed[id]
+ if !ok {
+ c.lg.Panic(
+ "failed to update; member unknown",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("unknown-remote-peer-id", id.String()),
+ )
+ }
+
+ c.lg.Warn(
+ "skipped attributes update of removed member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("updated-peer-id", id.String()),
+ )
+}
+
+// PromoteMember marks the member's IsLearner RaftAttributes to false.
+func (c *RaftCluster) PromoteMember(id types.ID, shouldApplyV3 ShouldApplyV3) {
+ c.Lock()
+ defer c.Unlock()
+
+ if c.v2store != nil {
+ membersMap, _ := membersFromStore(c.lg, c.v2store)
+ if _, ok := membersMap[id]; ok {
+ m := *(membersMap[id])
+ m.RaftAttributes.IsLearner = false
+ mustUpdateMemberInStore(c.lg, c.v2store, &m)
+ } else {
+ c.lg.Info("Skipped promoting non-existent member in v2store",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("promoted-member-id", id.String()),
+ )
+ }
+ }
+
+ if id == c.localID {
+ isLearner.Set(0)
+ }
+
+ if c.be != nil {
+ m := c.members[id]
+ if shouldApplyV3 {
+ m.RaftAttributes.IsLearner = false
+ c.updateMembershipMetric(id, true)
+ c.be.MustSaveMemberToBackend(m)
+
+ c.lg.Info(
+ "promote member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("promoted-member-id", id.String()),
+ )
+ } else {
+ // Workaround for clusters already affected by
+ // https://github.com/etcd-io/etcd/issues/19557. In 3.5 the learner
+ // promotion request was applied to the v3 store but not yet saved
+ // to the v2 snapshot. Once the cluster upgrades to 3.6, the
+ // patch here ensures the issue is fixed automatically.
+ if m == nil {
+ c.lg.Info(
+ "Skipped forcibly promoting non-existent member in v3store",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("promoted-member-id", id.String()),
+ )
+ } else if m.IsLearner {
+ m.RaftAttributes.IsLearner = false
+ c.lg.Info("Forcibly apply member promotion request in v3store", zap.String("member", fmt.Sprintf("%+v", *m)))
+ c.be.MustHackySaveMemberToBackend(m)
+ } else {
+ c.lg.Info(
+ "ignore already promoted member in v3store",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("promoted-member-id", id.String()),
+ )
+ }
+ }
+ } else {
+ c.lg.Info(
+ "ignore already promoted member due to backend being nil",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("promoted-member-id", id.String()),
+ )
+ }
+}
+
+// SyncLearnerPromotionIfNeeded provides a workaround solution to fix the issues
+// which have already been affected by https://github.com/etcd-io/etcd/issues/19557.
+func (c *RaftCluster) SyncLearnerPromotionIfNeeded() {
+ c.Lock()
+ defer c.Unlock()
+
+ v2Members, _ := membersFromStore(c.lg, c.v2store)
+ v3Members, _ := c.be.MustReadMembersFromBackend()
+
+ for id, v3Member := range v3Members {
+ v2Member, ok := v2Members[id]
+ if !ok {
+ // This isn't an error. The conf change on the member hasn't been saved to the v2 snapshot yet.
+ c.lg.Info("Detected member only in v3store but missing in v2store", zap.String("member", fmt.Sprintf("%+v", *v3Member)))
+ continue
+ }
+
+ if !v2Member.IsLearner && v3Member.IsLearner {
+ syncedV3Member := v3Member.Clone()
+ syncedV3Member.IsLearner = false
+ c.lg.Warn("Syncing member in v3store", zap.String("member", fmt.Sprintf("%+v", *syncedV3Member)))
+ c.be.MustHackySaveMemberToBackend(syncedV3Member)
+ }
+ }
+}
+
+func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes, shouldApplyV3 ShouldApplyV3) {
+ c.Lock()
+ defer c.Unlock()
+
+ if c.v2store != nil {
+ if _, ok := c.members[id]; ok {
+ m := *(c.members[id])
+ m.RaftAttributes = raftAttr
+ mustUpdateMemberInStore(c.lg, c.v2store, &m)
+ } else {
+ c.lg.Info("Skipped updating non-existent member in v2store",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("updated-remote-peer-id", id.String()),
+ zap.Strings("updated-remote-peer-urls", raftAttr.PeerURLs),
+ zap.Bool("updated-remote-peer-is-learner", raftAttr.IsLearner),
+ )
+ }
+ }
+ if c.be != nil && shouldApplyV3 {
+ c.members[id].RaftAttributes = raftAttr
+ c.be.MustSaveMemberToBackend(c.members[id])
+
+ c.lg.Info(
+ "updated member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("updated-remote-peer-id", id.String()),
+ zap.Strings("updated-remote-peer-urls", raftAttr.PeerURLs),
+ zap.Bool("updated-remote-peer-is-learner", raftAttr.IsLearner),
+ )
+ } else {
+ c.lg.Info(
+ "ignored already updated member",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("updated-remote-peer-id", id.String()),
+ zap.Strings("updated-remote-peer-urls", raftAttr.PeerURLs),
+ zap.Bool("updated-remote-peer-is-learner", raftAttr.IsLearner),
+ )
+ }
+}
+
+func (c *RaftCluster) Version() *semver.Version {
+ c.Lock()
+ defer c.Unlock()
+ if c.version == nil {
+ return nil
+ }
+ return semver.Must(semver.NewVersion(c.version.String()))
+}
+
+func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*zap.Logger, *semver.Version), shouldApplyV3 ShouldApplyV3) {
+ c.Lock()
+ defer c.Unlock()
+ if c.version != nil {
+ c.lg.Info(
+ "updated cluster version",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("from", version.Cluster(c.version.String())),
+ zap.String("to", version.Cluster(ver.String())),
+ )
+ } else {
+ c.lg.Info(
+ "set initial cluster version",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ zap.String("cluster-version", version.Cluster(ver.String())),
+ )
+ }
+ oldVer := c.version
+ c.version = ver
+ sv := semver.Must(semver.NewVersion(version.Version))
+ serverversion.MustDetectDowngrade(c.lg, sv, c.version)
+ if c.v2store != nil {
+ mustSaveClusterVersionToStore(c.lg, c.v2store, ver)
+ }
+ if c.be != nil && shouldApplyV3 {
+ c.be.MustSaveClusterVersionToBackend(ver)
+ }
+ if oldVer != nil {
+ ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": version.Cluster(oldVer.String())}).Set(0)
+ }
+ ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": version.Cluster(ver.String())}).Set(1)
+ if c.versionChanged != nil {
+ c.versionChanged.Notify()
+ }
+ onSet(c.lg, ver)
+}
+
+func (c *RaftCluster) IsReadyToAddVotingMember() bool {
+ nmembers := 1
+ nstarted := 0
+
+ for _, member := range c.VotingMembers() {
+ if member.IsStarted() {
+ nstarted++
+ }
+ nmembers++
+ }
+
+ if nstarted == 1 && nmembers == 2 {
+ // a case of adding a new node to 1-member cluster for restoring cluster data
+ // https://github.com/etcd-io/website/blob/main/content/docs/v2/admin_guide.md#restoring-the-cluster
+ c.lg.Debug("number of started member is 1; can accept add member request")
+ return true
+ }
+
+ nquorum := nmembers/2 + 1
+ if nstarted < nquorum {
+ c.lg.Warn(
+ "rejecting member add; started member will be less than quorum",
+ zap.Int("number-of-started-member", nstarted),
+ zap.Int("quorum", nquorum),
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ )
+ return false
+ }
+
+ return true
+}
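+
+// Illustrative sketch: adding a member to a fresh one-member cluster hits the
+// special case above, since nmembers = 1 existing + 1 new = 2 and nstarted = 1,
+// so the request is accepted even though one started member would otherwise
+// fall short of the post-add quorum of 2/2 + 1 = 2.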
+
+func (c *RaftCluster) IsReadyToRemoveVotingMember(id uint64) bool {
+ nmembers := 0
+ nstarted := 0
+
+ for _, member := range c.VotingMembers() {
+ if uint64(member.ID) == id {
+ continue
+ }
+
+ if member.IsStarted() {
+ nstarted++
+ }
+ nmembers++
+ }
+
+ nquorum := nmembers/2 + 1
+ if nstarted < nquorum {
+ c.lg.Warn(
+ "rejecting member remove; started member will be less than quorum",
+ zap.Int("number-of-started-member", nstarted),
+ zap.Int("quorum", nquorum),
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ )
+ return false
+ }
+
+ return true
+}
+
+func (c *RaftCluster) IsReadyToPromoteMember(id uint64) bool {
+ nmembers := 1 // We count the learner to be promoted for the future quorum
+ nstarted := 1 // and we also count it as started.
+
+ for _, member := range c.VotingMembers() {
+ if member.IsStarted() {
+ nstarted++
+ }
+ nmembers++
+ }
+
+ nquorum := nmembers/2 + 1
+ if nstarted < nquorum {
+ c.lg.Warn(
+ "rejecting member promote; started member will be less than quorum",
+ zap.Int("number-of-started-member", nstarted),
+ zap.Int("quorum", nquorum),
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ )
+ return false
+ }
+
+ return true
+}
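+
+// Illustrative sketch of the promotion quorum arithmetic above, for a
+// three-voting-member cluster with every member started:
+//
+//	nmembers = 1 + 3 = 4 // the learner being promoted plus the voting members
+//	nstarted = 1 + 3 = 4
+//	nquorum  = 4/2 + 1 = 3
+//	nstarted >= nquorum  // promotion is accepted
+//
+// With only one of the three voting members started, nstarted = 2 < 3 and the
+// promotion is rejected.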
+
+func (c *RaftCluster) MembersFromBackend() (map[types.ID]*Member, map[types.ID]bool) {
+ return c.be.MustReadMembersFromBackend()
+}
+
+func (c *RaftCluster) MembersFromStore() (map[types.ID]*Member, map[types.ID]bool) {
+ return membersFromStore(c.lg, c.v2store)
+}
+
+func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
+ members := make(map[types.ID]*Member)
+ removed := make(map[types.ID]bool)
+ e, err := st.Get(StoreMembersPrefix, true, true)
+ if err != nil {
+ if isKeyNotFound(err) {
+ return members, removed
+ }
+ lg.Panic("failed to get members from store", zap.String("path", StoreMembersPrefix), zap.Error(err))
+ }
+ for _, n := range e.Node.Nodes {
+ var m *Member
+ m, err = nodeToMember(lg, n)
+ if err != nil {
+ lg.Panic("failed to nodeToMember", zap.Error(err))
+ }
+ members[m.ID] = m
+ }
+
+ e, err = st.Get(storeRemovedMembersPrefix, true, true)
+ if err != nil {
+ if isKeyNotFound(err) {
+ return members, removed
+ }
+ lg.Panic(
+ "failed to get removed members from store",
+ zap.String("path", storeRemovedMembersPrefix),
+ zap.Error(err),
+ )
+ }
+ for _, n := range e.Node.Nodes {
+ removed[MustParseMemberIDFromKey(lg, n.Key)] = true
+ }
+ return members, removed
+}
+
+// ValidateClusterAndAssignIDs validates the local cluster by matching the PeerURLs
+// with the existing cluster. If the validation succeeds, it assigns the IDs
+// from the existing cluster to the local cluster.
+// If the validation fails, an error will be returned.
+func ValidateClusterAndAssignIDs(lg *zap.Logger, local *RaftCluster, existing *RaftCluster) error {
+ ems := existing.Members()
+ lms := local.Members()
+ if len(ems) != len(lms) {
+ return fmt.Errorf("member count is unequal")
+ }
+
+ ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
+ defer cancel()
+ for i := range ems {
+ var err error
+ ok := false
+ for j := range lms {
+ if ok, err = netutil.URLStringsEqual(ctx, lg, ems[i].PeerURLs, lms[j].PeerURLs); ok {
+ lms[j].ID = ems[i].ID
+ break
+ }
+ }
+ if !ok {
+ return fmt.Errorf("PeerURLs: no match found for existing member (%v, %v), last resolver error (%w)", ems[i].ID, ems[i].PeerURLs, err)
+ }
+ }
+ local.members = make(map[types.ID]*Member)
+ for _, m := range lms {
+ local.members[m.ID] = m
+ }
+ local.buildMembershipMetric()
+ return nil
+}
+
+// IsLocalMemberLearner reports whether the local member is a raft learner.
+func (c *RaftCluster) IsLocalMemberLearner() bool {
+ c.Lock()
+ defer c.Unlock()
+ localMember, ok := c.members[c.localID]
+ if !ok {
+ c.lg.Panic(
+ "failed to find local ID in cluster members",
+ zap.String("cluster-id", c.cid.String()),
+ zap.String("local-member-id", c.localID.String()),
+ )
+ }
+ return localMember.IsLearner
+}
+
+// DowngradeInfo returns the downgrade status of the cluster
+func (c *RaftCluster) DowngradeInfo() *serverversion.DowngradeInfo {
+ c.Lock()
+ defer c.Unlock()
+ if c.downgradeInfo == nil {
+ return &serverversion.DowngradeInfo{Enabled: false}
+ }
+ d := &serverversion.DowngradeInfo{Enabled: c.downgradeInfo.Enabled, TargetVersion: c.downgradeInfo.TargetVersion}
+ return d
+}
+
+func (c *RaftCluster) SetDowngradeInfo(d *serverversion.DowngradeInfo, shouldApplyV3 ShouldApplyV3) {
+ c.Lock()
+ defer c.Unlock()
+
+ if c.be != nil && shouldApplyV3 {
+ c.be.MustSaveDowngradeToBackend(d)
+ }
+
+ c.downgradeInfo = d
+}
+
+// IsMemberExist reports whether the member with the given ID exists in the cluster.
+func (c *RaftCluster) IsMemberExist(id types.ID) bool {
+ c.Lock()
+ _, ok := c.members[id]
+ c.Unlock()
+
+ // gofail: var afterIsMemberExist struct{}
+ return ok
+}
+
+// VotingMemberIDs returns the IDs of the voting members in the cluster.
+func (c *RaftCluster) VotingMemberIDs() []types.ID {
+ c.Lock()
+ defer c.Unlock()
+ var ids []types.ID
+ for _, m := range c.members {
+ if !m.IsLearner {
+ ids = append(ids, m.ID)
+ }
+ }
+ sort.Sort(types.IDSlice(ids))
+ return ids
+}
+
+// buildMembershipMetric sets the knownPeers metric based on the current
+// members of the cluster.
+func (c *RaftCluster) buildMembershipMetric() {
+ if c.localID == 0 {
+ // We don't know our own id yet.
+ return
+ }
+ for p := range c.members {
+ knownPeers.WithLabelValues(c.localID.String(), p.String()).Set(1)
+ }
+ for p := range c.removed {
+ knownPeers.WithLabelValues(c.localID.String(), p.String()).Set(0)
+ }
+}
+
+// updateMembershipMetric updates the knownPeers metric to indicate that
+// the given peer is now (un)known.
+func (c *RaftCluster) updateMembershipMetric(peer types.ID, known bool) {
+ if c.localID == 0 {
+ // We don't know our own id yet.
+ return
+ }
+ v := float64(0)
+ if known {
+ v = 1
+ }
+ knownPeers.WithLabelValues(c.localID.String(), peer.String()).Set(v)
+}
+
+// ValidateMaxLearnerConfig verifies that the number of existing learner members
+// in the cluster membership, plus an optional additional learner for an N+1
+// scale-up, does not exceed maxLearners.
+func ValidateMaxLearnerConfig(maxLearners int, members []*Member, scaleUpLearners bool) error {
+ numLearners := 0
+ for _, m := range members {
+ if m.IsLearner {
+ numLearners++
+ }
+ }
+ // Validate config can accommodate scale up.
+ if scaleUpLearners {
+ numLearners++
+ }
+
+ if numLearners > maxLearners {
+ return ErrTooManyLearners
+ }
+
+ return nil
+}
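+
+// A minimal usage sketch of ValidateMaxLearnerConfig (the member values are
+// assumptions for illustration):
+//
+//	members := []*Member{
+//		{ID: 1, RaftAttributes: RaftAttributes{IsLearner: false}},
+//		{ID: 2, RaftAttributes: RaftAttributes{IsLearner: true}},
+//	}
+//	// Scaling up by one more learner would exceed DefaultMaxLearners (1),
+//	// so this returns ErrTooManyLearners.
+//	err := ValidateMaxLearnerConfig(DefaultMaxLearners, members, true)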
+
+func (c *RaftCluster) Store(store v2store.Store) {
+ c.Lock()
+ defer c.Unlock()
+
+ verifyNoMembersInStore(c.lg, store)
+
+ for _, m := range c.members {
+ mustSaveMemberToStore(c.lg, store, m)
+ if m.ClientURLs != nil {
+ mustUpdateMemberAttrInStore(c.lg, store, m)
+ }
+ c.lg.Debug(
+ "snapshot storing member",
+ zap.String("id", m.ID.String()),
+ zap.Strings("peer-urls", m.PeerURLs),
+ zap.Bool("is-learner", m.IsLearner),
+ )
+ }
+ for id := range c.removed {
+ // We do not need to delete the member since the store is empty.
+ mustAddToRemovedMembersInStore(c.lg, store, id)
+ }
+ if c.version != nil {
+ mustSaveClusterVersionToStore(c.lg, store, c.version)
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/cluster_opts.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/cluster_opts.go
new file mode 100644
index 0000000..204fbf0
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/cluster_opts.go
@@ -0,0 +1,43 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package membership
+
+const DefaultMaxLearners = 1
+
+type ClusterOptions struct {
+ maxLearners int
+}
+
+// ClusterOption is an option that can be applied to the raft cluster.
+type ClusterOption func(*ClusterOptions)
+
+func newClusterOpts(opts ...ClusterOption) *ClusterOptions {
+ clOpts := &ClusterOptions{}
+ clOpts.applyOpts(opts)
+ return clOpts
+}
+
+func (co *ClusterOptions) applyOpts(opts []ClusterOption) {
+ for _, opt := range opts {
+ opt(co)
+ }
+}
+
+// WithMaxLearners sets the maximum number of learners that can exist in the cluster membership.
+func WithMaxLearners(max int) ClusterOption {
+ return func(co *ClusterOptions) {
+ co.maxLearners = max
+ }
+}
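+
+// A minimal sketch of the functional-options pattern defined above:
+//
+//	opts := newClusterOpts(WithMaxLearners(3))
+//	// opts.maxLearners == 3; with no options it keeps the zero value,
+//	// presumably leaving callers to fall back to DefaultMaxLearners.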
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/doc.go
new file mode 100644
index 0000000..b07fb2d
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package membership describes individual etcd members and clusters of members.
+package membership
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/errors.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/errors.go
new file mode 100644
index 0000000..ff68297
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/errors.go
@@ -0,0 +1,35 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package membership
+
+import (
+ "errors"
+
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2error"
+)
+
+var (
+ ErrIDRemoved = errors.New("membership: ID removed")
+ ErrIDExists = errors.New("membership: ID exists")
+ ErrIDNotFound = errors.New("membership: ID not found")
+ ErrPeerURLexists = errors.New("membership: peerURL exists")
+ ErrMemberNotLearner = errors.New("membership: can only promote a learner member")
+ ErrTooManyLearners = errors.New("membership: too many learner members in cluster")
+)
+
+func isKeyNotFound(err error) bool {
+ var e *v2error.Error
+ return errors.As(err, &e) && e.ErrorCode == v2error.EcodeKeyNotFound
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/member.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/member.go
new file mode 100644
index 0000000..b6037bf
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/member.go
@@ -0,0 +1,132 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package membership
+
+import (
+ "crypto/sha1"
+ "encoding/binary"
+ "fmt"
+ "sort"
+ "strings"
+ "time"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+)
+
+// RaftAttributes represents the raft related attributes of an etcd member.
+type RaftAttributes struct {
+ // PeerURLs is the list of peers in the raft cluster.
+ // TODO(philips): ensure these are URLs
+ PeerURLs []string `json:"peerURLs"`
+ // IsLearner indicates if the member is raft learner.
+ IsLearner bool `json:"isLearner,omitempty"`
+}
+
+// Attributes represents all the non-raft related attributes of an etcd member.
+type Attributes struct {
+ Name string `json:"name,omitempty"`
+ ClientURLs []string `json:"clientURLs,omitempty"`
+}
+
+type Member struct {
+ ID types.ID `json:"id"`
+ RaftAttributes
+ Attributes
+}
+
+// NewMember creates a Member without an ID and generates one based on the
+// cluster name, peer URLs, and time. This is used for bootstrapping or adding a new member.
+func NewMember(name string, peerURLs types.URLs, clusterName string, now *time.Time) *Member {
+ memberID := computeMemberID(peerURLs, clusterName, now)
+ return newMember(name, peerURLs, memberID, false)
+}
+
+// NewMemberAsLearner creates a learner Member without an ID and generates one based on the
+// cluster name, peer URLs, and time. This is used for adding a new learner member.
+func NewMemberAsLearner(name string, peerURLs types.URLs, clusterName string, now *time.Time) *Member {
+ memberID := computeMemberID(peerURLs, clusterName, now)
+ return newMember(name, peerURLs, memberID, true)
+}
+
+func computeMemberID(peerURLs types.URLs, clusterName string, now *time.Time) types.ID {
+ peerURLstrs := peerURLs.StringSlice()
+ sort.Strings(peerURLstrs)
+ joinedPeerUrls := strings.Join(peerURLstrs, "")
+ b := []byte(joinedPeerUrls)
+
+ b = append(b, []byte(clusterName)...)
+ if now != nil {
+ b = append(b, []byte(fmt.Sprintf("%d", now.Unix()))...)
+ }
+
+ hash := sha1.Sum(b)
+ return types.ID(binary.BigEndian.Uint64(hash[:8]))
+}
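+
+// Illustrative sketch: computeMemberID is deterministic for a given set of
+// inputs (the URL and cluster name below are assumptions):
+//
+//	urls, _ := types.NewURLs([]string{"http://10.0.0.1:2380"})
+//	now := time.Now()
+//	id1 := computeMemberID(urls, "my-cluster", &now)
+//	id2 := computeMemberID(urls, "my-cluster", &now)
+//	// id1 == id2: identical URLs, cluster name, and timestamp yield the
+//	// same ID; passing a nil time drops the timestamp from the hash.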
+
+func newMember(name string, peerURLs types.URLs, memberID types.ID, isLearner bool) *Member {
+ m := &Member{
+ RaftAttributes: RaftAttributes{
+ PeerURLs: peerURLs.StringSlice(),
+ IsLearner: isLearner,
+ },
+ Attributes: Attributes{Name: name},
+ ID: memberID,
+ }
+ return m
+}
+
+func (m *Member) Clone() *Member {
+ if m == nil {
+ return nil
+ }
+ mm := &Member{
+ ID: m.ID,
+ RaftAttributes: RaftAttributes{
+ IsLearner: m.IsLearner,
+ },
+ Attributes: Attributes{
+ Name: m.Name,
+ },
+ }
+ if m.PeerURLs != nil {
+ mm.PeerURLs = make([]string, len(m.PeerURLs))
+ copy(mm.PeerURLs, m.PeerURLs)
+ }
+ if m.ClientURLs != nil {
+ mm.ClientURLs = make([]string, len(m.ClientURLs))
+ copy(mm.ClientURLs, m.ClientURLs)
+ }
+ return mm
+}
+
+func (m *Member) IsStarted() bool {
+ return len(m.Name) != 0
+}
+
+// MembersByID implements the sort interface to sort members by ID.
+type MembersByID []*Member
+
+func (ms MembersByID) Len() int { return len(ms) }
+func (ms MembersByID) Less(i, j int) bool { return ms[i].ID < ms[j].ID }
+func (ms MembersByID) Swap(i, j int) { ms[i], ms[j] = ms[j], ms[i] }
+
+// MembersByPeerURLs implements the sort interface to sort members by their first peer URL.
+type MembersByPeerURLs []*Member
+
+func (ms MembersByPeerURLs) Len() int { return len(ms) }
+func (ms MembersByPeerURLs) Less(i, j int) bool {
+ return ms[i].PeerURLs[0] < ms[j].PeerURLs[0]
+}
+func (ms MembersByPeerURLs) Swap(i, j int) { ms[i], ms[j] = ms[j], ms[i] }
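+
+// A minimal usage sketch for the sort adapters above (m1, m2, m3 assumed):
+//
+//	ms := []*Member{m3, m1, m2}
+//	sort.Sort(MembersByID(ms))       // ascending by member ID
+//	sort.Sort(MembersByPeerURLs(ms)) // ascending by first peer URL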
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/metrics.go
new file mode 100644
index 0000000..ad98dbb
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/metrics.go
@@ -0,0 +1,58 @@
+// Copyright 2018 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package membership
+
+import "github.com/prometheus/client_golang/prometheus"
+
+var (
+ ClusterVersionMetrics = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "cluster",
+ Name: "version",
+ Help: "Which version is running. 1 for 'cluster_version' label with current cluster version",
+ },
+ []string{"cluster_version"},
+ )
+ knownPeers = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "known_peers",
+ Help: "The current number of known peers.",
+ },
+ []string{"Local", "Remote"},
+ )
+ isLearner = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "is_learner",
+ Help: "Whether or not this member is a learner. 1 if is, 0 otherwise.",
+ })
+)
+
+func setIsLearnerMetric(m *Member) {
+ if m.IsLearner {
+ isLearner.Set(1)
+ } else {
+ isLearner.Set(0)
+ }
+}
+
+func init() {
+ prometheus.MustRegister(ClusterVersionMetrics)
+ prometheus.MustRegister(knownPeers)
+ prometheus.MustRegister(isLearner)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/store.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/store.go
new file mode 100644
index 0000000..d4bb734
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/store.go
@@ -0,0 +1,58 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package membership
+
+import (
+ "path"
+
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/version"
+)
+
+type MembershipBackend interface {
+ ClusterVersionBackend
+ MemberBackend
+ DowngradeInfoBackend
+ MustCreateBackendBuckets()
+}
+
+type ClusterVersionBackend interface {
+ ClusterVersionFromBackend() *semver.Version
+ MustSaveClusterVersionToBackend(version *semver.Version)
+}
+
+type MemberBackend interface {
+ MustReadMembersFromBackend() (map[types.ID]*Member, map[types.ID]bool)
+ MustSaveMemberToBackend(*Member)
+ MustHackySaveMemberToBackend(*Member)
+ TrimMembershipFromBackend() error
+ MustDeleteMemberFromBackend(types.ID)
+}
+
+type DowngradeInfoBackend interface {
+ MustSaveDowngradeToBackend(*version.DowngradeInfo)
+ DowngradeInfoFromBackend() *version.DowngradeInfo
+}
+
+func MustParseMemberIDFromKey(lg *zap.Logger, key string) types.ID {
+ id, err := types.IDFromString(path.Base(key))
+ if err != nil {
+ lg.Panic("failed to parse member id from key", zap.Error(err))
+ }
+ return id
+}
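+
+// Illustrative sketch: member keys are paths whose base name is the
+// 16-character hex member ID, so, for an assumed key,
+//
+//	id := MustParseMemberIDFromKey(lg, "/0/members/8e9e05c52164694d")
+//	// id.String() == "8e9e05c52164694d"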
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/storev2.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/storev2.go
new file mode 100644
index 0000000..0511505
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/membership/storev2.go
@@ -0,0 +1,258 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package membership
+
+import (
+ "encoding/json"
+ "fmt"
+ "path"
+
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
+)
+
+const (
+ // the prefix for storing membership-related information in the store provided by the v2store package.
+ storePrefix = "/0"
+
+ attributesSuffix = "attributes"
+ raftAttributesSuffix = "raftAttributes"
+)
+
+var (
+ StoreMembersPrefix = path.Join(storePrefix, "members")
+ storeRemovedMembersPrefix = path.Join(storePrefix, "removed_members")
+)
+
+// IsMetaStoreOnly verifies whether the given `store` contains only
+// meta information (members, version) that can also be recovered from the
+// backend (storev3), as opposed to user data.
+func IsMetaStoreOnly(store v2store.Store) (bool, error) {
+ event, err := store.Get("/", true, false)
+ if err != nil {
+ return false, err
+ }
+
+ // storePermsPrefix is the internal prefix of the storage layer dedicated to storing user data.
+ // refer to https://github.com/etcd-io/etcd/blob/v3.5.21/server/etcdserver/api/v2auth/auth.go#L40
+ storePermsPrefix := "/2"
+ for _, n := range event.Node.Nodes {
+ if n.Key == storePrefix {
+ continue
+ }
+
+ // For auth data, even after we remove all users and roles, the nodes
+ // "/2/roles" and "/2/users" are still present in the tree. We need
+ // to exclude such cases. See the example below, and refer to
+ // https://github.com/etcd-io/etcd/discussions/20231#discussioncomment-13791940
+ /*
+ "2": {
+ "Path": "/2",
+ "CreatedIndex": 204749,
+ "ModifiedIndex": 204749,
+ "ExpireTime": "0001-01-01T00:00:00Z",
+ "Value": "",
+ "Children": {
+ "enabled": {
+ "Path": "/2/enabled",
+ "CreatedIndex": 204752,
+ "ModifiedIndex": 16546016,
+ "ExpireTime": "0001-01-01T00:00:00Z",
+ "Value": "false",
+ "Children": null
+ },
+ "roles": {
+ "Path": "/2/roles",
+ "CreatedIndex": 204751,
+ "ModifiedIndex": 204751,
+ "ExpireTime": "0001-01-01T00:00:00Z",
+ "Value": "",
+ "Children": {}
+ },
+ "users": {
+ "Path": "/2/users",
+ "CreatedIndex": 204750,
+ "ModifiedIndex": 204750,
+ "ExpireTime": "0001-01-01T00:00:00Z",
+ "Value": "",
+ "Children": {}
+ }
+ }
+ }
+ */
+ if n.Key == storePermsPrefix {
+ if n.Nodes.Len() > 0 {
+ for _, child := range n.Nodes {
+ if child.Nodes.Len() > 0 {
+ return false, nil
+ }
+ }
+ }
+ continue
+ }
+
+ if n.Nodes.Len() > 0 {
+ return false, nil
+ }
+ }
+
+ return true, nil
+}
+
+func verifyNoMembersInStore(lg *zap.Logger, s v2store.Store) {
+ members, removed := membersFromStore(lg, s)
+ if len(members) != 0 || len(removed) != 0 {
+ lg.Panic("store has membership info")
+ }
+}
+
+func mustSaveMemberToStore(lg *zap.Logger, s v2store.Store, m *Member) {
+ b, err := json.Marshal(m.RaftAttributes)
+ if err != nil {
+ lg.Panic("failed to marshal raftAttributes", zap.Error(err))
+ }
+ p := path.Join(MemberStoreKey(m.ID), raftAttributesSuffix)
+ if _, err := s.Create(p, false, string(b), false, v2store.TTLOptionSet{ExpireTime: v2store.Permanent}); err != nil {
+ lg.Panic(
+ "failed to save member to store",
+ zap.String("path", p),
+ zap.Error(err),
+ )
+ }
+}
+
+func mustDeleteMemberFromStore(lg *zap.Logger, s v2store.Store, id types.ID) {
+ if _, err := s.Delete(MemberStoreKey(id), true, true); err != nil {
+ lg.Panic(
+ "failed to delete member from store",
+ zap.String("path", MemberStoreKey(id)),
+ zap.Error(err),
+ )
+ }
+
+ mustAddToRemovedMembersInStore(lg, s, id)
+}
+
+func mustAddToRemovedMembersInStore(lg *zap.Logger, s v2store.Store, id types.ID) {
+ if _, err := s.Create(RemovedMemberStoreKey(id), false, "", false, v2store.TTLOptionSet{ExpireTime: v2store.Permanent}); err != nil {
+ lg.Panic(
+ "failed to create removedMember",
+ zap.String("path", RemovedMemberStoreKey(id)),
+ zap.Error(err),
+ )
+ }
+}
+
+func mustUpdateMemberInStore(lg *zap.Logger, s v2store.Store, m *Member) {
+ b, err := json.Marshal(m.RaftAttributes)
+ if err != nil {
+ lg.Panic("failed to marshal raftAttributes", zap.Error(err))
+ }
+ p := path.Join(MemberStoreKey(m.ID), raftAttributesSuffix)
+ if _, err := s.Update(p, string(b), v2store.TTLOptionSet{ExpireTime: v2store.Permanent}); err != nil {
+ lg.Panic(
+ "failed to update raftAttributes",
+ zap.String("path", p),
+ zap.Error(err),
+ )
+ }
+}
+
+func mustUpdateMemberAttrInStore(lg *zap.Logger, s v2store.Store, m *Member) {
+ b, err := json.Marshal(m.Attributes)
+ if err != nil {
+ lg.Panic("failed to marshal attributes", zap.Error(err))
+ }
+ p := path.Join(MemberStoreKey(m.ID), attributesSuffix)
+ if _, err := s.Set(p, false, string(b), v2store.TTLOptionSet{ExpireTime: v2store.Permanent}); err != nil {
+ lg.Panic(
+ "failed to update attributes",
+ zap.String("path", p),
+ zap.Error(err),
+ )
+ }
+}
+
+func mustSaveClusterVersionToStore(lg *zap.Logger, s v2store.Store, ver *semver.Version) {
+ if _, err := s.Set(StoreClusterVersionKey(), false, ver.String(), v2store.TTLOptionSet{ExpireTime: v2store.Permanent}); err != nil {
+ lg.Panic(
+ "failed to save cluster version to store",
+ zap.String("path", StoreClusterVersionKey()),
+ zap.Error(err),
+ )
+ }
+}
+
+// nodeToMember builds a member from a key-value node.
+// The child nodes of the given node MUST be sorted by key.
+func nodeToMember(lg *zap.Logger, n *v2store.NodeExtern) (*Member, error) {
+ m := &Member{ID: MustParseMemberIDFromKey(lg, n.Key)}
+ attrs := make(map[string][]byte)
+ raftAttrKey := path.Join(n.Key, raftAttributesSuffix)
+ attrKey := path.Join(n.Key, attributesSuffix)
+ for _, nn := range n.Nodes {
+ if nn.Key != raftAttrKey && nn.Key != attrKey {
+ return nil, fmt.Errorf("unknown key %q", nn.Key)
+ }
+ attrs[nn.Key] = []byte(*nn.Value)
+ }
+ if data := attrs[raftAttrKey]; data != nil {
+ if err := json.Unmarshal(data, &m.RaftAttributes); err != nil {
+ return nil, fmt.Errorf("unmarshal raftAttributes error: %w", err)
+ }
+ } else {
+ return nil, fmt.Errorf("raftAttributes key doesn't exist")
+ }
+ if data := attrs[attrKey]; data != nil {
+ if err := json.Unmarshal(data, &m.Attributes); err != nil {
+ return m, fmt.Errorf("unmarshal attributes error: %w", err)
+ }
+ }
+ return m, nil
+}
+
+func StoreClusterVersionKey() string {
+ return path.Join(storePrefix, "version")
+}
+
+func RemovedMemberStoreKey(id types.ID) string {
+ return path.Join(storeRemovedMembersPrefix, id.String())
+}
+
+func MemberStoreKey(id types.ID) string {
+ return path.Join(StoreMembersPrefix, id.String())
+}
+
+func MemberAttributesStorePath(id types.ID) string {
+ return path.Join(MemberStoreKey(id), attributesSuffix)
+}
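+
+// Illustrative sketch of the resulting v2store key layout (the ID value is an
+// assumption):
+//
+//	StoreClusterVersionKey()          // "/0/version"
+//	MemberStoreKey(0x1234)            // "/0/members/0000000000001234"
+//	MemberAttributesStorePath(0x1234) // "/0/members/0000000000001234/attributes"
+//	RemovedMemberStoreKey(0x1234)     // "/0/removed_members/0000000000001234"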
+
+func clusterVersionFromStore(lg *zap.Logger, st v2store.Store) *semver.Version {
+ e, err := st.Get(path.Join(storePrefix, "version"), false, false)
+ if err != nil {
+ if isKeyNotFound(err) {
+ return nil
+ }
+ lg.Panic(
+ "failed to get cluster version from store",
+ zap.String("path", path.Join(storePrefix, "version")),
+ zap.Error(err),
+ )
+ }
+ return semver.Must(semver.NewVersion(*e.Node.Value))
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/coder.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/coder.go
new file mode 100644
index 0000000..9774429
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/coder.go
@@ -0,0 +1,27 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import "go.etcd.io/raft/v3/raftpb"
+
+type encoder interface {
+ // encode encodes the given message to an output stream.
+ encode(m *raftpb.Message) error
+}
+
+type decoder interface {
+ // decode decodes the message from an input stream.
+ decode() (raftpb.Message, error)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/doc.go
new file mode 100644
index 0000000..c45dc81
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package rafthttp implements the HTTP transport layer for the raft package.
+package rafthttp
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/http.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/http.go
new file mode 100644
index 0000000..2610240
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/http.go
@@ -0,0 +1,533 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "path"
+ "strings"
+ "time"
+
+ humanize "github.com/dustin/go-humanize"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ pioutil "go.etcd.io/etcd/pkg/v3/ioutil"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/snap"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+const (
+ // connReadLimitByte limits the number of bytes
+ // a single read can read out.
+ //
+ // 64KB should be large enough to avoid causing
+ // a throughput bottleneck, while small enough
+ // to avoid causing a read timeout.
+ connReadLimitByte = 64 * 1024
+
+ // snapshotLimitByte limits the snapshot size to 1TB
+ snapshotLimitByte = 1 * 1024 * 1024 * 1024 * 1024
+)
+
+var (
+ RaftPrefix = "/raft"
+ ProbingPrefix = path.Join(RaftPrefix, "probing")
+ RaftStreamPrefix = path.Join(RaftPrefix, "stream")
+ RaftSnapshotPrefix = path.Join(RaftPrefix, "snapshot")
+
+ errIncompatibleVersion = errors.New("incompatible version")
+ ErrClusterIDMismatch = errors.New("cluster ID mismatch")
+)
+
+type peerGetter interface {
+ Get(id types.ID) Peer
+}
+
+type writerToResponse interface {
+ WriteTo(w http.ResponseWriter)
+}
+
+type pipelineHandler struct {
+ lg *zap.Logger
+ localID types.ID
+ tr Transporter
+ r Raft
+ cid types.ID
+}
+
+// newPipelineHandler returns a handler for handling raft messages
+// received via the pipeline under RaftPrefix.
+//
+// The handler reads the raft message out of the request body
+// and forwards it to the given raft state machine for processing.
+func newPipelineHandler(t *Transport, r Raft, cid types.ID) http.Handler {
+ h := &pipelineHandler{
+ lg: t.Logger,
+ localID: t.ID,
+ tr: t,
+ r: r,
+ cid: cid,
+ }
+ if h.lg == nil {
+ h.lg = zap.NewNop()
+ }
+ return h
+}
+
+func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodPost {
+ w.Header().Set("Allow", "POST")
+ http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
+ return
+ }
+
+ w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
+
+ if err := checkClusterCompatibilityFromHeader(h.lg, h.localID, r.Header, h.cid); err != nil {
+ http.Error(w, err.Error(), http.StatusPreconditionFailed)
+ return
+ }
+
+ addRemoteFromRequest(h.tr, r)
+
+ // Limit the data size that can be read from the request body, which ensures that a read from the
+ // connection will not time out accidentally due to possible blocking in the underlying implementation.
+ limitedr := pioutil.NewLimitedBufferReader(r.Body, connReadLimitByte)
+ b, err := io.ReadAll(limitedr)
+ if err != nil {
+ h.lg.Warn(
+ "failed to read Raft message",
+ zap.String("local-member-id", h.localID.String()),
+ zap.Error(err),
+ )
+ http.Error(w, "error reading raft message", http.StatusBadRequest)
+ recvFailures.WithLabelValues(r.RemoteAddr).Inc()
+ return
+ }
+
+ var m raftpb.Message
+ if err := m.Unmarshal(b); err != nil {
+ h.lg.Warn(
+ "failed to unmarshal Raft message",
+ zap.String("local-member-id", h.localID.String()),
+ zap.Error(err),
+ )
+ http.Error(w, "error unmarshalling raft message", http.StatusBadRequest)
+ recvFailures.WithLabelValues(r.RemoteAddr).Inc()
+ return
+ }
+
+ receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(len(b)))
+
+ if err := h.r.Process(context.TODO(), m); err != nil {
+ var writerErr writerToResponse
+ switch {
+ case errors.As(err, &writerErr):
+ writerErr.WriteTo(w)
+ default:
+ h.lg.Warn(
+ "failed to process Raft message",
+ zap.String("local-member-id", h.localID.String()),
+ zap.Error(err),
+ )
+ http.Error(w, "error processing raft message", http.StatusInternalServerError)
+ w.(http.Flusher).Flush()
+ // disconnect the http stream
+ panic(err)
+ }
+ return
+ }
+
+ // Write the StatusNoContent header after the message has been processed by
+ // raft, which allows the client to report the MsgSnap status.
+ w.WriteHeader(http.StatusNoContent)
+}
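+
+// A hypothetical client-side sketch of the pipeline protocol handled above
+// (peers normally go through Transport; peerURL and cid are assumptions):
+//
+//	body, _ := (&raftpb.Message{Type: raftpb.MsgHeartbeat}).Marshal()
+//	req, _ := http.NewRequest(http.MethodPost, peerURL+RaftPrefix, bytes.NewReader(body))
+//	req.Header.Set("X-Etcd-Cluster-ID", cid.String())
+//	// Version headers are also expected by the compatibility check; a
+//	// 204 No Content response means raft processed the message.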
+
+type snapshotHandler struct {
+ lg *zap.Logger
+ tr Transporter
+ r Raft
+ snapshotter *snap.Snapshotter
+
+ localID types.ID
+ cid types.ID
+}
+
+func newSnapshotHandler(t *Transport, r Raft, snapshotter *snap.Snapshotter, cid types.ID) http.Handler {
+ h := &snapshotHandler{
+ lg: t.Logger,
+ tr: t,
+ r: r,
+ snapshotter: snapshotter,
+ localID: t.ID,
+ cid: cid,
+ }
+ if h.lg == nil {
+ h.lg = zap.NewNop()
+ }
+ return h
+}
+
+const unknownSnapshotSender = "UNKNOWN_SNAPSHOT_SENDER"
+
+// ServeHTTP serves HTTP requests that receive and process snapshot messages.
+//
+// If the request sender dies without closing the underlying TCP connection,
+// the handler will keep waiting for the request body until TCP keepalive
+// finds out that the connection is broken after several minutes.
+// This is acceptable because
+// 1. snapshot messages sent through other TCP connections could still be
+// received and processed.
+// 2. this case should happen rarely, so no further optimization is done.
+func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ start := time.Now()
+
+ if r.Method != http.MethodPost {
+ w.Header().Set("Allow", "POST")
+ http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
+ snapshotReceiveFailures.WithLabelValues(unknownSnapshotSender).Inc()
+ return
+ }
+
+ w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
+
+ if err := checkClusterCompatibilityFromHeader(h.lg, h.localID, r.Header, h.cid); err != nil {
+ http.Error(w, err.Error(), http.StatusPreconditionFailed)
+ snapshotReceiveFailures.WithLabelValues(unknownSnapshotSender).Inc()
+ return
+ }
+
+ addRemoteFromRequest(h.tr, r)
+
+ dec := &messageDecoder{r: r.Body}
+ // let snapshots be very large since they can exceed 512MB for large installations
+ m, err := dec.decodeLimit(snapshotLimitByte)
+ from := types.ID(m.From).String()
+ if err != nil {
+ msg := fmt.Sprintf("failed to decode raft message (%v)", err)
+ h.lg.Warn(
+ "failed to decode Raft message",
+ zap.String("local-member-id", h.localID.String()),
+ zap.String("remote-snapshot-sender-id", from),
+ zap.Error(err),
+ )
+ http.Error(w, msg, http.StatusBadRequest)
+ recvFailures.WithLabelValues(r.RemoteAddr).Inc()
+ snapshotReceiveFailures.WithLabelValues(from).Inc()
+ return
+ }
+
+ msgSize := m.Size()
+ receivedBytes.WithLabelValues(from).Add(float64(msgSize))
+
+ if m.Type != raftpb.MsgSnap {
+ h.lg.Warn(
+ "unexpected Raft message type",
+ zap.String("local-member-id", h.localID.String()),
+ zap.String("remote-snapshot-sender-id", from),
+ zap.String("message-type", m.Type.String()),
+ )
+ http.Error(w, "wrong raft message type", http.StatusBadRequest)
+ snapshotReceiveFailures.WithLabelValues(from).Inc()
+ return
+ }
+
+ snapshotReceiveInflights.WithLabelValues(from).Inc()
+ defer func() {
+ snapshotReceiveInflights.WithLabelValues(from).Dec()
+ }()
+
+ h.lg.Info(
+ "receiving database snapshot",
+ zap.String("local-member-id", h.localID.String()),
+ zap.String("remote-snapshot-sender-id", from),
+ zap.Uint64("incoming-snapshot-index", m.Snapshot.Metadata.Index),
+ zap.Int("incoming-snapshot-message-size-bytes", msgSize),
+ zap.String("incoming-snapshot-message-size", humanize.Bytes(uint64(msgSize))),
+ )
+
+ // save incoming database snapshot.
+
+ n, err := h.snapshotter.SaveDBFrom(r.Body, m.Snapshot.Metadata.Index)
+ if err != nil {
+ msg := fmt.Sprintf("failed to save KV snapshot (%v)", err)
+ h.lg.Warn(
+ "failed to save incoming database snapshot",
+ zap.String("local-member-id", h.localID.String()),
+ zap.String("remote-snapshot-sender-id", from),
+ zap.Uint64("incoming-snapshot-index", m.Snapshot.Metadata.Index),
+ zap.Error(err),
+ )
+ http.Error(w, msg, http.StatusInternalServerError)
+ snapshotReceiveFailures.WithLabelValues(from).Inc()
+ return
+ }
+
+ receivedBytes.WithLabelValues(from).Add(float64(n))
+
+ downloadTook := time.Since(start)
+ h.lg.Info(
+ "received and saved database snapshot",
+ zap.String("local-member-id", h.localID.String()),
+ zap.String("remote-snapshot-sender-id", from),
+ zap.Uint64("incoming-snapshot-index", m.Snapshot.Metadata.Index),
+ zap.Int64("incoming-snapshot-size-bytes", n),
+ zap.String("incoming-snapshot-size", humanize.Bytes(uint64(n))),
+ zap.String("download-took", downloadTook.String()),
+ )
+
+ if err := h.r.Process(context.TODO(), m); err != nil {
+ var writerErr writerToResponse
+ switch {
+ // Process may return writerToResponse error when doing some
+ // additional checks before calling raft.Node.Step.
+ case errors.As(err, &writerErr):
+ writerErr.WriteTo(w)
+ default:
+ msg := fmt.Sprintf("failed to process raft message (%v)", err)
+ h.lg.Warn(
+ "failed to process Raft message",
+ zap.String("local-member-id", h.localID.String()),
+ zap.String("remote-snapshot-sender-id", from),
+ zap.Error(err),
+ )
+ http.Error(w, msg, http.StatusInternalServerError)
+ snapshotReceiveFailures.WithLabelValues(from).Inc()
+ }
+ return
+ }
+
+ // Write the StatusNoContent header after the message has been processed by
+ // raft, which allows the client to report the MsgSnap status.
+ w.WriteHeader(http.StatusNoContent)
+
+ snapshotReceive.WithLabelValues(from).Inc()
+ snapshotReceiveSeconds.WithLabelValues(from).Observe(time.Since(start).Seconds())
+}
+
+type streamHandler struct {
+ lg *zap.Logger
+ tr *Transport
+ peerGetter peerGetter
+ r Raft
+ id types.ID
+ cid types.ID
+}
+
+func newStreamHandler(t *Transport, pg peerGetter, r Raft, id, cid types.ID) http.Handler {
+ h := &streamHandler{
+ lg: t.Logger,
+ tr: t,
+ peerGetter: pg,
+ r: r,
+ id: id,
+ cid: cid,
+ }
+ if h.lg == nil {
+ h.lg = zap.NewNop()
+ }
+ return h
+}
+
+func (h *streamHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ w.Header().Set("Allow", "GET")
+ http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
+ return
+ }
+
+ w.Header().Set("X-Server-Version", version.Version)
+ w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
+
+ if err := checkClusterCompatibilityFromHeader(h.lg, h.tr.ID, r.Header, h.cid); err != nil {
+ http.Error(w, err.Error(), http.StatusPreconditionFailed)
+ return
+ }
+
+ var t streamType
+ switch path.Dir(r.URL.Path) {
+ case streamTypeMsgAppV2.endpoint(h.lg):
+ t = streamTypeMsgAppV2
+ case streamTypeMessage.endpoint(h.lg):
+ t = streamTypeMessage
+ default:
+ h.lg.Debug(
+ "ignored unexpected streaming request path",
+ zap.String("local-member-id", h.tr.ID.String()),
+ zap.String("remote-peer-id-stream-handler", h.id.String()),
+ zap.String("path", r.URL.Path),
+ )
+ http.Error(w, "invalid path", http.StatusNotFound)
+ return
+ }
+
+ fromStr := path.Base(r.URL.Path)
+ from, err := types.IDFromString(fromStr)
+ if err != nil {
+ h.lg.Warn(
+ "failed to parse path into ID",
+ zap.String("local-member-id", h.tr.ID.String()),
+ zap.String("remote-peer-id-stream-handler", h.id.String()),
+ zap.String("path", fromStr),
+ zap.Error(err),
+ )
+ http.Error(w, "invalid from", http.StatusNotFound)
+ return
+ }
+ if h.r.IsIDRemoved(uint64(from)) {
+ h.lg.Warn(
+ "rejected stream from remote peer because it was removed",
+ zap.String("local-member-id", h.tr.ID.String()),
+ zap.String("remote-peer-id-stream-handler", h.id.String()),
+ zap.String("remote-peer-id-from", from.String()),
+ )
+ http.Error(w, "removed member", http.StatusGone)
+ return
+ }
+ p := h.peerGetter.Get(from)
+ if p == nil {
+ // This may happen in the following cases:
+ // 1. user starts a remote peer that belongs to a different cluster
+ // with the same cluster ID.
+ // 2. local etcd falls behind the cluster, and cannot recognize
+ // the members that joined after its current progress.
+ if urls := r.Header.Get("X-PeerURLs"); urls != "" {
+ h.tr.AddRemote(from, strings.Split(urls, ","))
+ }
+ h.lg.Warn(
+ "failed to find remote peer in cluster",
+ zap.String("local-member-id", h.tr.ID.String()),
+ zap.String("remote-peer-id-stream-handler", h.id.String()),
+ zap.String("remote-peer-id-from", from.String()),
+ zap.String("cluster-id", h.cid.String()),
+ )
+ http.Error(w, "error sender not found", http.StatusNotFound)
+ return
+ }
+
+ wto := h.id.String()
+ if gto := r.Header.Get("X-Raft-To"); gto != wto {
+ h.lg.Warn(
+ "ignored streaming request; ID mismatch",
+ zap.String("local-member-id", h.tr.ID.String()),
+ zap.String("remote-peer-id-stream-handler", h.id.String()),
+ zap.String("remote-peer-id-header", gto),
+ zap.String("remote-peer-id-from", from.String()),
+ zap.String("cluster-id", h.cid.String()),
+ )
+ http.Error(w, "to field mismatch", http.StatusPreconditionFailed)
+ return
+ }
+
+ w.WriteHeader(http.StatusOK)
+ w.(http.Flusher).Flush()
+
+ c := newCloseNotifier()
+ conn := &outgoingConn{
+ t: t,
+ Writer: w,
+ Flusher: w.(http.Flusher),
+ Closer: c,
+ localID: h.tr.ID,
+ peerID: from,
+ }
+ p.attachOutgoingConn(conn)
+ <-c.closeNotify()
+}
+
+// checkClusterCompatibilityFromHeader checks the cluster compatibility of
+// the local member against the given header.
+// It checks whether the version of the local member is compatible with
+// the versions in the header, and whether the cluster ID of the local member
+// matches the one in the header.
+func checkClusterCompatibilityFromHeader(lg *zap.Logger, localID types.ID, header http.Header, cid types.ID) error {
+ remoteName := header.Get("X-Server-From")
+
+ remoteServer := serverVersion(header)
+ remoteVs := ""
+ if remoteServer != nil {
+ remoteVs = remoteServer.String()
+ }
+
+ remoteMinClusterVer := minClusterVersion(header)
+ remoteMinClusterVs := ""
+ if remoteMinClusterVer != nil {
+ remoteMinClusterVs = remoteMinClusterVer.String()
+ }
+
+ localServer, localMinCluster, err := checkVersionCompatibility(remoteName, remoteServer, remoteMinClusterVer)
+
+ localVs := ""
+ if localServer != nil {
+ localVs = localServer.String()
+ }
+ localMinClusterVs := ""
+ if localMinCluster != nil {
+ localMinClusterVs = localMinCluster.String()
+ }
+
+ if err != nil {
+ lg.Warn(
+ "failed version compatibility check",
+ zap.String("local-member-id", localID.String()),
+ zap.String("local-member-cluster-id", cid.String()),
+ zap.String("local-member-server-version", localVs),
+ zap.String("local-member-server-minimum-cluster-version", localMinClusterVs),
+ zap.String("remote-peer-server-name", remoteName),
+ zap.String("remote-peer-server-version", remoteVs),
+ zap.String("remote-peer-server-minimum-cluster-version", remoteMinClusterVs),
+ zap.Error(err),
+ )
+ return errIncompatibleVersion
+ }
+ if gcid := header.Get("X-Etcd-Cluster-ID"); gcid != cid.String() {
+ lg.Warn(
+ "request cluster ID mismatch",
+ zap.String("local-member-id", localID.String()),
+ zap.String("local-member-cluster-id", cid.String()),
+ zap.String("local-member-server-version", localVs),
+ zap.String("local-member-server-minimum-cluster-version", localMinClusterVs),
+ zap.String("remote-peer-server-name", remoteName),
+ zap.String("remote-peer-server-version", remoteVs),
+ zap.String("remote-peer-server-minimum-cluster-version", remoteMinClusterVs),
+ zap.String("remote-peer-cluster-id", gcid),
+ )
+ return ErrClusterIDMismatch
+ }
+ return nil
+}
+
+type closeNotifier struct {
+ done chan struct{}
+}
+
+func newCloseNotifier() *closeNotifier {
+ return &closeNotifier{
+ done: make(chan struct{}),
+ }
+}
+
+func (n *closeNotifier) Close() error {
+ close(n.done)
+ return nil
+}
+
+func (n *closeNotifier) closeNotify() <-chan struct{} { return n.done }
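+
+// A minimal usage sketch of closeNotifier, mirroring how the stream handler
+// above blocks until the attached connection is closed:
+//
+//	c := newCloseNotifier()
+//	go func() {
+//		defer c.Close() // signal when the writer is done
+//		// ... write to the stream until it fails ...
+//	}()
+//	<-c.closeNotify() // unblocks once Close is called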
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/metrics.go
new file mode 100644
index 0000000..f9e13e2
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/metrics.go
@@ -0,0 +1,201 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import "github.com/prometheus/client_golang/prometheus"
+
+var (
+ activePeers = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "active_peers",
+ Help: "The current number of active peer connections.",
+ },
+ []string{"Local", "Remote"},
+ )
+
+ disconnectedPeers = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "disconnected_peers_total",
+ Help: "The total number of disconnected peers.",
+ },
+ []string{"Local", "Remote"},
+ )
+
+ sentBytes = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "peer_sent_bytes_total",
+ Help: "The total number of bytes sent to peers.",
+ },
+ []string{"To"},
+ )
+
+ receivedBytes = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "peer_received_bytes_total",
+ Help: "The total number of bytes received from peers.",
+ },
+ []string{"From"},
+ )
+
+ sentFailures = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "peer_sent_failures_total",
+ Help: "The total number of send failures from peers.",
+ },
+ []string{"To"},
+ )
+
+ recvFailures = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "peer_received_failures_total",
+ Help: "The total number of receive failures from peers.",
+ },
+ []string{"From"},
+ )
+
+ snapshotSend = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "snapshot_send_success",
+ Help: "Total number of successful snapshot sends",
+ },
+ []string{"To"},
+ )
+
+ snapshotSendInflights = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "snapshot_send_inflights_total",
+ Help: "Total number of inflight snapshot sends",
+ },
+ []string{"To"},
+ )
+
+ snapshotSendFailures = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "snapshot_send_failures",
+ Help: "Total number of snapshot send failures",
+ },
+ []string{"To"},
+ )
+
+ snapshotSendSeconds = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "snapshot_send_total_duration_seconds",
+ Help: "Total latency distributions of v3 snapshot sends",
+
+ // lowest bucket start of upper bound 0.1 sec (100 ms) with factor 2
+ // highest bucket start of 0.1 sec * 2^9 == 51.2 sec
+ Buckets: prometheus.ExponentialBuckets(0.1, 2, 10),
+ },
+ []string{"To"},
+ )
+
+ snapshotReceive = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "snapshot_receive_success",
+ Help: "Total number of successful snapshot receives",
+ },
+ []string{"From"},
+ )
+
+ snapshotReceiveInflights = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "snapshot_receive_inflights_total",
+ Help: "Total number of inflight snapshot receives",
+ },
+ []string{"From"},
+ )
+
+ snapshotReceiveFailures = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "snapshot_receive_failures",
+ Help: "Total number of snapshot receive failures",
+ },
+ []string{"From"},
+ )
+
+ snapshotReceiveSeconds = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "snapshot_receive_total_duration_seconds",
+ Help: "Total latency distributions of v3 snapshot receives",
+
+ // lowest bucket start of upper bound 0.1 sec (100 ms) with factor 2
+ // highest bucket start of 0.1 sec * 2^9 == 51.2 sec
+ Buckets: prometheus.ExponentialBuckets(0.1, 2, 10),
+ },
+ []string{"From"},
+ )
+
+ rttSec = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "peer_round_trip_time_seconds",
+ Help: "Round-Trip-Time histogram between peers",
+
+ // lowest bucket start of upper bound 0.0001 sec (0.1 ms) with factor 2
+ // highest bucket start of 0.0001 sec * 2^15 == 3.2768 sec
+ Buckets: prometheus.ExponentialBuckets(0.0001, 2, 16),
+ },
+ []string{"To"},
+ )
+)
+
+func init() {
+ prometheus.MustRegister(activePeers)
+ prometheus.MustRegister(disconnectedPeers)
+ prometheus.MustRegister(sentBytes)
+ prometheus.MustRegister(receivedBytes)
+ prometheus.MustRegister(sentFailures)
+ prometheus.MustRegister(recvFailures)
+
+ prometheus.MustRegister(snapshotSend)
+ prometheus.MustRegister(snapshotSendInflights)
+ prometheus.MustRegister(snapshotSendFailures)
+ prometheus.MustRegister(snapshotSendSeconds)
+ prometheus.MustRegister(snapshotReceive)
+ prometheus.MustRegister(snapshotReceiveInflights)
+ prometheus.MustRegister(snapshotReceiveFailures)
+ prometheus.MustRegister(snapshotReceiveSeconds)
+
+ prometheus.MustRegister(rttSec)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/msg_codec.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/msg_codec.go
new file mode 100644
index 0000000..5444c01
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/msg_codec.go
@@ -0,0 +1,68 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "encoding/binary"
+ "errors"
+ "io"
+
+ "go.etcd.io/etcd/pkg/v3/pbutil"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+// messageEncoder is an encoder that can encode all kinds of messages.
+// It MUST be used with a paired messageDecoder.
+type messageEncoder struct {
+ w io.Writer
+}
+
+func (enc *messageEncoder) encode(m *raftpb.Message) error {
+ if err := binary.Write(enc.w, binary.BigEndian, uint64(m.Size())); err != nil {
+ return err
+ }
+ _, err := enc.w.Write(pbutil.MustMarshal(m))
+ return err
+}
+
+// messageDecoder is a decoder that can decode all kinds of messages.
+type messageDecoder struct {
+ r io.Reader
+}
+
+var (
+ readBytesLimit uint64 = 512 * 1024 * 1024 // 512 MB
+ ErrExceedSizeLimit = errors.New("rafthttp: size limit exceeded")
+)
+
+func (dec *messageDecoder) decode() (raftpb.Message, error) {
+ return dec.decodeLimit(readBytesLimit)
+}
+
+func (dec *messageDecoder) decodeLimit(numBytes uint64) (raftpb.Message, error) {
+ var m raftpb.Message
+ var l uint64
+ if err := binary.Read(dec.r, binary.BigEndian, &l); err != nil {
+ return m, err
+ }
+ if l > numBytes {
+ return m, ErrExceedSizeLimit
+ }
+ buf := make([]byte, int(l))
+ if _, err := io.ReadFull(dec.r, buf); err != nil {
+ return m, err
+ }
+ return m, m.Unmarshal(buf)
+}
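+
+// A minimal round-trip sketch of the framing above (illustrative only; buf,
+// enc and dec are placeholder names):
+//
+//	var buf bytes.Buffer
+//	enc := &messageEncoder{w: &buf}
+//	if err := enc.encode(&raftpb.Message{Type: raftpb.MsgApp}); err != nil {
+//		// handle error
+//	}
+//	dec := &messageDecoder{r: &buf}
+//	m, err := dec.decode() // rejects payloads above readBytesLimit (512 MB)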
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/msgappv2_codec.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/msgappv2_codec.go
new file mode 100644
index 0000000..59425ae
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/msgappv2_codec.go
@@ -0,0 +1,248 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+ "time"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/pkg/v3/pbutil"
+ stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+const (
+ msgTypeLinkHeartbeat uint8 = 0
+ msgTypeAppEntries uint8 = 1
+ msgTypeApp uint8 = 2
+
+ msgAppV2BufSize = 1024 * 1024
+)
+
+// msgappv2 stream sends three types of messages: linkHeartbeatMessage,
+// AppEntries and MsgApp. AppEntries is the MsgApp that is sent in the
+// replicate state in raft, whose index and term are fully predictable.
+//
+// Data format of linkHeartbeatMessage:
+// | offset | bytes | description |
+// +--------+-------+-------------+
+// | 0      | 1     | \x00        |
+//
+// Data format of AppEntries:
+// | offset | bytes | description               |
+// +--------+-------+---------------------------+
+// | 0      | 1     | \x01                      |
+// | 1      | 8     | length of entries         |
+// | 9      | 8     | length of first entry     |
+// | 17     | n1    | first entry               |
+// ...
+// | x      | 8     | length of k-th entry data |
+// | x+8    | nk    | k-th entry data           |
+// | x+8+nk | 8     | commit index              |
+//
+// Data format of MsgApp:
+// | offset | bytes | description               |
+// +--------+-------+---------------------------+
+// | 0      | 1     | \x02                      |
+// | 1      | 8     | length of encoded message |
+// | 9      | n     | encoded message           |
+type msgAppV2Encoder struct {
+ w io.Writer
+ fs *stats.FollowerStats
+
+ term uint64
+ index uint64
+ buf []byte
+ uint64buf []byte
+ uint8buf []byte
+}
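+
+// Worked example of the AppEntries framing above (a sketch, not taken from a
+// live capture): a message carrying two entries of sizes n1=10 and n2=20
+// bytes is framed as
+//
+//	1 (type \x01) + 8 (entry count = 2)
+//	+ 8 + 10 (length + data of the first entry)
+//	+ 8 + 20 (length + data of the second entry)
+//	+ 8 (commit index)
+//
+// i.e. 63 bytes in total, with all multi-byte integers big-endian.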
+
+func newMsgAppV2Encoder(w io.Writer, fs *stats.FollowerStats) *msgAppV2Encoder {
+ return &msgAppV2Encoder{
+ w: w,
+ fs: fs,
+ buf: make([]byte, msgAppV2BufSize),
+ uint64buf: make([]byte, 8),
+ uint8buf: make([]byte, 1),
+ }
+}
+
+func (enc *msgAppV2Encoder) encode(m *raftpb.Message) error {
+ start := time.Now()
+ switch {
+ case isLinkHeartbeatMessage(m):
+ enc.uint8buf[0] = msgTypeLinkHeartbeat
+ if _, err := enc.w.Write(enc.uint8buf); err != nil {
+ return err
+ }
+ case enc.index == m.Index && enc.term == m.LogTerm && m.LogTerm == m.Term:
+ enc.uint8buf[0] = msgTypeAppEntries
+ if _, err := enc.w.Write(enc.uint8buf); err != nil {
+ return err
+ }
+ // write length of entries
+ binary.BigEndian.PutUint64(enc.uint64buf, uint64(len(m.Entries)))
+ if _, err := enc.w.Write(enc.uint64buf); err != nil {
+ return err
+ }
+ for i := 0; i < len(m.Entries); i++ {
+ // write length of entry
+ binary.BigEndian.PutUint64(enc.uint64buf, uint64(m.Entries[i].Size()))
+ if _, err := enc.w.Write(enc.uint64buf); err != nil {
+ return err
+ }
+ if n := m.Entries[i].Size(); n < msgAppV2BufSize {
+ if _, err := m.Entries[i].MarshalTo(enc.buf); err != nil {
+ return err
+ }
+ if _, err := enc.w.Write(enc.buf[:n]); err != nil {
+ return err
+ }
+ } else {
+ if _, err := enc.w.Write(pbutil.MustMarshal(&m.Entries[i])); err != nil {
+ return err
+ }
+ }
+ enc.index++
+ }
+ // write commit index
+ binary.BigEndian.PutUint64(enc.uint64buf, m.Commit)
+ if _, err := enc.w.Write(enc.uint64buf); err != nil {
+ return err
+ }
+ enc.fs.Succ(time.Since(start))
+ default:
+ if err := binary.Write(enc.w, binary.BigEndian, msgTypeApp); err != nil {
+ return err
+ }
+ // write size of message
+ if err := binary.Write(enc.w, binary.BigEndian, uint64(m.Size())); err != nil {
+ return err
+ }
+ // write message
+ if _, err := enc.w.Write(pbutil.MustMarshal(m)); err != nil {
+ return err
+ }
+
+ enc.term = m.Term
+ enc.index = m.Index
+ if l := len(m.Entries); l > 0 {
+ enc.index = m.Entries[l-1].Index
+ }
+ enc.fs.Succ(time.Since(start))
+ }
+ return nil
+}
+
+type msgAppV2Decoder struct {
+ r io.Reader
+ local, remote types.ID
+
+ term uint64
+ index uint64
+ buf []byte
+ uint64buf []byte
+ uint8buf []byte
+}
+
+func newMsgAppV2Decoder(r io.Reader, local, remote types.ID) *msgAppV2Decoder {
+ return &msgAppV2Decoder{
+ r: r,
+ local: local,
+ remote: remote,
+ buf: make([]byte, msgAppV2BufSize),
+ uint64buf: make([]byte, 8),
+ uint8buf: make([]byte, 1),
+ }
+}
+
+func (dec *msgAppV2Decoder) decode() (raftpb.Message, error) {
+ var (
+ m raftpb.Message
+ typ uint8
+ )
+ if _, err := io.ReadFull(dec.r, dec.uint8buf); err != nil {
+ return m, err
+ }
+ typ = dec.uint8buf[0]
+ switch typ {
+ case msgTypeLinkHeartbeat:
+ return linkHeartbeatMessage, nil
+ case msgTypeAppEntries:
+ m = raftpb.Message{
+ Type: raftpb.MsgApp,
+ From: uint64(dec.remote),
+ To: uint64(dec.local),
+ Term: dec.term,
+ LogTerm: dec.term,
+ Index: dec.index,
+ }
+
+ // decode entries
+ if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
+ return m, err
+ }
+ l := binary.BigEndian.Uint64(dec.uint64buf)
+ m.Entries = make([]raftpb.Entry, int(l))
+ for i := 0; i < int(l); i++ {
+ if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
+ return m, err
+ }
+ size := binary.BigEndian.Uint64(dec.uint64buf)
+ var buf []byte
+ if size < msgAppV2BufSize {
+ buf = dec.buf[:size]
+ if _, err := io.ReadFull(dec.r, buf); err != nil {
+ return m, err
+ }
+ } else {
+ buf = make([]byte, int(size))
+ if _, err := io.ReadFull(dec.r, buf); err != nil {
+ return m, err
+ }
+ }
+ dec.index++
+ // 1 alloc
+ pbutil.MustUnmarshal(&m.Entries[i], buf)
+ }
+ // decode commit index
+ if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
+ return m, err
+ }
+ m.Commit = binary.BigEndian.Uint64(dec.uint64buf)
+ case msgTypeApp:
+ var size uint64
+ if err := binary.Read(dec.r, binary.BigEndian, &size); err != nil {
+ return m, err
+ }
+ buf := make([]byte, int(size))
+ if _, err := io.ReadFull(dec.r, buf); err != nil {
+ return m, err
+ }
+ pbutil.MustUnmarshal(&m, buf)
+
+ dec.term = m.Term
+ dec.index = m.Index
+ if l := len(m.Entries); l > 0 {
+ dec.index = m.Entries[l-1].Index
+ }
+ default:
+ return m, fmt.Errorf("failed to parse type %d in msgappv2 stream", typ)
+ }
+ return m, nil
+}
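+
+// Note: encoder and decoder MUST stay in lockstep. The AppEntries case
+// carries no term or index on the wire; the decoder reconstructs them from
+// the state (dec.term, dec.index) left behind by the last full MsgApp.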
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/peer.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/peer.go
new file mode 100644
index 0000000..c1e6ba1
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/peer.go
@@ -0,0 +1,353 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "context"
+ "sync"
+ "time"
+
+ "go.uber.org/zap"
+ "golang.org/x/time/rate"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/snap"
+ stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats"
+ "go.etcd.io/raft/v3"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+const (
+ // ConnReadTimeout and ConnWriteTimeout are the i/o timeouts set on each connection the rafthttp pkg creates.
+ // A 5-second timeout is good enough for recycling bad connections; otherwise we
+ // have to wait for tcp keepalive to detect a bad connection, which takes minutes.
+ // For long-term streaming connections, the rafthttp pkg sends application-level
+ // linkHeartbeatMessages to keep the connection alive.
+ // For short-term pipeline connections, the connection MUST be killed to avoid it
+ // being put back into the http pkg connection pool.
+ DefaultConnReadTimeout = 5 * time.Second
+ DefaultConnWriteTimeout = 5 * time.Second
+
+ recvBufSize = 4096
+ // maxPendingProposals bounds the proposals buffered during one leader
+ // election. Generally one leader election takes at most 1 sec, with 0-2
+ // election conflicts each taking 0.5 sec.
+ // We assume the number of concurrent proposers is smaller than 4096.
+ // Since one client blocks on its proposal for at least 1 sec, 4096 is
+ // enough to hold all proposals.
+ maxPendingProposals = 4096
+
+ streamAppV2 = "streamMsgAppV2"
+ streamMsg = "streamMsg"
+ pipelineMsg = "pipeline"
+ sendSnap = "sendMsgSnap"
+)
+
+var (
+ ConnReadTimeout = DefaultConnReadTimeout
+ ConnWriteTimeout = DefaultConnWriteTimeout
+)
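+
+// Note: streamWriter.run sends linkHeartbeatMessage on a ConnReadTimeout/3
+// ticker to keep long-lived streams alive, so with the default 5s read
+// timeout a heartbeat goes out roughly every 1.67s.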
+
+type Peer interface {
+ // send sends the message to the remote peer. The function is non-blocking
+ // and makes no promise that the message will be received by the remote.
+ // When it fails to send the message out, it reports the status to the
+ // underlying raft.
+ send(m raftpb.Message)
+
+ // sendSnap sends the merged snapshot message to the remote peer. Its behavior
+ // is similar to send.
+ sendSnap(m snap.Message)
+
+ // update updates the URLs of the remote peer.
+ update(urls types.URLs)
+
+ // attachOutgoingConn attaches the outgoing connection to the peer for
+ // stream usage. After the call, ownership of the outgoing connection is
+ // handed over to the peer. The peer will close the connection when it is
+ // no longer used.
+ attachOutgoingConn(conn *outgoingConn)
+ // activeSince returns the time when the connection with the
+ // peer became active.
+ activeSince() time.Time
+ // stop performs any necessary finalization and terminates the peer
+ // gracefully.
+ stop()
+}
+
+// peer is the representative of a remote raft node. The local raft node sends
+// messages to the remote through the peer.
+// Each peer has two underlying mechanisms to send out a message: stream and
+// pipeline.
+// A stream is a receiver-initialized long-polling connection, which is always
+// open to transfer messages. Besides the general stream, a peer also has an
+// optimized stream for sending msgApp, since msgApp accounts for a large part
+// of all messages. Only the raft leader uses the optimized stream to send
+// msgApp to the remote follower node.
+// A pipeline is a series of http clients that send http requests to the remote.
+// It is only used when the stream has not been established.
+type peer struct {
+ lg *zap.Logger
+
+ localID types.ID
+ // id of the remote raft peer node
+ id types.ID
+
+ r Raft
+
+ status *peerStatus
+
+ picker *urlPicker
+
+ msgAppV2Writer *streamWriter
+ writer *streamWriter
+ pipeline *pipeline
+ snapSender *snapshotSender // snapshot sender to send v3 snapshot messages
+ msgAppV2Reader *streamReader
+ msgAppReader *streamReader
+
+ recvc chan raftpb.Message
+ propc chan raftpb.Message
+
+ mu sync.Mutex
+ paused bool
+
+ cancel context.CancelFunc // cancels pending work in goroutines created by the peer.
+ stopc chan struct{}
+}
+
+func startPeer(t *Transport, urls types.URLs, peerID types.ID, fs *stats.FollowerStats) *peer {
+ if t.Logger != nil {
+ t.Logger.Info("starting remote peer", zap.String("remote-peer-id", peerID.String()))
+ }
+ defer func() {
+ if t.Logger != nil {
+ t.Logger.Info("started remote peer", zap.String("remote-peer-id", peerID.String()))
+ }
+ }()
+
+ status := newPeerStatus(t.Logger, t.ID, peerID)
+ picker := newURLPicker(urls)
+ errorc := t.ErrorC
+ r := t.Raft
+ pipeline := &pipeline{
+ peerID: peerID,
+ tr: t,
+ picker: picker,
+ status: status,
+ followerStats: fs,
+ raft: r,
+ errorc: errorc,
+ }
+ pipeline.start()
+
+ p := &peer{
+ lg: t.Logger,
+ localID: t.ID,
+ id: peerID,
+ r: r,
+ status: status,
+ picker: picker,
+ msgAppV2Writer: startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),
+ writer: startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),
+ pipeline: pipeline,
+ snapSender: newSnapshotSender(t, picker, peerID, status),
+ recvc: make(chan raftpb.Message, recvBufSize),
+ propc: make(chan raftpb.Message, maxPendingProposals),
+ stopc: make(chan struct{}),
+ }
+
+ ctx, cancel := context.WithCancel(context.Background())
+ p.cancel = cancel
+ go func() {
+ for {
+ select {
+ case mm := <-p.recvc:
+ if err := r.Process(ctx, mm); err != nil {
+ if t.Logger != nil {
+ t.Logger.Warn("failed to process Raft message", zap.Error(err))
+ }
+ }
+ case <-p.stopc:
+ return
+ }
+ }
+ }()
+
+ // r.Process might block while processing a proposal when there is no leader.
+ // Thus propc must be handled in a goroutine separate from recvc to avoid
+ // blocking the processing of other raft messages.
+ go func() {
+ for {
+ select {
+ case mm := <-p.propc:
+ if err := r.Process(ctx, mm); err != nil {
+ if t.Logger != nil {
+ t.Logger.Warn("failed to process Raft message", zap.Error(err))
+ }
+ }
+ case <-p.stopc:
+ return
+ }
+ }
+ }()
+
+ p.msgAppV2Reader = &streamReader{
+ lg: t.Logger,
+ peerID: peerID,
+ typ: streamTypeMsgAppV2,
+ tr: t,
+ picker: picker,
+ status: status,
+ recvc: p.recvc,
+ propc: p.propc,
+ rl: rate.NewLimiter(t.DialRetryFrequency, 1),
+ }
+ p.msgAppReader = &streamReader{
+ lg: t.Logger,
+ peerID: peerID,
+ typ: streamTypeMessage,
+ tr: t,
+ picker: picker,
+ status: status,
+ recvc: p.recvc,
+ propc: p.propc,
+ rl: rate.NewLimiter(t.DialRetryFrequency, 1),
+ }
+
+ p.msgAppV2Reader.start()
+ p.msgAppReader.start()
+
+ return p
+}
+
+func (p *peer) send(m raftpb.Message) {
+ p.mu.Lock()
+ paused := p.paused
+ p.mu.Unlock()
+
+ if paused {
+ return
+ }
+
+ writec, name := p.pick(m)
+ select {
+ case writec <- m:
+ default:
+ p.r.ReportUnreachable(m.To)
+ if isMsgSnap(m) {
+ p.r.ReportSnapshot(m.To, raft.SnapshotFailure)
+ }
+ if p.lg != nil {
+ p.lg.Warn(
+ "dropped internal Raft message since sending buffer is full",
+ zap.String("message-type", m.Type.String()),
+ zap.String("local-member-id", p.localID.String()),
+ zap.String("from", types.ID(m.From).String()),
+ zap.String("remote-peer-id", p.id.String()),
+ zap.String("remote-peer-name", name),
+ zap.Bool("remote-peer-active", p.status.isActive()),
+ )
+ }
+ sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
+ }
+}
+
+func (p *peer) sendSnap(m snap.Message) {
+ go p.snapSender.send(m)
+}
+
+func (p *peer) update(urls types.URLs) {
+ p.picker.update(urls)
+}
+
+func (p *peer) attachOutgoingConn(conn *outgoingConn) {
+ var ok bool
+ switch conn.t {
+ case streamTypeMsgAppV2:
+ ok = p.msgAppV2Writer.attach(conn)
+ case streamTypeMessage:
+ ok = p.writer.attach(conn)
+ default:
+ if p.lg != nil {
+ p.lg.Panic("unknown stream type", zap.String("type", conn.t.String()))
+ }
+ }
+ if !ok {
+ conn.Close()
+ }
+}
+
+func (p *peer) activeSince() time.Time { return p.status.activeSince() }
+
+// Pause pauses the peer. The peer will simply drop all incoming
+// messages without returning an error.
+func (p *peer) Pause() {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ p.paused = true
+ p.msgAppReader.pause()
+ p.msgAppV2Reader.pause()
+}
+
+// Resume resumes a paused peer.
+func (p *peer) Resume() {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ p.paused = false
+ p.msgAppReader.resume()
+ p.msgAppV2Reader.resume()
+}
+
+func (p *peer) stop() {
+ if p.lg != nil {
+ p.lg.Info("stopping remote peer", zap.String("remote-peer-id", p.id.String()))
+ }
+
+ defer func() {
+ if p.lg != nil {
+ p.lg.Info("stopped remote peer", zap.String("remote-peer-id", p.id.String()))
+ }
+ }()
+
+ close(p.stopc)
+ p.cancel()
+ p.msgAppV2Writer.stop()
+ p.writer.stop()
+ p.pipeline.stop()
+ p.snapSender.stop()
+ p.msgAppV2Reader.stop()
+ p.msgAppReader.stop()
+}
+
+// pick picks a chan for sending the given message. The picked chan and its
+// string name are returned.
+func (p *peer) pick(m raftpb.Message) (writec chan<- raftpb.Message, picked string) {
+ var ok bool
+ // Since a MsgSnap may be large (e.g., 1 GB) and would block the stream
+ // for a long time, only the pipeline is used to send MsgSnap.
+ if isMsgSnap(m) {
+ return p.pipeline.msgc, pipelineMsg
+ } else if writec, ok = p.msgAppV2Writer.writec(); ok && isMsgApp(m) {
+ return writec, streamAppV2
+ } else if writec, ok = p.writer.writec(); ok {
+ return writec, streamMsg
+ }
+ return p.pipeline.msgc, pipelineMsg
+}
+
+func isMsgApp(m raftpb.Message) bool { return m.Type == raftpb.MsgApp }
+
+func isMsgSnap(m raftpb.Message) bool { return m.Type == raftpb.MsgSnap }
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/peer_status.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/peer_status.go
new file mode 100644
index 0000000..01c3eba
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/peer_status.go
@@ -0,0 +1,90 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "errors"
+ "fmt"
+ "sync"
+ "time"
+
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+)
+
+type failureType struct {
+ source string
+ action string
+}
+
+type peerStatus struct {
+ lg *zap.Logger
+ local types.ID
+ id types.ID
+ mu sync.Mutex // protect variables below
+ active bool
+ since time.Time
+}
+
+func newPeerStatus(lg *zap.Logger, local, id types.ID) *peerStatus {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ return &peerStatus{lg: lg, local: local, id: id}
+}
+
+func (s *peerStatus) activate() {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.active {
+ s.lg.Info("peer became active", zap.String("peer-id", s.id.String()))
+ s.active = true
+ s.since = time.Now()
+
+ activePeers.WithLabelValues(s.local.String(), s.id.String()).Inc()
+ }
+}
+
+func (s *peerStatus) deactivate(failure failureType, reason string) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ msg := fmt.Sprintf("failed to %s %s on %s (%s)", failure.action, s.id, failure.source, reason)
+ if s.active {
+ s.lg.Warn("peer became inactive (message send to peer failed)", zap.String("peer-id", s.id.String()), zap.Error(errors.New(msg)))
+ s.active = false
+ s.since = time.Time{}
+
+ activePeers.WithLabelValues(s.local.String(), s.id.String()).Dec()
+ disconnectedPeers.WithLabelValues(s.local.String(), s.id.String()).Inc()
+ return
+ }
+
+ if s.lg != nil {
+ s.lg.Debug("peer deactivated again", zap.String("peer-id", s.id.String()), zap.Error(errors.New(msg)))
+ }
+}
+
+func (s *peerStatus) isActive() bool {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return s.active
+}
+
+func (s *peerStatus) activeSince() time.Time {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return s.since
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/pipeline.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/pipeline.go
new file mode 100644
index 0000000..0790b58
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/pipeline.go
@@ -0,0 +1,178 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "bytes"
+ "context"
+ "errors"
+ "io"
+ "runtime"
+ "sync"
+ "time"
+
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/pkg/v3/pbutil"
+ stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats"
+ "go.etcd.io/raft/v3"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+const (
+ connPerPipeline = 4
+ // pipelineBufSize is the size of the pipeline buffer, which helps absorb
+ // temporary network latency.
+ // The size ensures that the pipeline does not drop messages when the
+ // network is out of service for less than 1 second on the good path.
+ pipelineBufSize = 64
+)
+
+var errStopped = errors.New("stopped")
+
+type pipeline struct {
+ peerID types.ID
+
+ tr *Transport
+ picker *urlPicker
+ status *peerStatus
+ raft Raft
+ errorc chan error
+ // deprecate when we deprecate the v2 API
+ followerStats *stats.FollowerStats
+
+ msgc chan raftpb.Message
+ // wait for the handling routines
+ wg sync.WaitGroup
+ stopc chan struct{}
+}
+
+func (p *pipeline) start() {
+ p.stopc = make(chan struct{})
+ p.msgc = make(chan raftpb.Message, pipelineBufSize)
+ p.wg.Add(connPerPipeline)
+ for i := 0; i < connPerPipeline; i++ {
+ go p.handle()
+ }
+
+ if p.tr != nil && p.tr.Logger != nil {
+ p.tr.Logger.Info(
+ "started HTTP pipelining with remote peer",
+ zap.String("local-member-id", p.tr.ID.String()),
+ zap.String("remote-peer-id", p.peerID.String()),
+ )
+ }
+}
+
+func (p *pipeline) stop() {
+ close(p.stopc)
+ p.wg.Wait()
+
+ if p.tr != nil && p.tr.Logger != nil {
+ p.tr.Logger.Info(
+ "stopped HTTP pipelining with remote peer",
+ zap.String("local-member-id", p.tr.ID.String()),
+ zap.String("remote-peer-id", p.peerID.String()),
+ )
+ }
+}
+
+func (p *pipeline) handle() {
+ defer p.wg.Done()
+
+ for {
+ select {
+ case m := <-p.msgc:
+ start := time.Now()
+ err := p.post(pbutil.MustMarshal(&m))
+ end := time.Now()
+
+ if err != nil {
+ p.status.deactivate(failureType{source: pipelineMsg, action: "write"}, err.Error())
+
+ if isMsgApp(m) && p.followerStats != nil {
+ p.followerStats.Fail()
+ }
+ p.raft.ReportUnreachable(m.To)
+ if isMsgSnap(m) {
+ p.raft.ReportSnapshot(m.To, raft.SnapshotFailure)
+ }
+ sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
+ continue
+ }
+
+ p.status.activate()
+ if isMsgApp(m) && p.followerStats != nil {
+ p.followerStats.Succ(end.Sub(start))
+ }
+ if isMsgSnap(m) {
+ p.raft.ReportSnapshot(m.To, raft.SnapshotFinish)
+ }
+ sentBytes.WithLabelValues(types.ID(m.To).String()).Add(float64(m.Size()))
+ case <-p.stopc:
+ return
+ }
+ }
+}
+
+// post POSTs a data payload to a URL. It returns nil if the POST succeeds,
+// and an error on any failure.
+func (p *pipeline) post(data []byte) (err error) {
+ u := p.picker.pick()
+ req := createPostRequest(p.tr.Logger, u, RaftPrefix, bytes.NewBuffer(data), "application/protobuf", p.tr.URLs, p.tr.ID, p.tr.ClusterID)
+
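+ // Tie the request to a cancellable context: the goroutine below cancels
+ // the in-flight request either once the round trip completes (done) or
+ // when the pipeline is stopped (stopc), yielding once (waitSchedule)
+ // before cancelling.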
+ done := make(chan struct{}, 1)
+ ctx, cancel := context.WithCancel(context.Background())
+ req = req.WithContext(ctx)
+ go func() {
+ select {
+ case <-done:
+ cancel()
+ case <-p.stopc:
+ waitSchedule()
+ cancel()
+ }
+ }()
+
+ resp, err := p.tr.pipelineRt.RoundTrip(req)
+ done <- struct{}{}
+ if err != nil {
+ p.picker.unreachable(u)
+ return err
+ }
+ defer resp.Body.Close()
+ b, err := io.ReadAll(resp.Body)
+ if err != nil {
+ p.picker.unreachable(u)
+ return err
+ }
+
+ err = checkPostResponse(p.tr.Logger, resp, b, req, p.peerID)
+ if err != nil {
+ p.picker.unreachable(u)
+ // errMemberRemoved is a critical error since a removed member should
+ // always be stopped. So we use reportCriticalError to report it to errorc.
+ if errors.Is(err, errMemberRemoved) {
+ reportCriticalError(err, p.errorc)
+ }
+ return err
+ }
+
+ return nil
+}
+
+// waitSchedule yields the processor so other goroutines can be scheduled.
+func waitSchedule() { runtime.Gosched() }
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/probing_status.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/probing_status.go
new file mode 100644
index 0000000..672a579
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/probing_status.go
@@ -0,0 +1,98 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/xiang90/probing"
+ "go.uber.org/zap"
+)
+
+const (
+ // RoundTripperNameRaftMessage is the name of round-tripper that sends
+ // all other Raft messages, other than "snap.Message".
+ RoundTripperNameRaftMessage = "ROUND_TRIPPER_RAFT_MESSAGE"
+ // RoundTripperNameSnapshot is the name of round-tripper that sends merged snapshot message.
+ RoundTripperNameSnapshot = "ROUND_TRIPPER_SNAPSHOT"
+)
+
+var (
+ // proberInterval must be shorter than the read timeout;
+ // otherwise the connection will time out.
+ proberInterval = ConnReadTimeout - time.Second
+ statusMonitoringInterval = 30 * time.Second
+ statusErrorInterval = 5 * time.Second
+)
+
+func addPeerToProber(lg *zap.Logger, p probing.Prober, id string, us []string, roundTripperName string, rttSecProm *prometheus.HistogramVec) {
+ hus := make([]string, len(us))
+ for i := range us {
+ hus[i] = us[i] + ProbingPrefix
+ }
+
+ p.AddHTTP(id, proberInterval, hus)
+
+ s, err := p.Status(id)
+ if err != nil {
+ if lg != nil {
+ lg.Warn("failed to add peer into prober", zap.String("remote-peer-id", id), zap.Error(err))
+ }
+ return
+ }
+
+ go monitorProbingStatus(lg, s, id, roundTripperName, rttSecProm)
+}
+
+func monitorProbingStatus(lg *zap.Logger, s probing.Status, id string, roundTripperName string, rttSecProm *prometheus.HistogramVec) {
+ // set the first interval short to log errors early.
+ interval := statusErrorInterval
+ for {
+ select {
+ case <-time.After(interval):
+ if !s.Health() {
+ if lg != nil {
+ lg.Warn(
+ "prober detected unhealthy status",
+ zap.String("round-tripper-name", roundTripperName),
+ zap.String("remote-peer-id", id),
+ zap.Duration("rtt", s.SRTT()),
+ zap.Error(s.Err()),
+ )
+ }
+ interval = statusErrorInterval
+ } else {
+ interval = statusMonitoringInterval
+ }
+ if s.ClockDiff() > time.Second {
+ if lg != nil {
+ lg.Warn(
+ "prober found high clock drift",
+ zap.String("round-tripper-name", roundTripperName),
+ zap.String("remote-peer-id", id),
+ zap.Duration("clock-drift", s.ClockDiff()),
+ zap.Duration("rtt", s.SRTT()),
+ zap.Error(s.Err()),
+ )
+ }
+ }
+ rttSecProm.WithLabelValues(id).Observe(s.SRTT().Seconds())
+
+ case <-s.StopNotify():
+ return
+ }
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/remote.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/remote.go
new file mode 100644
index 0000000..3eb2f38
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/remote.go
@@ -0,0 +1,95 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+type remote struct {
+ lg *zap.Logger
+ localID types.ID
+ id types.ID
+ status *peerStatus
+ pipeline *pipeline
+}
+
+func startRemote(tr *Transport, urls types.URLs, id types.ID) *remote {
+ picker := newURLPicker(urls)
+ status := newPeerStatus(tr.Logger, tr.ID, id)
+ pipeline := &pipeline{
+ peerID: id,
+ tr: tr,
+ picker: picker,
+ status: status,
+ raft: tr.Raft,
+ errorc: tr.ErrorC,
+ }
+ pipeline.start()
+
+ return &remote{
+ lg: tr.Logger,
+ localID: tr.ID,
+ id: id,
+ status: status,
+ pipeline: pipeline,
+ }
+}
+
+func (g *remote) send(m raftpb.Message) {
+ select {
+ case g.pipeline.msgc <- m:
+ default:
+ if g.status.isActive() {
+ if g.lg != nil {
+ g.lg.Warn(
+ "dropped internal Raft message since sending buffer is full (overloaded network)",
+ zap.String("message-type", m.Type.String()),
+ zap.String("local-member-id", g.localID.String()),
+ zap.String("from", types.ID(m.From).String()),
+ zap.String("remote-peer-id", g.id.String()),
+ zap.Bool("remote-peer-active", g.status.isActive()),
+ )
+ }
+ } else {
+ if g.lg != nil {
+ g.lg.Warn(
+ "dropped Raft message since sending buffer is full (overloaded network)",
+ zap.String("message-type", m.Type.String()),
+ zap.String("local-member-id", g.localID.String()),
+ zap.String("from", types.ID(m.From).String()),
+ zap.String("remote-peer-id", g.id.String()),
+ zap.Bool("remote-peer-active", g.status.isActive()),
+ )
+ }
+ }
+ sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
+ }
+}
+
+func (g *remote) stop() {
+ g.pipeline.stop()
+}
+
+func (g *remote) Pause() {
+ g.stop()
+}
+
+func (g *remote) Resume() {
+ g.pipeline.start()
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/snapshot_sender.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/snapshot_sender.go
new file mode 100644
index 0000000..8dbc117
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/snapshot_sender.go
@@ -0,0 +1,199 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "bytes"
+ "context"
+ "errors"
+ "io"
+ "net/http"
+ "time"
+
+ "github.com/dustin/go-humanize"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/pkg/v3/httputil"
+ pioutil "go.etcd.io/etcd/pkg/v3/ioutil"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/snap"
+ "go.etcd.io/raft/v3"
+)
+
+// timeout for reading snapshot response body
+var snapResponseReadTimeout = 5 * time.Second
+
+type snapshotSender struct {
+ from, to types.ID
+ cid types.ID
+
+ tr *Transport
+ picker *urlPicker
+ status *peerStatus
+ r Raft
+ errorc chan error
+
+ stopc chan struct{}
+}
+
+func newSnapshotSender(tr *Transport, picker *urlPicker, to types.ID, status *peerStatus) *snapshotSender {
+ return &snapshotSender{
+ from: tr.ID,
+ to: to,
+ cid: tr.ClusterID,
+ tr: tr,
+ picker: picker,
+ status: status,
+ r: tr.Raft,
+ errorc: tr.ErrorC,
+ stopc: make(chan struct{}),
+ }
+}
+
+func (s *snapshotSender) stop() { close(s.stopc) }
+
+func (s *snapshotSender) send(merged snap.Message) {
+ start := time.Now()
+
+ m := merged.Message
+ to := types.ID(m.To).String()
+
+ body := createSnapBody(s.tr.Logger, merged)
+ defer body.Close()
+
+ u := s.picker.pick()
+ req := createPostRequest(s.tr.Logger, u, RaftSnapshotPrefix, body, "application/octet-stream", s.tr.URLs, s.from, s.cid)
+
+ snapshotSizeVal := uint64(merged.TotalSize)
+ snapshotSize := humanize.Bytes(snapshotSizeVal)
+ if s.tr.Logger != nil {
+ s.tr.Logger.Info(
+ "sending database snapshot",
+ zap.Uint64("snapshot-index", m.Snapshot.Metadata.Index),
+ zap.String("remote-peer-id", to),
+ zap.Uint64("bytes", snapshotSizeVal),
+ zap.String("size", snapshotSize),
+ )
+ }
+
+ snapshotSendInflights.WithLabelValues(to).Inc()
+ defer func() {
+ snapshotSendInflights.WithLabelValues(to).Dec()
+ }()
+
+ err := s.post(req)
+ defer merged.CloseWithError(err)
+ if err != nil {
+ if s.tr.Logger != nil {
+ s.tr.Logger.Warn(
+ "failed to send database snapshot",
+ zap.Uint64("snapshot-index", m.Snapshot.Metadata.Index),
+ zap.String("remote-peer-id", to),
+ zap.Uint64("bytes", snapshotSizeVal),
+ zap.String("size", snapshotSize),
+ zap.Error(err),
+ )
+ }
+
+ // errMemberRemoved is a critical error since a removed member should
+ // always be stopped. So we use reportCriticalError to report it to errorc.
+ if errors.Is(err, errMemberRemoved) {
+ reportCriticalError(err, s.errorc)
+ }
+
+ s.picker.unreachable(u)
+ s.status.deactivate(failureType{source: sendSnap, action: "post"}, err.Error())
+ s.r.ReportUnreachable(m.To)
+ // report SnapshotFailure to raft state machine. After raft state
+ // machine knows about it, it would pause a while and retry sending
+ // new snapshot message.
+ s.r.ReportSnapshot(m.To, raft.SnapshotFailure)
+ sentFailures.WithLabelValues(to).Inc()
+ snapshotSendFailures.WithLabelValues(to).Inc()
+ return
+ }
+ s.status.activate()
+ s.r.ReportSnapshot(m.To, raft.SnapshotFinish)
+
+ if s.tr.Logger != nil {
+ s.tr.Logger.Info(
+ "sent database snapshot",
+ zap.Uint64("snapshot-index", m.Snapshot.Metadata.Index),
+ zap.String("remote-peer-id", to),
+ zap.Uint64("bytes", snapshotSizeVal),
+ zap.String("size", snapshotSize),
+ )
+ }
+
+ sentBytes.WithLabelValues(to).Add(float64(merged.TotalSize))
+ snapshotSend.WithLabelValues(to).Inc()
+ snapshotSendSeconds.WithLabelValues(to).Observe(time.Since(start).Seconds())
+}
+
+// post posts the given request.
+// It returns nil when the request is sent out and processed successfully.
+func (s *snapshotSender) post(req *http.Request) (err error) {
+ ctx, cancel := context.WithCancel(context.Background())
+ req = req.WithContext(ctx)
+ defer cancel()
+
+ type responseAndError struct {
+ resp *http.Response
+ body []byte
+ err error
+ }
+ result := make(chan responseAndError, 1)
+
+ go func() {
+ resp, err := s.tr.pipelineRt.RoundTrip(req)
+ if err != nil {
+ result <- responseAndError{resp, nil, err}
+ return
+ }
+
+ // close the response body on timeout.
+ // This prevents reading the body forever when the other side dies right
+ // after it successfully receives the request body.
+ time.AfterFunc(snapResponseReadTimeout, func() { httputil.GracefulClose(resp) })
+ body, err := io.ReadAll(resp.Body)
+ result <- responseAndError{resp, body, err}
+ }()
+
+ select {
+ case <-s.stopc:
+ return errStopped
+ case r := <-result:
+ if r.err != nil {
+ return r.err
+ }
+ return checkPostResponse(s.tr.Logger, r.resp, r.body, req, s.to)
+ }
+}
+
+func createSnapBody(lg *zap.Logger, merged snap.Message) io.ReadCloser {
+ buf := new(bytes.Buffer)
+ enc := &messageEncoder{w: buf}
+ // encode raft message
+ if err := enc.encode(&merged.Message); err != nil {
+ if lg != nil {
+ lg.Panic("failed to encode message", zap.Error(err))
+ }
+ }
+
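+ // The body is the encoded raft message followed by the raw snapshot data;
+ // closing it closes only merged.ReadCloser since the buffer needs no cleanup.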
+ return &pioutil.ReaderAndCloser{
+ Reader: io.MultiReader(buf, merged.ReadCloser),
+ Closer: merged.ReadCloser,
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/stream.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/stream.go
new file mode 100644
index 0000000..fa02f42
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/stream.go
@@ -0,0 +1,712 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "path"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+ "golang.org/x/time/rate"
+
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/client/pkg/v3/transport"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/pkg/v3/httputil"
+ stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+const (
+ streamTypeMessage streamType = "message"
+ streamTypeMsgAppV2 streamType = "msgappv2"
+
+ streamBufSize = 4096
+)
+
+var (
+ errUnsupportedStreamType = fmt.Errorf("unsupported stream type")
+
+ // the key is in string format "major.minor.patch"
+ supportedStream = map[string][]streamType{
+ "2.0.0": {},
+ "2.1.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "2.2.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "2.3.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "3.0.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "3.1.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "3.2.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "3.3.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "3.4.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "3.5.0": {streamTypeMsgAppV2, streamTypeMessage},
+ "3.6.0": {streamTypeMsgAppV2, streamTypeMessage},
+ }
+)
+
+type streamType string
+
+func (t streamType) endpoint(lg *zap.Logger) string {
+ switch t {
+ case streamTypeMsgAppV2:
+ return path.Join(RaftStreamPrefix, "msgapp")
+ case streamTypeMessage:
+ return path.Join(RaftStreamPrefix, "message")
+ default:
+ if lg != nil {
+ lg.Panic("unhandled stream type", zap.String("stream-type", t.String()))
+ }
+ return ""
+ }
+}
+
+func (t streamType) String() string {
+ switch t {
+ case streamTypeMsgAppV2:
+ return "stream MsgApp v2"
+ case streamTypeMessage:
+ return "stream Message"
+ default:
+ return "unknown stream"
+ }
+}
+
+// linkHeartbeatMessage is a special message used as the heartbeat message at
+// the link layer. It never conflicts with messages from raft because raft
+// doesn't send out messages without From and To fields.
+var linkHeartbeatMessage = raftpb.Message{Type: raftpb.MsgHeartbeat}
+
+func isLinkHeartbeatMessage(m *raftpb.Message) bool {
+ return m.Type == raftpb.MsgHeartbeat && m.From == 0 && m.To == 0
+}
+
+type outgoingConn struct {
+ t streamType
+ io.Writer
+ http.Flusher
+ io.Closer
+
+ localID types.ID
+ peerID types.ID
+}
+
+// streamWriter writes messages to the attached outgoingConn.
+type streamWriter struct {
+ lg *zap.Logger
+
+ localID types.ID
+ peerID types.ID
+
+ status *peerStatus
+ fs *stats.FollowerStats
+ r Raft
+
+ mu sync.Mutex // guards the working and closer fields
+ closer io.Closer
+ working bool
+
+ msgc chan raftpb.Message
+ connc chan *outgoingConn
+ stopc chan struct{}
+ done chan struct{}
+}
+
+// startStreamWriter creates a streamWriter and starts a long-running goroutine
+// that accepts messages and writes them to the attached outgoing connection.
+func startStreamWriter(lg *zap.Logger, local, id types.ID, status *peerStatus, fs *stats.FollowerStats, r Raft) *streamWriter {
+ w := &streamWriter{
+ lg: lg,
+
+ localID: local,
+ peerID: id,
+
+ status: status,
+ fs: fs,
+ r: r,
+ msgc: make(chan raftpb.Message, streamBufSize),
+ connc: make(chan *outgoingConn),
+ stopc: make(chan struct{}),
+ done: make(chan struct{}),
+ }
+ go w.run()
+ return w
+}
+
+func (cw *streamWriter) run() {
+ var (
+ msgc chan raftpb.Message
+ heartbeatc <-chan time.Time
+ t streamType
+ enc encoder
+ flusher http.Flusher
+ batched int
+ )
+ tickc := time.NewTicker(ConnReadTimeout / 3)
+ defer tickc.Stop()
+ unflushed := 0
+
+ if cw.lg != nil {
+ cw.lg.Info(
+ "started stream writer with remote peer",
+ zap.String("local-member-id", cw.localID.String()),
+ zap.String("remote-peer-id", cw.peerID.String()),
+ )
+ }
+
+ for {
+ select {
+ case <-heartbeatc:
+ err := enc.encode(&linkHeartbeatMessage)
+ unflushed += linkHeartbeatMessage.Size()
+ if err == nil {
+ flusher.Flush()
+ batched = 0
+ sentBytes.WithLabelValues(cw.peerID.String()).Add(float64(unflushed))
+ unflushed = 0
+ continue
+ }
+
+ cw.status.deactivate(failureType{source: t.String(), action: "heartbeat"}, err.Error())
+
+ sentFailures.WithLabelValues(cw.peerID.String()).Inc()
+ cw.close()
+ if cw.lg != nil {
+ cw.lg.Warn(
+ "lost TCP streaming connection with remote peer",
+ zap.String("stream-writer-type", t.String()),
+ zap.String("local-member-id", cw.localID.String()),
+ zap.String("remote-peer-id", cw.peerID.String()),
+ )
+ }
+ heartbeatc, msgc = nil, nil
+
+ case m := <-msgc:
+ err := enc.encode(&m)
+ if err == nil {
+ unflushed += m.Size()
+
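+ // Flush as soon as no more messages are queued, or after batching
+ // half the buffer (streamBufSize/2), to bound latency.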
+ if len(msgc) == 0 || batched > streamBufSize/2 {
+ flusher.Flush()
+ sentBytes.WithLabelValues(cw.peerID.String()).Add(float64(unflushed))
+ unflushed = 0
+ batched = 0
+ } else {
+ batched++
+ }
+
+ continue
+ }
+
+ cw.status.deactivate(failureType{source: t.String(), action: "write"}, err.Error())
+ cw.close()
+ if cw.lg != nil {
+ cw.lg.Warn(
+ "lost TCP streaming connection with remote peer",
+ zap.String("stream-writer-type", t.String()),
+ zap.String("local-member-id", cw.localID.String()),
+ zap.String("remote-peer-id", cw.peerID.String()),
+ )
+ }
+ heartbeatc, msgc = nil, nil
+ cw.r.ReportUnreachable(m.To)
+ sentFailures.WithLabelValues(cw.peerID.String()).Inc()
+
+ case conn := <-cw.connc:
+ cw.mu.Lock()
+ closed := cw.closeUnlocked()
+ t = conn.t
+ switch conn.t {
+ case streamTypeMsgAppV2:
+ enc = newMsgAppV2Encoder(conn.Writer, cw.fs)
+ case streamTypeMessage:
+ enc = &messageEncoder{w: conn.Writer}
+ default:
+ if cw.lg != nil {
+ cw.lg.Panic("unhandled stream type", zap.String("stream-type", t.String()))
+ }
+ }
+ if cw.lg != nil {
+ cw.lg.Info(
+ "set message encoder",
+ zap.String("from", conn.localID.String()),
+ zap.String("to", conn.peerID.String()),
+ zap.String("stream-type", t.String()),
+ )
+ }
+ flusher = conn.Flusher
+ unflushed = 0
+ cw.status.activate()
+ cw.closer = conn.Closer
+ cw.working = true
+ cw.mu.Unlock()
+
+ if closed {
+ if cw.lg != nil {
+ cw.lg.Warn(
+ "closed TCP streaming connection with remote peer",
+ zap.String("stream-writer-type", t.String()),
+ zap.String("local-member-id", cw.localID.String()),
+ zap.String("remote-peer-id", cw.peerID.String()),
+ )
+ }
+ }
+ if cw.lg != nil {
+ cw.lg.Info(
+ "established TCP streaming connection with remote peer",
+ zap.String("stream-writer-type", t.String()),
+ zap.String("local-member-id", cw.localID.String()),
+ zap.String("remote-peer-id", cw.peerID.String()),
+ )
+ }
+ heartbeatc, msgc = tickc.C, cw.msgc
+
+ case <-cw.stopc:
+ if cw.close() {
+ if cw.lg != nil {
+ cw.lg.Warn(
+ "closed TCP streaming connection with remote peer",
+ zap.String("stream-writer-type", t.String()),
+ zap.String("remote-peer-id", cw.peerID.String()),
+ )
+ }
+ }
+ if cw.lg != nil {
+ cw.lg.Info(
+ "stopped TCP streaming connection with remote peer",
+ zap.String("stream-writer-type", t.String()),
+ zap.String("remote-peer-id", cw.peerID.String()),
+ )
+ }
+ close(cw.done)
+ return
+ }
+ }
+}
+
+func (cw *streamWriter) writec() (chan<- raftpb.Message, bool) {
+ cw.mu.Lock()
+ defer cw.mu.Unlock()
+ return cw.msgc, cw.working
+}
+
+func (cw *streamWriter) close() bool {
+ cw.mu.Lock()
+ defer cw.mu.Unlock()
+ return cw.closeUnlocked()
+}
+
+func (cw *streamWriter) closeUnlocked() bool {
+ if !cw.working {
+ return false
+ }
+ if err := cw.closer.Close(); err != nil {
+ if cw.lg != nil {
+ cw.lg.Warn(
+ "failed to close connection with remote peer",
+ zap.String("remote-peer-id", cw.peerID.String()),
+ zap.Error(err),
+ )
+ }
+ }
+ if len(cw.msgc) > 0 {
+ cw.r.ReportUnreachable(uint64(cw.peerID))
+ }
+ cw.msgc = make(chan raftpb.Message, streamBufSize)
+ cw.working = false
+ return true
+}
+
+func (cw *streamWriter) attach(conn *outgoingConn) bool {
+ select {
+ case cw.connc <- conn:
+ return true
+ case <-cw.done:
+ return false
+ }
+}
+
+func (cw *streamWriter) stop() {
+ close(cw.stopc)
+ <-cw.done
+}
+
+// streamReader is a long-running goroutine that dials the remote stream
+// endpoint and reads messages from the returned response body.
+type streamReader struct {
+ lg *zap.Logger
+
+ peerID types.ID
+ typ streamType
+
+ tr *Transport
+ picker *urlPicker
+ status *peerStatus
+ recvc chan<- raftpb.Message
+ propc chan<- raftpb.Message
+
+ rl *rate.Limiter // alters the frequency of dial retry attempts
+
+ errorc chan<- error
+
+ mu sync.Mutex
+ paused bool
+ closer io.Closer
+
+ ctx context.Context
+ cancel context.CancelFunc
+ done chan struct{}
+}
+
+func (cr *streamReader) start() {
+ cr.done = make(chan struct{})
+ if cr.errorc == nil {
+ cr.errorc = cr.tr.ErrorC
+ }
+ if cr.ctx == nil {
+ cr.ctx, cr.cancel = context.WithCancel(context.Background())
+ }
+ go cr.run()
+}
+
+func (cr *streamReader) run() {
+ t := cr.typ
+
+ if cr.lg != nil {
+ cr.lg.Info(
+ "started stream reader with remote peer",
+ zap.String("stream-reader-type", t.String()),
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("remote-peer-id", cr.peerID.String()),
+ )
+ }
+
+ for {
+ rc, err := cr.dial(t)
+ if err != nil {
+ if !errors.Is(err, errUnsupportedStreamType) {
+ cr.status.deactivate(failureType{source: t.String(), action: "dial"}, err.Error())
+ }
+ } else {
+ cr.status.activate()
+ if cr.lg != nil {
+ cr.lg.Info(
+ "established TCP streaming connection with remote peer",
+ zap.String("stream-reader-type", cr.typ.String()),
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("remote-peer-id", cr.peerID.String()),
+ )
+ }
+ err = cr.decodeLoop(rc, t)
+ if cr.lg != nil {
+ cr.lg.Warn(
+ "lost TCP streaming connection with remote peer",
+ zap.String("stream-reader-type", cr.typ.String()),
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("remote-peer-id", cr.peerID.String()),
+ zap.Error(err),
+ )
+ }
+ switch {
+ // all data is read out
+ case errors.Is(err, io.EOF):
+ // connection is closed by the remote
+ case transport.IsClosedConnError(err):
+ default:
+ cr.status.deactivate(failureType{source: t.String(), action: "read"}, err.Error())
+ }
+ }
+ // Wait for a while before the next dial attempt.
+ err = cr.rl.Wait(cr.ctx)
+ if cr.ctx.Err() != nil {
+ if cr.lg != nil {
+ cr.lg.Info(
+ "stopped stream reader with remote peer",
+ zap.String("stream-reader-type", t.String()),
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("remote-peer-id", cr.peerID.String()),
+ )
+ }
+ close(cr.done)
+ return
+ }
+ if err != nil {
+ if cr.lg != nil {
+ cr.lg.Warn(
+ "rate limit on stream reader with remote peer",
+ zap.String("stream-reader-type", t.String()),
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("remote-peer-id", cr.peerID.String()),
+ zap.Error(err),
+ )
+ }
+ }
+ }
+}
+
+func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error {
+ var dec decoder
+ cr.mu.Lock()
+ switch t {
+ case streamTypeMsgAppV2:
+ dec = newMsgAppV2Decoder(rc, cr.tr.ID, cr.peerID)
+ case streamTypeMessage:
+ dec = &messageDecoder{r: rc}
+ default:
+ if cr.lg != nil {
+ cr.lg.Panic("unknown stream type", zap.String("type", t.String()))
+ }
+ }
+ select {
+ case <-cr.ctx.Done():
+ cr.mu.Unlock()
+ if err := rc.Close(); err != nil {
+ return err
+ }
+ return io.EOF
+ default:
+ cr.closer = rc
+ }
+ cr.mu.Unlock()
+
+ // gofail: labelRaftDropHeartbeat:
+ for {
+ m, err := dec.decode()
+ if err != nil {
+ cr.mu.Lock()
+ cr.close()
+ cr.mu.Unlock()
+ return err
+ }
+
+ // gofail-go: var raftDropHeartbeat struct{}
+ // continue labelRaftDropHeartbeat
+ receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(m.Size()))
+
+ cr.mu.Lock()
+ paused := cr.paused
+ cr.mu.Unlock()
+
+ if paused {
+ continue
+ }
+
+ if isLinkHeartbeatMessage(&m) {
+ // raft is not interested in link-layer
+ // heartbeat messages, so we ignore them.
+ continue
+ }
+
+ recvc := cr.recvc
+ if m.Type == raftpb.MsgProp {
+ recvc = cr.propc
+ }
+
+ select {
+ case recvc <- m:
+ default:
+ if cr.status.isActive() {
+ if cr.lg != nil {
+ cr.lg.Warn(
+ "dropped internal Raft message since receiving buffer is full (overloaded network)",
+ zap.String("message-type", m.Type.String()),
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("from", types.ID(m.From).String()),
+ zap.String("remote-peer-id", types.ID(m.To).String()),
+ zap.Bool("remote-peer-active", cr.status.isActive()),
+ )
+ }
+ } else {
+ if cr.lg != nil {
+ cr.lg.Warn(
+ "dropped Raft message since receiving buffer is full (overloaded network)",
+ zap.String("message-type", m.Type.String()),
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("from", types.ID(m.From).String()),
+ zap.String("remote-peer-id", types.ID(m.To).String()),
+ zap.Bool("remote-peer-active", cr.status.isActive()),
+ )
+ }
+ }
+ recvFailures.WithLabelValues(types.ID(m.From).String()).Inc()
+ }
+ }
+}
+
+func (cr *streamReader) stop() {
+ cr.mu.Lock()
+ cr.cancel()
+ cr.close()
+ cr.mu.Unlock()
+ <-cr.done
+}
+
+func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) {
+ u := cr.picker.pick()
+ uu := u
+ uu.Path = path.Join(t.endpoint(cr.lg), cr.tr.ID.String())
+
+ if cr.lg != nil {
+ cr.lg.Debug(
+ "dial stream reader",
+ zap.String("from", cr.tr.ID.String()),
+ zap.String("to", cr.peerID.String()),
+ zap.String("address", uu.String()),
+ )
+ }
+ req, err := http.NewRequest(http.MethodGet, uu.String(), nil)
+ if err != nil {
+ cr.picker.unreachable(u)
+ return nil, fmt.Errorf("failed to make http request to %v (%w)", u, err)
+ }
+ req.Header.Set("X-Server-From", cr.tr.ID.String())
+ req.Header.Set("X-Server-Version", version.Version)
+ req.Header.Set("X-Min-Cluster-Version", version.MinClusterVersion)
+ req.Header.Set("X-Etcd-Cluster-ID", cr.tr.ClusterID.String())
+ req.Header.Set("X-Raft-To", cr.peerID.String())
+
+ setPeerURLsHeader(req, cr.tr.URLs)
+
+ req = req.WithContext(cr.ctx)
+
+ cr.mu.Lock()
+ select {
+ case <-cr.ctx.Done():
+ cr.mu.Unlock()
+ return nil, fmt.Errorf("stream reader is stopped")
+ default:
+ }
+ cr.mu.Unlock()
+
+ resp, err := cr.tr.streamRt.RoundTrip(req)
+ if err != nil {
+ cr.picker.unreachable(u)
+ return nil, err
+ }
+
+ rv := serverVersion(resp.Header)
+ lv := semver.Must(semver.NewVersion(version.Version))
+ if compareMajorMinorVersion(rv, lv) == -1 && !checkStreamSupport(rv, t) {
+ httputil.GracefulClose(resp)
+ cr.picker.unreachable(u)
+ return nil, errUnsupportedStreamType
+ }
+
+ switch resp.StatusCode {
+ case http.StatusGone:
+ httputil.GracefulClose(resp)
+ cr.picker.unreachable(u)
+ reportCriticalError(errMemberRemoved, cr.errorc)
+ return nil, errMemberRemoved
+
+ case http.StatusOK:
+ return resp.Body, nil
+
+ case http.StatusNotFound:
+ httputil.GracefulClose(resp)
+ cr.picker.unreachable(u)
+ return nil, fmt.Errorf("peer %s failed to find local node %s", cr.peerID, cr.tr.ID)
+
+ case http.StatusPreconditionFailed:
+ b, err := io.ReadAll(resp.Body)
+ if err != nil {
+ cr.picker.unreachable(u)
+ return nil, err
+ }
+ httputil.GracefulClose(resp)
+ cr.picker.unreachable(u)
+
+ switch strings.TrimSuffix(string(b), "\n") {
+ case errIncompatibleVersion.Error():
+ if cr.lg != nil {
+ cr.lg.Warn(
+ "request sent was ignored by remote peer due to server version incompatibility",
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("remote-peer-id", cr.peerID.String()),
+ zap.Error(errIncompatibleVersion),
+ )
+ }
+ return nil, errIncompatibleVersion
+
+ case ErrClusterIDMismatch.Error():
+ if cr.lg != nil {
+ cr.lg.Warn(
+ "request sent was ignored by remote peer due to cluster ID mismatch",
+ zap.String("remote-peer-id", cr.peerID.String()),
+ zap.String("remote-peer-cluster-id", resp.Header.Get("X-Etcd-Cluster-ID")),
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("local-member-cluster-id", cr.tr.ClusterID.String()),
+ zap.Error(ErrClusterIDMismatch),
+ )
+ }
+ return nil, ErrClusterIDMismatch
+
+ default:
+ return nil, fmt.Errorf("unhandled error %q when precondition failed", string(b))
+ }
+
+ default:
+ httputil.GracefulClose(resp)
+ cr.picker.unreachable(u)
+ return nil, fmt.Errorf("unhandled http status %d", resp.StatusCode)
+ }
+}
+
+func (cr *streamReader) close() {
+ if cr.closer != nil {
+ if err := cr.closer.Close(); err != nil {
+ if cr.lg != nil {
+ cr.lg.Warn(
+ "failed to close remote peer connection",
+ zap.String("local-member-id", cr.tr.ID.String()),
+ zap.String("remote-peer-id", cr.peerID.String()),
+ zap.Error(err),
+ )
+ }
+ }
+ }
+ cr.closer = nil
+}
+
+func (cr *streamReader) pause() {
+ cr.mu.Lock()
+ defer cr.mu.Unlock()
+ cr.paused = true
+}
+
+func (cr *streamReader) resume() {
+ cr.mu.Lock()
+ defer cr.mu.Unlock()
+ cr.paused = false
+}
+
+// checkStreamSupport checks whether the stream type is supported in the
+// given version.
+func checkStreamSupport(v *semver.Version, t streamType) bool {
+ nv := &semver.Version{Major: v.Major, Minor: v.Minor}
+ for _, s := range supportedStream[nv.String()] {
+ if s == t {
+ return true
+ }
+ }
+ return false
+}
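+
+// For example (an illustrative call, not taken from the tests): a remote
+// reporting version 3.5.3 is first truncated to "3.5.0" for the map lookup,
+// so any 3.5.x peer supports both stream types:
+//
+//	ok := checkStreamSupport(semver.New("3.5.3"), streamTypeMessage) // true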
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/transport.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/transport.go
new file mode 100644
index 0000000..b376d57
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/transport.go
@@ -0,0 +1,453 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "context"
+ "net/http"
+ "sync"
+ "time"
+
+ "github.com/xiang90/probing"
+ "go.uber.org/zap"
+ "golang.org/x/time/rate"
+
+ "go.etcd.io/etcd/client/pkg/v3/transport"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/snap"
+ stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats"
+ "go.etcd.io/raft/v3"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+type Raft interface {
+ Process(ctx context.Context, m raftpb.Message) error
+ IsIDRemoved(id uint64) bool
+ ReportUnreachable(id uint64)
+ ReportSnapshot(id uint64, status raft.SnapshotStatus)
+}
+
+type Transporter interface {
+ // Start starts the given Transporter.
+ // Start MUST be called before calling other functions in the interface.
+ Start() error
+ // Handler returns the HTTP handler of the transporter.
+ // A transporter HTTP handler handles the HTTP requests
+ // from remote peers.
+ // The handler MUST be used to handle RaftPrefix(/raft)
+ // endpoint.
+ Handler() http.Handler
+ // Send sends out the given messages to the remote peers.
+ // Each message has a To field, which is an id that maps
+ // to an existing peer in the transport.
+ // If the id cannot be found in the transport, the message
+ // will be ignored.
+ Send(m []raftpb.Message)
+ // SendSnapshot sends out the given snapshot message to a remote peer.
+ // The behavior of SendSnapshot is similar to Send.
+ SendSnapshot(m snap.Message)
+ // AddRemote adds a remote with the given peer URLs to the transport.
+ // A remote helps a newly joined member catch up with the progress of the
+ // cluster, and will not be used after that.
+ // It is the caller's responsibility to ensure the URLs are all valid;
+ // otherwise it panics.
+ AddRemote(id types.ID, urls []string)
+ // AddPeer adds a peer with given peer urls into the transport.
+ // It is the caller's responsibility to ensure the urls are all valid,
+ // or it panics.
+ // Peer urls are used to connect to the remote peer.
+ AddPeer(id types.ID, urls []string)
+ // RemovePeer removes the peer with given id.
+ RemovePeer(id types.ID)
+ // RemoveAllPeers removes all the existing peers in the transport.
+ RemoveAllPeers()
+ // UpdatePeer updates the peer urls of the peer with the given id.
+ // It is the caller's responsibility to ensure the urls are all valid,
+ // or it panics.
+ UpdatePeer(id types.ID, urls []string)
+ // ActiveSince returns the time at which the connection with the peer
+ // of the given id became active.
+ // If the connection has been active since the peer was added, it returns the adding time.
+ // If the connection is currently inactive, it returns zero time.
+ ActiveSince(id types.ID) time.Time
+ // ActivePeers returns the number of active peers.
+ ActivePeers() int
+ // Stop closes the connections and stops the transporter.
+ Stop()
+}
+
+// Transport implements the Transporter interface. It provides the
+// functionality to send raft messages to peers and receive raft messages
+// from peers.
+// Users should call the Handler method to get a handler to serve requests
+// received from peerURLs.
+// Users must call Start before calling other functions, and call
+// Stop when the Transport is no longer used.
+type Transport struct {
+ Logger *zap.Logger
+
+ DialTimeout time.Duration // maximum duration before timing out dial of the request
+ // DialRetryFrequency defines the frequency of streamReader dial retry attempts;
+ // a distinct rate limiter is created per peer (default value: 10 events/sec)
+ DialRetryFrequency rate.Limit
+
+ TLSInfo transport.TLSInfo // TLS information used when creating connection
+
+ ID types.ID // local member ID
+ URLs types.URLs // local peer URLs
+ ClusterID types.ID // raft cluster ID for request validation
+ Raft Raft // raft state machine, to which the Transport forwards received messages and reports status
+ Snapshotter *snap.Snapshotter
+ ServerStats *stats.ServerStats // used to record general transportation statistics
+ // LeaderStats records transportation statistics with followers when
+ // performing as leader in raft protocol
+ LeaderStats *stats.LeaderStats
+ // ErrorC is used to report detected critical errors, e.g.,
+ // the member has been permanently removed from the cluster.
+ // When an error is received from ErrorC, the user should stop the raft
+ // state machine and thus stop the Transport.
+ ErrorC chan error
+
+ streamRt http.RoundTripper // roundTripper used by streams
+ pipelineRt http.RoundTripper // roundTripper used by pipelines
+
+ mu sync.RWMutex // protect the remote and peer map
+ remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
+ peers map[types.ID]Peer // peers map
+
+ pipelineProber probing.Prober
+ streamProber probing.Prober
+}
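+
+// A minimal lifecycle sketch (illustrative only, not part of the upstream
+// source; lg, localID, clusterID, and raftNode are assumed to be supplied
+// by the caller):
+//
+//	tr := &Transport{
+//		Logger:      lg,
+//		DialTimeout: time.Second,
+//		ID:          localID,
+//		ClusterID:   clusterID,
+//		Raft:        raftNode,
+//		ServerStats: stats.NewServerStats("", ""),
+//		LeaderStats: stats.NewLeaderStats(lg, localID.String()),
+//		ErrorC:      make(chan error, 1),
+//	}
+//	if err := tr.Start(); err != nil {
+//		// handle the error
+//	}
+//	defer tr.Stop()
+//	srv := &http.Server{Handler: tr.Handler()}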
+
+func (t *Transport) Start() error {
+ var err error
+ t.streamRt, err = newStreamRoundTripper(t.TLSInfo, t.DialTimeout)
+ if err != nil {
+ return err
+ }
+ t.pipelineRt, err = NewRoundTripper(t.TLSInfo, t.DialTimeout)
+ if err != nil {
+ return err
+ }
+ t.remotes = make(map[types.ID]*remote)
+ t.peers = make(map[types.ID]Peer)
+ t.pipelineProber = probing.NewProber(t.pipelineRt)
+ t.streamProber = probing.NewProber(t.streamRt)
+
+ // If the client didn't provide a dial retry frequency, use the default
+ // (100ms backoff between attempts to create a new stream),
+ // so that retries don't add too much overhead.
+ if t.DialRetryFrequency == 0 {
+ t.DialRetryFrequency = rate.Every(100 * time.Millisecond)
+ }
+ return nil
+}
+
+func (t *Transport) Handler() http.Handler {
+ pipelineHandler := newPipelineHandler(t, t.Raft, t.ClusterID)
+ streamHandler := newStreamHandler(t, t, t.Raft, t.ID, t.ClusterID)
+ snapHandler := newSnapshotHandler(t, t.Raft, t.Snapshotter, t.ClusterID)
+ mux := http.NewServeMux()
+ mux.Handle(RaftPrefix, pipelineHandler)
+ mux.Handle(RaftStreamPrefix+"/", streamHandler)
+ mux.Handle(RaftSnapshotPrefix, snapHandler)
+ mux.Handle(ProbingPrefix, probing.NewHandler())
+ return mux
+}
+
+func (t *Transport) Get(id types.ID) Peer {
+ t.mu.RLock()
+ defer t.mu.RUnlock()
+ return t.peers[id]
+}
+
+func (t *Transport) Send(msgs []raftpb.Message) {
+ for _, m := range msgs {
+ if m.To == 0 {
+ // ignore intentionally dropped message
+ continue
+ }
+ to := types.ID(m.To)
+
+ t.mu.RLock()
+ p, pok := t.peers[to]
+ g, rok := t.remotes[to]
+ t.mu.RUnlock()
+
+ if pok {
+ if isMsgApp(m) {
+ t.ServerStats.SendAppendReq(m.Size())
+ }
+ p.send(m)
+ continue
+ }
+
+ if rok {
+ g.send(m)
+ continue
+ }
+
+ if t.Logger != nil {
+ t.Logger.Debug(
+ "ignored message send request; unknown remote peer target",
+ zap.String("type", m.Type.String()),
+ zap.String("unknown-target-peer-id", to.String()),
+ )
+ }
+ }
+}
+
+func (t *Transport) Stop() {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ for _, r := range t.remotes {
+ r.stop()
+ }
+ for _, p := range t.peers {
+ p.stop()
+ }
+ t.pipelineProber.RemoveAll()
+ t.streamProber.RemoveAll()
+ if tr, ok := t.streamRt.(*http.Transport); ok {
+ tr.CloseIdleConnections()
+ }
+ if tr, ok := t.pipelineRt.(*http.Transport); ok {
+ tr.CloseIdleConnections()
+ }
+ t.peers = nil
+ t.remotes = nil
+}
+
+// CutPeer drops messages to the specified peer.
+func (t *Transport) CutPeer(id types.ID) {
+ t.mu.RLock()
+ p, pok := t.peers[id]
+ g, gok := t.remotes[id]
+ t.mu.RUnlock()
+
+ if pok {
+ p.(Pausable).Pause()
+ }
+ if gok {
+ g.Pause()
+ }
+}
+
+// MendPeer recovers the given peer from the message-dropping behavior, i.e., it undoes CutPeer.
+func (t *Transport) MendPeer(id types.ID) {
+ t.mu.RLock()
+ p, pok := t.peers[id]
+ g, gok := t.remotes[id]
+ t.mu.RUnlock()
+
+ if pok {
+ p.(Pausable).Resume()
+ }
+ if gok {
+ g.Resume()
+ }
+}
+
+func (t *Transport) AddRemote(id types.ID, us []string) {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ if t.remotes == nil {
+ // there's no clean way to shut down the Go HTTP server
+ // (see: https://github.com/golang/go/issues/4674) before
+ // stopping the transport; ignore any new connections.
+ return
+ }
+ if _, ok := t.peers[id]; ok {
+ return
+ }
+ if _, ok := t.remotes[id]; ok {
+ return
+ }
+ urls, err := types.NewURLs(us)
+ if err != nil {
+ if t.Logger != nil {
+ t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
+ }
+ }
+ t.remotes[id] = startRemote(t, urls, id)
+
+ if t.Logger != nil {
+ t.Logger.Info(
+ "added new remote peer",
+ zap.String("local-member-id", t.ID.String()),
+ zap.String("remote-peer-id", id.String()),
+ zap.Strings("remote-peer-urls", us),
+ )
+ }
+}
+
+func (t *Transport) AddPeer(id types.ID, us []string) {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+
+ if t.peers == nil {
+ panic("transport stopped")
+ }
+ if _, ok := t.peers[id]; ok {
+ return
+ }
+ urls, err := types.NewURLs(us)
+ if err != nil {
+ if t.Logger != nil {
+ t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
+ }
+ }
+ fs := t.LeaderStats.Follower(id.String())
+ t.peers[id] = startPeer(t, urls, id, fs)
+ addPeerToProber(t.Logger, t.pipelineProber, id.String(), us, RoundTripperNameSnapshot, rttSec)
+ addPeerToProber(t.Logger, t.streamProber, id.String(), us, RoundTripperNameRaftMessage, rttSec)
+
+ if t.Logger != nil {
+ t.Logger.Info(
+ "added remote peer",
+ zap.String("local-member-id", t.ID.String()),
+ zap.String("remote-peer-id", id.String()),
+ zap.Strings("remote-peer-urls", us),
+ )
+ }
+}
+
+func (t *Transport) RemovePeer(id types.ID) {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ t.removePeer(id)
+}
+
+func (t *Transport) RemoveAllPeers() {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ for id := range t.peers {
+ t.removePeer(id)
+ }
+}
+
+// The caller of this function must hold the peers mutex.
+func (t *Transport) removePeer(id types.ID) {
+ // etcd may remove a member again on startup due to WAL file replay.
+ peer, ok := t.peers[id]
+ if ok {
+ peer.stop()
+ delete(t.peers, id)
+ delete(t.LeaderStats.Followers, id.String())
+ t.pipelineProber.Remove(id.String())
+ t.streamProber.Remove(id.String())
+ }
+
+ if t.Logger != nil {
+ if ok {
+ t.Logger.Info(
+ "removed remote peer",
+ zap.String("local-member-id", t.ID.String()),
+ zap.String("removed-remote-peer-id", id.String()),
+ )
+ } else {
+ t.Logger.Warn(
+ "skipped removing already removed peer",
+ zap.String("local-member-id", t.ID.String()),
+ zap.String("removed-remote-peer-id", id.String()),
+ )
+ }
+ }
+}
+
+func (t *Transport) UpdatePeer(id types.ID, us []string) {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ // TODO: return error or just panic?
+ if _, ok := t.peers[id]; !ok {
+ return
+ }
+ urls, err := types.NewURLs(us)
+ if err != nil {
+ if t.Logger != nil {
+ t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
+ }
+ }
+ t.peers[id].update(urls)
+
+ t.pipelineProber.Remove(id.String())
+ addPeerToProber(t.Logger, t.pipelineProber, id.String(), us, RoundTripperNameSnapshot, rttSec)
+ t.streamProber.Remove(id.String())
+ addPeerToProber(t.Logger, t.streamProber, id.String(), us, RoundTripperNameRaftMessage, rttSec)
+
+ if t.Logger != nil {
+ t.Logger.Info(
+ "updated remote peer",
+ zap.String("local-member-id", t.ID.String()),
+ zap.String("updated-remote-peer-id", id.String()),
+ zap.Strings("updated-remote-peer-urls", us),
+ )
+ }
+}
+
+func (t *Transport) ActiveSince(id types.ID) time.Time {
+ t.mu.RLock()
+ defer t.mu.RUnlock()
+ if p, ok := t.peers[id]; ok {
+ return p.activeSince()
+ }
+ return time.Time{}
+}
+
+func (t *Transport) SendSnapshot(m snap.Message) {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ p := t.peers[types.ID(m.To)]
+ if p == nil {
+ m.CloseWithError(errMemberNotFound)
+ return
+ }
+ p.sendSnap(m)
+}
+
+// Pausable is a testing interface for pausing transport traffic.
+type Pausable interface {
+ Pause()
+ Resume()
+}
+
+func (t *Transport) Pause() {
+ t.mu.RLock()
+ defer t.mu.RUnlock()
+ for _, p := range t.peers {
+ p.(Pausable).Pause()
+ }
+}
+
+func (t *Transport) Resume() {
+ t.mu.RLock()
+ defer t.mu.RUnlock()
+ for _, p := range t.peers {
+ p.(Pausable).Resume()
+ }
+}
+
+// ActivePeers returns the number of peers whose connection is
+// currently active, i.e., whose activeSince time is non-zero.
+func (t *Transport) ActivePeers() (cnt int) {
+ t.mu.RLock()
+ defer t.mu.RUnlock()
+ for _, p := range t.peers {
+ if !p.activeSince().IsZero() {
+ cnt++
+ }
+ }
+ return cnt
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/urlpick.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/urlpick.go
new file mode 100644
index 0000000..fc6054a
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/urlpick.go
@@ -0,0 +1,57 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "net/url"
+ "sync"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+)
+
+type urlPicker struct {
+ mu sync.Mutex // guards urls and picked
+ urls types.URLs
+ picked int
+}
+
+func newURLPicker(urls types.URLs) *urlPicker {
+ return &urlPicker{
+ urls: urls,
+ }
+}
+
+func (p *urlPicker) update(urls types.URLs) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ p.urls = urls
+ p.picked = 0
+}
+
+func (p *urlPicker) pick() url.URL {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ return p.urls[p.picked]
+}
+
+// unreachable notifies the picker that the given url is unreachable,
+// so that it switches to another available url.
+func (p *urlPicker) unreachable(u url.URL) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ if u == p.urls[p.picked] {
+ p.picked = (p.picked + 1) % len(p.urls)
+ }
+}
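+
+// Behavior sketch (illustrative only): the picker sticks to the current
+// url until it is reported unreachable, then rotates to the next one.
+//
+//	p := newURLPicker(urls) // e.g. urls holds two peer URLs
+//	u := p.pick()           // returns the first url
+//	p.unreachable(u)        // advances picked to the next url
+//	_ = p.pick()            // now returns the second url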
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/util.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/util.go
new file mode 100644
index 0000000..5057f53
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/util.go
@@ -0,0 +1,205 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rafthttp
+
+import (
+ "fmt"
+ "io"
+ "net"
+ "net/http"
+ "net/url"
+ "strings"
+ "time"
+
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/client/pkg/v3/transport"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+)
+
+var (
+ errMemberRemoved = fmt.Errorf("the member has been permanently removed from the cluster")
+ errMemberNotFound = fmt.Errorf("member not found")
+)
+
+// NewListener returns a listener for raft message transfer between peers.
+// It uses a timeout listener to identify broken streams promptly.
+func NewListener(u url.URL, tlsinfo *transport.TLSInfo) (net.Listener, error) {
+ return transport.NewListenerWithOpts(u.Host, u.Scheme, transport.WithTLSInfo(tlsinfo), transport.WithTimeout(ConnReadTimeout, ConnWriteTimeout))
+}
+
+// NewRoundTripper returns a roundTripper used to send requests
+// to rafthttp listener of remote peers.
+func NewRoundTripper(tlsInfo transport.TLSInfo, dialTimeout time.Duration) (http.RoundTripper, error) {
+ // It uses a timeout transport to pair with remote timeout listeners.
+ // It sets no read/write timeout, because messages in requests may
+ // take a long time to write out before the response is read.
+ return transport.NewTimeoutTransport(tlsInfo, dialTimeout, 0, 0)
+}
+
+// newStreamRoundTripper returns a roundTripper used to send stream requests
+// to the rafthttp listener of remote peers.
+// A read/write timeout is set on the stream roundTripper to promptly
+// detect broken connections, which minimizes the number of messages
+// sent on a broken connection.
+func newStreamRoundTripper(tlsInfo transport.TLSInfo, dialTimeout time.Duration) (http.RoundTripper, error) {
+ return transport.NewTimeoutTransport(tlsInfo, dialTimeout, ConnReadTimeout, ConnWriteTimeout)
+}
+
+// createPostRequest creates an HTTP POST request that sends a raft message.
+func createPostRequest(lg *zap.Logger, u url.URL, path string, body io.Reader, ct string, urls types.URLs, from, cid types.ID) *http.Request {
+ uu := u
+ uu.Path = path
+ req, err := http.NewRequest(http.MethodPost, uu.String(), body)
+ if err != nil {
+ if lg != nil {
+ lg.Panic("unexpected new request error", zap.Error(err))
+ }
+ }
+ req.Header.Set("Content-Type", ct)
+ req.Header.Set("X-Server-From", from.String())
+ req.Header.Set("X-Server-Version", version.Version)
+ req.Header.Set("X-Min-Cluster-Version", version.MinClusterVersion)
+ req.Header.Set("X-Etcd-Cluster-ID", cid.String())
+ setPeerURLsHeader(req, urls)
+
+ return req
+}
+
+// checkPostResponse checks the response of the HTTP POST request that sends
+// a raft message.
+func checkPostResponse(lg *zap.Logger, resp *http.Response, body []byte, req *http.Request, to types.ID) error {
+ switch resp.StatusCode {
+ case http.StatusPreconditionFailed:
+ switch strings.TrimSuffix(string(body), "\n") {
+ case errIncompatibleVersion.Error():
+ if lg != nil {
+ lg.Error(
+ "request sent was ignored by peer",
+ zap.String("remote-peer-id", to.String()),
+ )
+ }
+ return errIncompatibleVersion
+ case ErrClusterIDMismatch.Error():
+ if lg != nil {
+ lg.Error(
+ "request sent was ignored due to cluster ID mismatch",
+ zap.String("remote-peer-id", to.String()),
+ zap.String("remote-peer-cluster-id", resp.Header.Get("X-Etcd-Cluster-ID")),
+ zap.String("local-member-cluster-id", req.Header.Get("X-Etcd-Cluster-ID")),
+ )
+ }
+ return ErrClusterIDMismatch
+ default:
+ return fmt.Errorf("unhandled error %q when precondition failed", string(body))
+ }
+ case http.StatusForbidden:
+ return errMemberRemoved
+ case http.StatusNoContent:
+ return nil
+ default:
+ return fmt.Errorf("unexpected http status %s while posting to %q", http.StatusText(resp.StatusCode), req.URL.String())
+ }
+}
+
+// reportCriticalError reports the given error by sending it into
+// the given error channel.
+// If the error channel is already full, the error is dropped, because
+// the fact that an error has occurred has already been reported, which
+// is good enough.
+func reportCriticalError(err error, errc chan<- error) {
+ select {
+ case errc <- err:
+ default:
+ }
+}
+
+// compareMajorMinorVersion returns an integer comparing two versions based on
+// their major and minor version. The result will be 0 if a==b, -1 if a < b,
+// and 1 if a > b.
+func compareMajorMinorVersion(a, b *semver.Version) int {
+ na := &semver.Version{Major: a.Major, Minor: a.Minor}
+ nb := &semver.Version{Major: b.Major, Minor: b.Minor}
+ switch {
+ case na.LessThan(*nb):
+ return -1
+ case nb.LessThan(*na):
+ return 1
+ default:
+ return 0
+ }
+}
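+
+// For example (patch versions are ignored):
+//
+//	compareMajorMinorVersion(semver.New("3.4.9"), semver.New("3.5.0")) // -1
+//	compareMajorMinorVersion(semver.New("3.5.1"), semver.New("3.5.7")) // 0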
+
+// serverVersion returns the server version from the given header.
+func serverVersion(h http.Header) *semver.Version {
+ verStr := h.Get("X-Server-Version")
+ // backward compatibility with etcd 2.0
+ if verStr == "" {
+ verStr = "2.0.0"
+ }
+ return semver.Must(semver.NewVersion(verStr))
+}
+
+// minClusterVersion returns the min cluster version from the given header.
+func minClusterVersion(h http.Header) *semver.Version {
+ verStr := h.Get("X-Min-Cluster-Version")
+ // backward compatibility with etcd 2.0
+ if verStr == "" {
+ verStr = "2.0.0"
+ }
+ return semver.Must(semver.NewVersion(verStr))
+}
+
+// checkVersionCompatibility checks whether the given version is compatible
+// with the local version.
+func checkVersionCompatibility(name string, server, minCluster *semver.Version) (
+ localServer *semver.Version,
+ localMinCluster *semver.Version,
+ err error,
+) {
+ localServer = semver.Must(semver.NewVersion(version.Version))
+ localMinCluster = semver.Must(semver.NewVersion(version.MinClusterVersion))
+ if compareMajorMinorVersion(server, localMinCluster) == -1 {
+ return localServer, localMinCluster, fmt.Errorf("remote version is too low: remote[%s]=%s, local=%s", name, server, localServer)
+ }
+ if compareMajorMinorVersion(minCluster, localServer) == 1 {
+ return localServer, localMinCluster, fmt.Errorf("local version is too low: remote[%s]=%s, local=%s", name, server, localServer)
+ }
+ return localServer, localMinCluster, nil
+}
+
+// setPeerURLsHeader reports local urls for peer discovery
+func setPeerURLsHeader(req *http.Request, urls types.URLs) {
+ if urls == nil {
+ // often not set in unit tests
+ return
+ }
+ peerURLs := make([]string, urls.Len())
+ for i := range urls {
+ peerURLs[i] = urls[i].String()
+ }
+ req.Header.Set("X-PeerURLs", strings.Join(peerURLs, ","))
+}
+
+// addRemoteFromRequest adds a remote peer according to an http request header
+func addRemoteFromRequest(tr Transporter, r *http.Request) {
+ if from, err := types.IDFromString(r.Header.Get("X-Server-From")); err == nil {
+ if urls := r.Header.Get("X-PeerURLs"); urls != "" {
+ tr.AddRemote(from, strings.Split(urls, ","))
+ }
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/db.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/db.go
new file mode 100644
index 0000000..e8add5e
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/db.go
@@ -0,0 +1,99 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snap
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "time"
+
+ humanize "github.com/dustin/go-humanize"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/fileutil"
+)
+
+var ErrNoDBSnapshot = errors.New("snap: snapshot file doesn't exist")
+
+// SaveDBFrom saves a snapshot of the database from the given reader. It
+// guarantees that the save operation is atomic.
+func (s *Snapshotter) SaveDBFrom(r io.Reader, id uint64) (int64, error) {
+ start := time.Now()
+
+ f, err := os.CreateTemp(s.dir, "tmp")
+ if err != nil {
+ return 0, err
+ }
+ var n int64
+ n, err = io.Copy(f, r)
+ if err == nil {
+ fsyncStart := time.Now()
+ err = fileutil.Fsync(f)
+ snapDBFsyncSec.Observe(time.Since(fsyncStart).Seconds())
+ }
+ f.Close()
+ if err != nil {
+ os.Remove(f.Name())
+ return n, err
+ }
+ fn := s.dbFilePath(id)
+ if fileutil.Exist(fn) {
+ os.Remove(f.Name())
+ return n, nil
+ }
+ err = os.Rename(f.Name(), fn)
+ if err != nil {
+ os.Remove(f.Name())
+ return n, err
+ }
+
+ s.lg.Info(
+ "saved database snapshot to disk",
+ zap.String("path", fn),
+ zap.Int64("bytes", n),
+ zap.String("size", humanize.Bytes(uint64(n))),
+ )
+
+ snapDBSaveSec.Observe(time.Since(start).Seconds())
+ return n, nil
+}
+
+// DBFilePath returns the file path for the snapshot of the database with
+// the given id. If the snapshot does not exist, it returns an error.
+func (s *Snapshotter) DBFilePath(id uint64) (string, error) {
+ if _, err := fileutil.ReadDir(s.dir); err != nil {
+ return "", err
+ }
+ fn := s.dbFilePath(id)
+ if fileutil.Exist(fn) {
+ return fn, nil
+ }
+ if s.lg != nil {
+ s.lg.Warn(
+ "failed to find [SNAPSHOT-INDEX].snap.db",
+ zap.Uint64("snapshot-index", id),
+ zap.String("snapshot-file-path", fn),
+ zap.Error(ErrNoDBSnapshot),
+ )
+ }
+ return "", ErrNoDBSnapshot
+}
+
+func (s *Snapshotter) dbFilePath(id uint64) string {
+ return filepath.Join(s.dir, fmt.Sprintf("%016x.snap.db", id))
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/doc.go
new file mode 100644
index 0000000..dcc5db5
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/doc.go
@@ -0,0 +1,17 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package snap handles Raft nodes' states with snapshots.
+// The snapshot logic is internal to the etcd server and raft packages.
+package snap
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/message.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/message.go
new file mode 100644
index 0000000..2b4090c
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/message.go
@@ -0,0 +1,64 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snap
+
+import (
+ "io"
+
+ "go.etcd.io/etcd/pkg/v3/ioutil"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+// Message is a struct that contains a raft Message and a ReadCloser. The type
+// of the raft message MUST be MsgSnap, which contains the raft metadata and an
+// additional data []byte field that contains the snapshot of the actual state
+// machine.
+// Message carries the ReadCloser field to handle large snapshots. This avoids
+// copying the entire snapshot into a byte array, which would consume a lot of memory.
+//
+// Users of Message should close the Message after sending it.
+type Message struct {
+ raftpb.Message
+ ReadCloser io.ReadCloser
+ TotalSize int64
+ closeC chan bool
+}
+
+func NewMessage(rs raftpb.Message, rc io.ReadCloser, rcSize int64) *Message {
+ return &Message{
+ Message: rs,
+ ReadCloser: ioutil.NewExactReadCloser(rc, rcSize),
+ TotalSize: int64(rs.Size()) + rcSize,
+ closeC: make(chan bool, 1),
+ }
+}
+
+// CloseNotify returns a channel that receives a single value
+// when message sending has finished. true indicates the send
+// was successful.
+func (m Message) CloseNotify() <-chan bool {
+ return m.closeC
+}
+
+func (m Message) CloseWithError(err error) {
+ if cerr := m.ReadCloser.Close(); cerr != nil {
+ err = cerr
+ }
+ if err == nil {
+ m.closeC <- true
+ } else {
+ m.closeC <- false
+ }
+}
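+
+// Typical flow (illustrative only; the rafthttp sender side shown here is
+// an assumption):
+//
+//	m := NewMessage(raftMsg, rc, size) // raftMsg MUST be a MsgSnap
+//	transport.SendSnapshot(*m)
+//	if ok := <-m.CloseNotify(); !ok {
+//		// the snapshot was not sent successfully
+//	}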
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/metrics.go
new file mode 100644
index 0000000..2affecf
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/metrics.go
@@ -0,0 +1,82 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snap
+
+import "github.com/prometheus/client_golang/prometheus"
+
+var (
+ snapMarshallingSec = prometheus.NewHistogram(prometheus.HistogramOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "snap",
+ Name: "save_marshalling_duration_seconds",
+ Help: "The marshalling cost distributions of save called by snapshot.",
+
+ // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2
+ // highest bucket start of 0.001 sec * 2^13 == 8.192 sec
+ Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
+ })
+
+ snapSaveSec = prometheus.NewHistogram(prometheus.HistogramOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "snap",
+ Name: "save_total_duration_seconds",
+ Help: "The total latency distributions of save called by snapshot.",
+
+ // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2
+ // highest bucket start of 0.001 sec * 2^13 == 8.192 sec
+ Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
+ })
+
+ snapFsyncSec = prometheus.NewHistogram(prometheus.HistogramOpts{
+ Namespace: "etcd",
+ Subsystem: "snap",
+ Name: "fsync_duration_seconds",
+ Help: "The latency distributions of fsync called by snap.",
+
+ // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2
+ // highest bucket start of 0.001 sec * 2^13 == 8.192 sec
+ Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
+ })
+
+ snapDBSaveSec = prometheus.NewHistogram(prometheus.HistogramOpts{
+ Namespace: "etcd",
+ Subsystem: "snap_db",
+ Name: "save_total_duration_seconds",
+ Help: "The total latency distributions of v3 snapshot save",
+
+ // lowest bucket start of upper bound 0.1 sec (100 ms) with factor 2
+ // highest bucket start of 0.1 sec * 2^9 == 51.2 sec
+ Buckets: prometheus.ExponentialBuckets(0.1, 2, 10),
+ })
+
+ snapDBFsyncSec = prometheus.NewHistogram(prometheus.HistogramOpts{
+ Namespace: "etcd",
+ Subsystem: "snap_db",
+ Name: "fsync_duration_seconds",
+ Help: "The latency distributions of fsyncing .snap.db file",
+
+ // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2
+ // highest bucket start of 0.001 sec * 2^13 == 8.192 sec
+ Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
+ })
+)
+
+func init() {
+ prometheus.MustRegister(snapMarshallingSec)
+ prometheus.MustRegister(snapSaveSec)
+ prometheus.MustRegister(snapFsyncSec)
+ prometheus.MustRegister(snapDBSaveSec)
+ prometheus.MustRegister(snapDBFsyncSec)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb/snap.pb.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb/snap.pb.go
new file mode 100644
index 0000000..ff9d39c
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb/snap.pb.go
@@ -0,0 +1,347 @@
+// Code generated by protoc-gen-gogo. DO NOT EDIT.
+// source: snap.proto
+
+package snappb
+
+import (
+ fmt "fmt"
+ io "io"
+ math "math"
+ math_bits "math/bits"
+
+ _ "github.com/gogo/protobuf/gogoproto"
+ proto "github.com/golang/protobuf/proto"
+)
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ = proto.Marshal
+var _ = fmt.Errorf
+var _ = math.Inf
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the proto package it is being compiled against.
+// A compilation error at this line likely means your copy of the
+// proto package needs to be updated.
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
+
+type Snapshot struct {
+ Crc uint32 `protobuf:"varint,1,opt,name=crc" json:"crc"`
+ Data []byte `protobuf:"bytes,2,opt,name=data" json:"data,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *Snapshot) Reset() { *m = Snapshot{} }
+func (m *Snapshot) String() string { return proto.CompactTextString(m) }
+func (*Snapshot) ProtoMessage() {}
+func (*Snapshot) Descriptor() ([]byte, []int) {
+ return fileDescriptor_f2e3c045ebf84d00, []int{0}
+}
+func (m *Snapshot) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *Snapshot) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_Snapshot.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *Snapshot) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Snapshot.Merge(m, src)
+}
+func (m *Snapshot) XXX_Size() int {
+ return m.Size()
+}
+func (m *Snapshot) XXX_DiscardUnknown() {
+ xxx_messageInfo_Snapshot.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_Snapshot proto.InternalMessageInfo
+
+func init() {
+ proto.RegisterType((*Snapshot)(nil), "snappb.snapshot")
+}
+
+func init() { proto.RegisterFile("snap.proto", fileDescriptor_f2e3c045ebf84d00) }
+
+var fileDescriptor_f2e3c045ebf84d00 = []byte{
+ // 164 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x2a, 0xce, 0x4b, 0x2c,
+ 0xd0, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0x03, 0xb1, 0x0b, 0x92, 0xa4, 0x44, 0xd2, 0xf3,
+ 0xd3, 0xf3, 0xc1, 0x42, 0xfa, 0x20, 0x16, 0x44, 0x56, 0xc9, 0x8c, 0x8b, 0x03, 0x24, 0x5f, 0x9c,
+ 0x91, 0x5f, 0x22, 0x24, 0xc6, 0xc5, 0x9c, 0x5c, 0x94, 0x2c, 0xc1, 0xa8, 0xc0, 0xa8, 0xc1, 0xeb,
+ 0xc4, 0x72, 0xe2, 0x9e, 0x3c, 0x43, 0x10, 0x48, 0x40, 0x48, 0x88, 0x8b, 0x25, 0x25, 0xb1, 0x24,
+ 0x51, 0x82, 0x49, 0x81, 0x51, 0x83, 0x27, 0x08, 0xcc, 0x76, 0xf2, 0x3a, 0xf1, 0x50, 0x8e, 0xe1,
+ 0xc4, 0x23, 0x39, 0xc6, 0x0b, 0x8f, 0xe4, 0x18, 0x1f, 0x3c, 0x92, 0x63, 0x9c, 0xf1, 0x58, 0x8e,
+ 0x21, 0xca, 0x24, 0x3d, 0x5f, 0x2f, 0xb5, 0x24, 0x39, 0x45, 0x2f, 0x33, 0x5f, 0x1f, 0x44, 0xeb,
+ 0x17, 0xa7, 0x16, 0x95, 0xa5, 0x16, 0xe9, 0x97, 0x19, 0x83, 0xb9, 0x50, 0x5e, 0x62, 0x41, 0xa6,
+ 0x3e, 0xc8, 0x52, 0x7d, 0x88, 0xcb, 0x00, 0x01, 0x00, 0x00, 0xff, 0xff, 0x8d, 0x65, 0xd9, 0x03,
+ 0xae, 0x00, 0x00, 0x00,
+}
+
+func (m *Snapshot) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *Snapshot) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *Snapshot) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Data != nil {
+ i -= len(m.Data)
+ copy(dAtA[i:], m.Data)
+ i = encodeVarintSnap(dAtA, i, uint64(len(m.Data)))
+ i--
+ dAtA[i] = 0x12
+ }
+ i = encodeVarintSnap(dAtA, i, uint64(m.Crc))
+ i--
+ dAtA[i] = 0x8
+ return len(dAtA) - i, nil
+}
+
+func encodeVarintSnap(dAtA []byte, offset int, v uint64) int {
+ offset -= sovSnap(v)
+ base := offset
+ for v >= 1<<7 {
+ dAtA[offset] = uint8(v&0x7f | 0x80)
+ v >>= 7
+ offset++
+ }
+ dAtA[offset] = uint8(v)
+ return base
+}
+func (m *Snapshot) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ n += 1 + sovSnap(uint64(m.Crc))
+ if m.Data != nil {
+ l = len(m.Data)
+ n += 1 + l + sovSnap(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func sovSnap(x uint64) (n int) {
+ return (math_bits.Len64(x|1) + 6) / 7
+}
+func sozSnap(x uint64) (n int) {
+ return sovSnap(uint64((x << 1) ^ uint64((int64(x) >> 63))))
+}
+func (m *Snapshot) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowSnap
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: snapshot: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: snapshot: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 0 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Crc", wireType)
+ }
+ m.Crc = 0
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowSnap
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ m.Crc |= uint32(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ case 2:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Data", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowSnap
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthSnap
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthSnap
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Data = append(m.Data[:0], dAtA[iNdEx:postIndex]...)
+ if m.Data == nil {
+ m.Data = []byte{}
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipSnap(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthSnap
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func skipSnap(dAtA []byte) (n int, err error) {
+ l := len(dAtA)
+ iNdEx := 0
+ depth := 0
+ for iNdEx < l {
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowSnap
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= (uint64(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ wireType := int(wire & 0x7)
+ switch wireType {
+ case 0:
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowSnap
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ iNdEx++
+ if dAtA[iNdEx-1] < 0x80 {
+ break
+ }
+ }
+ case 1:
+ iNdEx += 8
+ case 2:
+ var length int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowSnap
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ length |= (int(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if length < 0 {
+ return 0, ErrInvalidLengthSnap
+ }
+ iNdEx += length
+ case 3:
+ depth++
+ case 4:
+ if depth == 0 {
+ return 0, ErrUnexpectedEndOfGroupSnap
+ }
+ depth--
+ case 5:
+ iNdEx += 4
+ default:
+ return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
+ }
+ if iNdEx < 0 {
+ return 0, ErrInvalidLengthSnap
+ }
+ if depth == 0 {
+ return iNdEx, nil
+ }
+ }
+ return 0, io.ErrUnexpectedEOF
+}
+
+var (
+ ErrInvalidLengthSnap = fmt.Errorf("proto: negative length found during unmarshaling")
+ ErrIntOverflowSnap = fmt.Errorf("proto: integer overflow")
+ ErrUnexpectedEndOfGroupSnap = fmt.Errorf("proto: unexpected end of group")
+)
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb/snap.proto b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb/snap.proto
new file mode 100644
index 0000000..0a74744
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb/snap.proto
@@ -0,0 +1,16 @@
+syntax = "proto2";
+package snappb;
+
+import "gogoproto/gogo.proto";
+
+option go_package = "go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb";
+
+option (gogoproto.marshaler_all) = true;
+option (gogoproto.sizer_all) = true;
+option (gogoproto.unmarshaler_all) = true;
+option (gogoproto.goproto_getters_all) = false;
+
+message snapshot {
+ optional uint32 crc = 1 [(gogoproto.nullable) = false];
+ optional bytes data = 2;
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snapshotter.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snapshotter.go
new file mode 100644
index 0000000..0de6f9b
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/snap/snapshotter.go
@@ -0,0 +1,282 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snap
+
+import (
+ "errors"
+ "fmt"
+ "hash/crc32"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/verify"
+ pioutil "go.etcd.io/etcd/pkg/v3/ioutil"
+ "go.etcd.io/etcd/pkg/v3/pbutil"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb"
+ "go.etcd.io/etcd/server/v3/storage/wal/walpb"
+ "go.etcd.io/raft/v3"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+const snapSuffix = ".snap"
+
+var (
+ ErrNoSnapshot = errors.New("snap: no available snapshot")
+ ErrEmptySnapshot = errors.New("snap: empty snapshot")
+ ErrCRCMismatch = errors.New("snap: crc mismatch")
+ crcTable = crc32.MakeTable(crc32.Castagnoli)
+
+ // A map of valid files that can be present in the snap folder.
+ validFiles = map[string]bool{
+ "db": true,
+ }
+)
+
+type Snapshotter struct {
+ lg *zap.Logger
+ dir string
+}
+
+func New(lg *zap.Logger, dir string) *Snapshotter {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ return &Snapshotter{
+ lg: lg,
+ dir: dir,
+ }
+}
+
+func (s *Snapshotter) SaveSnap(snapshot raftpb.Snapshot) error {
+ if raft.IsEmptySnap(snapshot) {
+ return nil
+ }
+ return s.save(&snapshot)
+}
+
+func (s *Snapshotter) save(snapshot *raftpb.Snapshot) error {
+ start := time.Now()
+
+ fname := fmt.Sprintf("%016x-%016x%s", snapshot.Metadata.Term, snapshot.Metadata.Index, snapSuffix)
+ b := pbutil.MustMarshal(snapshot)
+ crc := crc32.Update(0, crcTable, b)
+ snap := snappb.Snapshot{Crc: crc, Data: b}
+ d, err := snap.Marshal()
+ if err != nil {
+ return err
+ }
+ snapMarshallingSec.Observe(time.Since(start).Seconds())
+
+ spath := filepath.Join(s.dir, fname)
+
+ fsyncStart := time.Now()
+ err = pioutil.WriteAndSyncFile(spath, d, 0o666)
+ snapFsyncSec.Observe(time.Since(fsyncStart).Seconds())
+
+ if err != nil {
+ s.lg.Warn("failed to write a snap file", zap.String("path", spath), zap.Error(err))
+ rerr := os.Remove(spath)
+ if rerr != nil {
+ s.lg.Warn("failed to remove a broken snap file", zap.String("path", spath), zap.Error(rerr))
+ }
+ return err
+ }
+
+ snapSaveSec.Observe(time.Since(start).Seconds())
+ return nil
+}
+
+// Load returns the newest snapshot.
+func (s *Snapshotter) Load() (*raftpb.Snapshot, error) {
+ return s.loadMatching(func(*raftpb.Snapshot) bool { return true })
+}
+
+// LoadNewestAvailable loads the newest snapshot available that is in walSnaps.
+func (s *Snapshotter) LoadNewestAvailable(walSnaps []walpb.Snapshot) (*raftpb.Snapshot, error) {
+ return s.loadMatching(func(snapshot *raftpb.Snapshot) bool {
+ m := snapshot.Metadata
+ for i := len(walSnaps) - 1; i >= 0; i-- {
+ if m.Term == walSnaps[i].Term && m.Index == walSnaps[i].Index {
+ return true
+ }
+ }
+ return false
+ })
+}
+
+// loadMatching returns the newest snapshot where matchFn returns true.
+func (s *Snapshotter) loadMatching(matchFn func(*raftpb.Snapshot) bool) (*raftpb.Snapshot, error) {
+ names, err := s.snapNames()
+ if err != nil {
+ return nil, err
+ }
+ var snap *raftpb.Snapshot
+ for _, name := range names {
+ if snap, err = s.loadSnap(name); err == nil && matchFn(snap) {
+ return snap, nil
+ }
+ }
+ return nil, ErrNoSnapshot
+}
+
+func (s *Snapshotter) loadSnap(name string) (*raftpb.Snapshot, error) {
+ fpath := filepath.Join(s.dir, name)
+ snap, err := Read(s.lg, fpath)
+ if err != nil {
+ brokenPath := fpath + ".broken"
+ s.lg.Warn("failed to read a snap file", zap.String("path", fpath), zap.Error(err))
+ if rerr := os.Rename(fpath, brokenPath); rerr != nil {
+ s.lg.Warn("failed to rename a broken snap file", zap.String("path", fpath), zap.String("broken-path", brokenPath), zap.Error(rerr))
+ } else {
+ s.lg.Warn("renamed to a broken snap file", zap.String("path", fpath), zap.String("broken-path", brokenPath))
+ }
+ }
+ return snap, err
+}
+
+// Read reads the snapshot named by snapname and returns the snapshot.
+func Read(lg *zap.Logger, snapname string) (*raftpb.Snapshot, error) {
+ verify.Assert(lg != nil, "the logger should not be nil")
+ b, err := os.ReadFile(snapname)
+ if err != nil {
+ lg.Warn("failed to read a snap file", zap.String("path", snapname), zap.Error(err))
+ return nil, err
+ }
+
+ if len(b) == 0 {
+ lg.Warn("failed to read empty snapshot file", zap.String("path", snapname))
+ return nil, ErrEmptySnapshot
+ }
+
+ var serializedSnap snappb.Snapshot
+ if err = serializedSnap.Unmarshal(b); err != nil {
+ lg.Warn("failed to unmarshal snappb.Snapshot", zap.String("path", snapname), zap.Error(err))
+ return nil, err
+ }
+
+ if len(serializedSnap.Data) == 0 || serializedSnap.Crc == 0 {
+ lg.Warn("failed to read empty snapshot data", zap.String("path", snapname))
+ return nil, ErrEmptySnapshot
+ }
+
+ crc := crc32.Update(0, crcTable, serializedSnap.Data)
+ if crc != serializedSnap.Crc {
+ lg.Warn("snap file is corrupt",
+ zap.String("path", snapname),
+ zap.Uint32("prev-crc", serializedSnap.Crc),
+ zap.Uint32("new-crc", crc),
+ )
+ return nil, ErrCRCMismatch
+ }
+
+ var snap raftpb.Snapshot
+ if err = snap.Unmarshal(serializedSnap.Data); err != nil {
+ lg.Warn("failed to unmarshal raftpb.Snapshot", zap.String("path", snapname), zap.Error(err))
+ return nil, err
+ }
+ return &snap, nil
+}
+
+// snapNames returns the filenames of the snapshots in logical time order (from newest to oldest).
+// If there are no available snapshots, ErrNoSnapshot is returned.
+func (s *Snapshotter) snapNames() ([]string, error) {
+ dir, err := os.Open(s.dir)
+ if err != nil {
+ return nil, err
+ }
+ defer dir.Close()
+ names, err := dir.Readdirnames(-1)
+ if err != nil {
+ return nil, err
+ }
+ filenames, err := s.cleanupSnapdir(names)
+ if err != nil {
+ return nil, err
+ }
+ snaps := s.checkSuffix(filenames)
+ if len(snaps) == 0 {
+ return nil, ErrNoSnapshot
+ }
+ sort.Sort(sort.Reverse(sort.StringSlice(snaps)))
+ return snaps, nil
+}
+
+func (s *Snapshotter) checkSuffix(names []string) []string {
+ var snaps []string
+ for i := range names {
+ if strings.HasSuffix(names[i], snapSuffix) {
+ snaps = append(snaps, names[i])
+ } else {
+ // If we find a file which is not a snapshot, then check whether it's
+ // a valid file. If not, log a warning.
+ if _, ok := validFiles[names[i]]; !ok {
+ s.lg.Warn("found unexpected non-snap file; skipping", zap.String("path", names[i]))
+ }
+ }
+ }
+ return snaps
+}
+
+// cleanupSnapdir removes any files that should not be in the snapshot directory:
+// - db.tmp prefixed files that can be orphaned by defragmentation
+func (s *Snapshotter) cleanupSnapdir(filenames []string) (names []string, err error) {
+ names = make([]string, 0, len(filenames))
+ for _, filename := range filenames {
+ if strings.HasPrefix(filename, "db.tmp") {
+ s.lg.Info("found orphaned defragmentation file; deleting", zap.String("path", filename))
+ if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) {
+ return names, fmt.Errorf("failed to remove orphaned defragmentation file %s: %w", filename, rmErr)
+ }
+ } else {
+ names = append(names, filename)
+ }
+ }
+ return names, nil
+}
+
+func (s *Snapshotter) ReleaseSnapDBs(snap raftpb.Snapshot) error {
+ dir, err := os.Open(s.dir)
+ if err != nil {
+ return err
+ }
+ defer dir.Close()
+ filenames, err := dir.Readdirnames(-1)
+ if err != nil {
+ return err
+ }
+ for _, filename := range filenames {
+ if strings.HasSuffix(filename, ".snap.db") {
+ hexIndex := strings.TrimSuffix(filepath.Base(filename), ".snap.db")
+ index, err := strconv.ParseUint(hexIndex, 16, 64)
+ if err != nil {
+ s.lg.Error("failed to parse index from filename", zap.String("path", filename), zap.String("error", err.Error()))
+ continue
+ }
+ if index < snap.Metadata.Index {
+ s.lg.Info("found orphaned .snap.db file; deleting", zap.String("path", filename))
+ if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) {
+ s.lg.Error("failed to remove orphaned .snap.db file", zap.String("path", filename), zap.String("error", rmErr.Error()))
+ }
+ }
+ }
+ }
+ return nil
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2discovery/discovery.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2discovery/discovery.go
new file mode 100644
index 0000000..00a2c7d
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2discovery/discovery.go
@@ -0,0 +1,417 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v2discovery provides an implementation of the cluster discovery that
+// is used by etcd with the v2 client.
+package v2discovery
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "math"
+ "net/http"
+ "net/url"
+ "path"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/transport"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ client "go.etcd.io/etcd/server/v3/internal/clientv2"
+)
+
+var (
+ ErrInvalidURL = errors.New("discovery: invalid URL")
+ ErrBadSizeKey = errors.New("discovery: size key is bad")
+ ErrSizeNotFound = errors.New("discovery: size key not found")
+ ErrTokenNotFound = errors.New("discovery: token not found")
+ ErrDuplicateID = errors.New("discovery: found duplicate id")
+ ErrDuplicateName = errors.New("discovery: found duplicate name")
+ ErrFullCluster = errors.New("discovery: cluster is full")
+ ErrTooManyRetries = errors.New("discovery: too many retries")
+ ErrBadDiscoveryEndpoint = errors.New("discovery: bad discovery endpoint")
+)
+
+var (
+ // Number of retries discovery will attempt before giving up and erroring out.
+ nRetries = uint(math.MaxUint32)
+ maxExpoentialRetries = uint(8)
+)
+
+// JoinCluster will connect to the discovery service at the given url, and
+// register the server represented by the given id and config with the cluster.
+func JoinCluster(lg *zap.Logger, durl, dproxyurl string, id types.ID, config string) (string, error) {
+ d, err := newDiscovery(lg, durl, dproxyurl, id)
+ if err != nil {
+ return "", err
+ }
+ return d.joinCluster(config)
+}
+
+// GetCluster will connect to the discovery service at the given url and
+// retrieve a string describing the cluster.
+func GetCluster(lg *zap.Logger, durl, dproxyurl string) (string, error) {
+ d, err := newDiscovery(lg, durl, dproxyurl, 0)
+ if err != nil {
+ return "", err
+ }
+ return d.getCluster()
+}
+
+type discovery struct {
+ lg *zap.Logger
+ cluster string
+ id types.ID
+ c client.KeysAPI
+ retries uint
+ url *url.URL
+
+ clock clockwork.Clock
+}
+
+// newProxyFunc builds a proxy function from the given string, which should
+// represent a URL that can be used as a proxy. It performs basic
+// sanitization of the URL and returns any error encountered.
+func newProxyFunc(lg *zap.Logger, proxy string) (func(*http.Request) (*url.URL, error), error) {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ if proxy == "" {
+ return nil, nil
+ }
+ // Do a small amount of URL sanitization to help the user
+ // Derived from net/http.ProxyFromEnvironment
+ proxyURL, err := url.Parse(proxy)
+ if err != nil || !strings.HasPrefix(proxyURL.Scheme, "http") {
+ // proxy was bogus. Try prepending "http://" to it and
+ // see if that parses correctly. If not, we ignore the
+ // error and complain about the original one
+ var err2 error
+ proxyURL, err2 = url.Parse("http://" + proxy)
+ if err2 == nil {
+ err = nil
+ }
+ }
+ if err != nil {
+ return nil, fmt.Errorf("invalid proxy address %q: %w", proxy, err)
+ }
+
+ lg.Info("running proxy with discovery", zap.String("proxy-url", proxyURL.String()))
+ return http.ProxyURL(proxyURL), nil
+}
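+
+// For example (illustrative only): newProxyFunc(lg, "10.0.0.1:8080") accepts
+// the bare host:port by retrying the parse with an "http://" prefix, and the
+// returned function behaves like http.ProxyURL for "http://10.0.0.1:8080".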
+
+func newDiscovery(lg *zap.Logger, durl, dproxyurl string, id types.ID) (*discovery, error) {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ u, err := url.Parse(durl)
+ if err != nil {
+ return nil, err
+ }
+ token := u.Path
+ u.Path = ""
+ pf, err := newProxyFunc(lg, dproxyurl)
+ if err != nil {
+ return nil, err
+ }
+
+ // TODO: add ResponseHeaderTimeout back when watch on discovery service writes header early
+ tr, err := transport.NewTransport(transport.TLSInfo{}, 30*time.Second)
+ if err != nil {
+ return nil, err
+ }
+ tr.Proxy = pf
+ cfg := client.Config{
+ Transport: tr,
+ Endpoints: []string{u.String()},
+ }
+ c, err := client.New(cfg)
+ if err != nil {
+ return nil, err
+ }
+ dc := client.NewKeysAPIWithPrefix(c, "")
+ return &discovery{
+ lg: lg,
+ cluster: token,
+ c: dc,
+ id: id,
+ url: u,
+ clock: clockwork.NewRealClock(),
+ }, nil
+}
+
+func (d *discovery) joinCluster(config string) (string, error) {
+ // fast path: if the cluster is full, return the error;
+ // there is no need to register with the cluster in this case.
+ if _, _, _, err := d.checkCluster(); err != nil {
+ return "", err
+ }
+
+ if err := d.createSelf(config); err != nil {
+ // The join fails even if createSelf merely timed out.
+ // TODO(barakmich): Retrying the same node might want to succeed here
+ // (ie, createSelf should be idempotent for discovery).
+ return "", err
+ }
+
+ nodes, size, index, err := d.checkCluster()
+ if err != nil {
+ return "", err
+ }
+
+ all, err := d.waitNodes(nodes, size, index)
+ if err != nil {
+ return "", err
+ }
+
+ return nodesToCluster(all, size)
+}
+
+func (d *discovery) getCluster() (string, error) {
+ nodes, size, index, err := d.checkCluster()
+ if err != nil {
+ if errors.Is(err, ErrFullCluster) {
+ return nodesToCluster(nodes, size)
+ }
+ return "", err
+ }
+
+ all, err := d.waitNodes(nodes, size, index)
+ if err != nil {
+ return "", err
+ }
+ return nodesToCluster(all, size)
+}
+
+func (d *discovery) createSelf(contents string) error {
+ ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout)
+ resp, err := d.c.Create(ctx, d.selfKey(), contents)
+ cancel()
+ if err != nil {
+ var eerr client.Error
+ if errors.As(err, &eerr) && eerr.Code == client.ErrorCodeNodeExist {
+ return ErrDuplicateID
+ }
+ return err
+ }
+
+ // ensure self appears on the server we connected to
+ w := d.c.Watcher(d.selfKey(), &client.WatcherOptions{AfterIndex: resp.Node.CreatedIndex - 1})
+ _, err = w.Next(context.Background())
+ return err
+}
+
+func (d *discovery) checkCluster() ([]*client.Node, uint64, uint64, error) {
+ configKey := path.Join("/", d.cluster, "_config")
+ ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout)
+ // find cluster size
+ resp, err := d.c.Get(ctx, path.Join(configKey, "size"), nil)
+ cancel()
+ if err != nil {
+ var eerr *client.Error
+ if errors.As(err, &eerr) && eerr.Code == client.ErrorCodeKeyNotFound {
+ return nil, 0, 0, ErrSizeNotFound
+ }
+ if errors.Is(err, client.ErrInvalidJSON) {
+ return nil, 0, 0, ErrBadDiscoveryEndpoint
+ }
+ var ce *client.ClusterError
+ if errors.As(err, &ce) {
+ d.lg.Warn(
+ "failed to get from discovery server",
+ zap.String("discovery-url", d.url.String()),
+ zap.String("path", path.Join(configKey, "size")),
+ zap.Error(err),
+ zap.String("err-detail", ce.Detail()),
+ )
+ return d.checkClusterRetry()
+ }
+ return nil, 0, 0, err
+ }
+ size, err := strconv.ParseUint(resp.Node.Value, 10, 0)
+ if err != nil {
+ return nil, 0, 0, ErrBadSizeKey
+ }
+
+ ctx, cancel = context.WithTimeout(context.Background(), client.DefaultRequestTimeout)
+ resp, err = d.c.Get(ctx, d.cluster, nil)
+ cancel()
+ if err != nil {
+ var ce *client.ClusterError
+ if errors.As(err, &ce) {
+ d.lg.Warn(
+ "failed to get from discovery server",
+ zap.String("discovery-url", d.url.String()),
+ zap.String("path", d.cluster),
+ zap.Error(err),
+ zap.String("err-detail", ce.Detail()),
+ )
+ return d.checkClusterRetry()
+ }
+ return nil, 0, 0, err
+ }
+ var nodes []*client.Node
+ // append non-config keys to nodes
+ for _, n := range resp.Node.Nodes {
+ if path.Base(n.Key) != path.Base(configKey) {
+ nodes = append(nodes, n)
+ }
+ }
+
+ snodes := sortableNodes{nodes}
+ sort.Sort(snodes)
+
+ // find self position
+ for i := range nodes {
+ if path.Base(nodes[i].Key) == path.Base(d.selfKey()) {
+ break
+ }
+ if uint64(i) >= size-1 {
+ return nodes[:size], size, resp.Index, ErrFullCluster
+ }
+ }
+ return nodes, size, resp.Index, nil
+}
+
+func (d *discovery) logAndBackoffForRetry(step string) {
+ d.retries++
+ // logAndBackoffForRetry caps the exponential backoff once the retry count exceeds maxExpoentialRetries, switching to a constant backoff afterward.
+ retries := d.retries
+ if retries > maxExpoentialRetries {
+ retries = maxExpoentialRetries
+ }
+ retryTimeInSecond := time.Duration(0x1<<retries) * time.Second
+ d.lg.Info(
+ "retry connecting to discovery service",
+ zap.String("url", d.url.String()),
+ zap.String("reason", step),
+ zap.Duration("backoff", retryTimeInSecond),
+ )
+ d.clock.Sleep(retryTimeInSecond)
+}
+
+func (d *discovery) checkClusterRetry() ([]*client.Node, uint64, uint64, error) {
+ if d.retries < nRetries {
+ d.logAndBackoffForRetry("cluster status check")
+ return d.checkCluster()
+ }
+ return nil, 0, 0, ErrTooManyRetries
+}
+
+func (d *discovery) waitNodesRetry() ([]*client.Node, error) {
+ if d.retries < nRetries {
+ d.logAndBackoffForRetry("waiting for other nodes")
+ nodes, n, index, err := d.checkCluster()
+ if err != nil {
+ return nil, err
+ }
+ return d.waitNodes(nodes, n, index)
+ }
+ return nil, ErrTooManyRetries
+}
+
+func (d *discovery) waitNodes(nodes []*client.Node, size uint64, index uint64) ([]*client.Node, error) {
+ if uint64(len(nodes)) > size {
+ nodes = nodes[:size]
+ }
+ // watch from the next index
+ w := d.c.Watcher(d.cluster, &client.WatcherOptions{AfterIndex: index, Recursive: true})
+ all := make([]*client.Node, len(nodes))
+ copy(all, nodes)
+ for _, n := range all {
+ if path.Base(n.Key) == path.Base(d.selfKey()) {
+ d.lg.Info(
+ "found self from discovery server",
+ zap.String("discovery-url", d.url.String()),
+ zap.String("self", path.Base(d.selfKey())),
+ )
+ } else {
+ d.lg.Info(
+ "found peer from discovery server",
+ zap.String("discovery-url", d.url.String()),
+ zap.String("peer", path.Base(n.Key)),
+ )
+ }
+ }
+
+ // wait for others
+ for uint64(len(all)) < size {
+ d.lg.Info(
+ "found peers from discovery server; waiting for more",
+ zap.String("discovery-url", d.url.String()),
+ zap.Int("found-peers", len(all)),
+ zap.Int("needed-peers", int(size-uint64(len(all)))),
+ )
+ resp, err := w.Next(context.Background())
+ if err != nil {
+ var ce *client.ClusterError
+ if errors.As(err, &ce) {
+ d.lg.Warn(
+ "error while waiting for peers",
+ zap.String("discovery-url", d.url.String()),
+ zap.Error(err),
+ zap.String("err-detail", ce.Detail()),
+ )
+ return d.waitNodesRetry()
+ }
+ return nil, err
+ }
+ d.lg.Info(
+ "found peer from discovery server",
+ zap.String("discovery-url", d.url.String()),
+ zap.String("peer", path.Base(resp.Node.Key)),
+ )
+ all = append(all, resp.Node)
+ }
+ d.lg.Info(
+ "found all needed peers from discovery server",
+ zap.String("discovery-url", d.url.String()),
+ zap.Int("found-peers", len(all)),
+ )
+ return all, nil
+}
+
+func (d *discovery) selfKey() string {
+ return path.Join("/", d.cluster, d.id.String())
+}
+
+func nodesToCluster(ns []*client.Node, size uint64) (string, error) {
+ s := make([]string, len(ns))
+ for i, n := range ns {
+ s[i] = n.Value
+ }
+ us := strings.Join(s, ",")
+ m, err := types.NewURLsMap(us)
+ if err != nil {
+ return us, ErrInvalidURL
+ }
+ if uint64(m.Len()) != size {
+ return us, ErrDuplicateName
+ }
+ return us, nil
+}
+
+type sortableNodes struct{ Nodes []*client.Node }
+
+func (ns sortableNodes) Len() int { return len(ns.Nodes) }
+func (ns sortableNodes) Less(i, j int) bool {
+ return ns.Nodes[i].CreatedIndex < ns.Nodes[j].CreatedIndex
+}
+func (ns sortableNodes) Swap(i, j int) { ns.Nodes[i], ns.Nodes[j] = ns.Nodes[j], ns.Nodes[i] }
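For reference, the backoff above sleeps 2^retries seconds and stops growing once the retry count passes the cap. A minimal standalone sketch of the same capping rule (the cap value here is illustrative, not the vendored constant):

```go
package main

import (
	"fmt"
	"time"
)

// illustrative cap; the vendored constant lives elsewhere in this package
const maxRetriesBeforeConstantBackoff = 8

// backoffFor mirrors logAndBackoffForRetry: exponential growth in the
// sleep interval up to the cap, then a constant backoff.
func backoffFor(retries int) time.Duration {
	if retries > maxRetriesBeforeConstantBackoff {
		retries = maxRetriesBeforeConstantBackoff
	}
	return time.Duration(1<<uint(retries)) * time.Second
}

func main() {
	for r := 1; r <= 10; r++ {
		fmt.Printf("retry %2d -> backoff %v\n", r, backoffFor(r))
	}
}
```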
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2error/error.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2error/error.go
new file mode 100644
index 0000000..168796e
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2error/error.go
@@ -0,0 +1,161 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v2error describes errors in the etcd project. When any change happens,
+// https://github.com/etcd-io/website/blob/main/content/docs/v2/errorcode.md
+// needs to be updated correspondingly.
+// To be deprecated in favor of v3 APIs.
+package v2error
+
+import (
+ "encoding/json"
+ "fmt"
+ "net/http"
+)
+
+var errors = map[int]string{
+ // command related errors
+ EcodeKeyNotFound: "Key not found",
+ EcodeTestFailed: "Compare failed", // test and set
+ EcodeNotFile: "Not a file",
+ ecodeNoMorePeer: "Reached the max number of peers in the cluster",
+ EcodeNotDir: "Not a directory",
+ EcodeNodeExist: "Key already exists", // create
+ ecodeKeyIsPreserved: "The prefix of given key is a keyword in etcd",
+ EcodeRootROnly: "Root is read only",
+ EcodeDirNotEmpty: "Directory not empty",
+ ecodeExistingPeerAddr: "Peer address has existed",
+ EcodeUnauthorized: "The request requires user authentication",
+
+ // Post form related errors
+ ecodeValueRequired: "Value is Required in POST form",
+ EcodePrevValueRequired: "PrevValue is Required in POST form",
+ EcodeTTLNaN: "The given TTL in POST form is not a number",
+ EcodeIndexNaN: "The given index in POST form is not a number",
+ ecodeValueOrTTLRequired: "Value or TTL is required in POST form",
+ ecodeTimeoutNaN: "The given timeout in POST form is not a number",
+ ecodeNameRequired: "Name is required in POST form",
+ ecodeIndexOrValueRequired: "Index or value is required",
+ ecodeIndexValueMutex: "Index and value cannot both be specified",
+ EcodeInvalidField: "Invalid field",
+ EcodeInvalidForm: "Invalid POST form",
+ EcodeRefreshValue: "Value provided on refresh",
+ EcodeRefreshTTLRequired: "A TTL must be provided on refresh",
+
+ // raft related errors
+ EcodeRaftInternal: "Raft Internal Error",
+ EcodeLeaderElect: "During Leader Election",
+
+ // etcd related errors
+ EcodeWatcherCleared: "watcher is cleared due to etcd recovery",
+ EcodeEventIndexCleared: "The event in requested index is outdated and cleared",
+ ecodeStandbyInternal: "Standby Internal Error",
+ ecodeInvalidActiveSize: "Invalid active size",
+ ecodeInvalidRemoveDelay: "Standby remove delay",
+
+ // client related errors
+ ecodeClientInternal: "Client Internal Error",
+}
+
+var errorStatus = map[int]int{
+ EcodeKeyNotFound: http.StatusNotFound,
+ EcodeNotFile: http.StatusForbidden,
+ EcodeDirNotEmpty: http.StatusForbidden,
+ EcodeUnauthorized: http.StatusUnauthorized,
+ EcodeTestFailed: http.StatusPreconditionFailed,
+ EcodeNodeExist: http.StatusPreconditionFailed,
+ EcodeRaftInternal: http.StatusInternalServerError,
+ EcodeLeaderElect: http.StatusInternalServerError,
+}
+
+const (
+ EcodeKeyNotFound = 100
+ EcodeTestFailed = 101
+ EcodeNotFile = 102
+ ecodeNoMorePeer = 103
+ EcodeNotDir = 104
+ EcodeNodeExist = 105
+ ecodeKeyIsPreserved = 106
+ EcodeRootROnly = 107
+ EcodeDirNotEmpty = 108
+ ecodeExistingPeerAddr = 109
+ EcodeUnauthorized = 110
+
+ ecodeValueRequired = 200
+ EcodePrevValueRequired = 201
+ EcodeTTLNaN = 202
+ EcodeIndexNaN = 203
+ ecodeValueOrTTLRequired = 204
+ ecodeTimeoutNaN = 205
+ ecodeNameRequired = 206
+ ecodeIndexOrValueRequired = 207
+ ecodeIndexValueMutex = 208
+ EcodeInvalidField = 209
+ EcodeInvalidForm = 210
+ EcodeRefreshValue = 211
+ EcodeRefreshTTLRequired = 212
+
+ EcodeRaftInternal = 300
+ EcodeLeaderElect = 301
+
+ EcodeWatcherCleared = 400
+ EcodeEventIndexCleared = 401
+ ecodeStandbyInternal = 402
+ ecodeInvalidActiveSize = 403
+ ecodeInvalidRemoveDelay = 404
+
+ ecodeClientInternal = 500
+)
+
+type Error struct {
+ ErrorCode int `json:"errorCode"`
+ Message string `json:"message"`
+ Cause string `json:"cause,omitempty"`
+ Index uint64 `json:"index"`
+}
+
+func NewError(errorCode int, cause string, index uint64) *Error {
+ return &Error{
+ ErrorCode: errorCode,
+ Message: errors[errorCode],
+ Cause: cause,
+ Index: index,
+ }
+}
+
+// Error implements the error interface.
+func (e Error) Error() string {
+ return e.Message + " (" + e.Cause + ")"
+}
+
+func (e Error) toJSONString() string {
+ b, _ := json.Marshal(e)
+ return string(b)
+}
+
+func (e Error) StatusCode() int {
+ status, ok := errorStatus[e.ErrorCode]
+ if !ok {
+ status = http.StatusBadRequest
+ }
+ return status
+}
+
+func (e Error) WriteTo(w http.ResponseWriter) error {
+ w.Header().Add("X-Etcd-Index", fmt.Sprint(e.Index))
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(e.StatusCode())
+ _, err := w.Write([]byte(e.toJSONString() + "\n"))
+ return err
+}
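As a usage sketch (assuming the vendored package is importable by its canonical path), an Error can be rendered straight onto an HTTP response; the status code comes from errorStatus, with http.StatusBadRequest as the fallback:

```go
package main

import (
	"fmt"
	"net/http/httptest"

	"go.etcd.io/etcd/server/v3/etcdserver/api/v2error"
)

func main() {
	// "Key not found" for cause "/foo" at etcd index 7.
	err := v2error.NewError(v2error.EcodeKeyNotFound, "/foo", 7)

	rec := httptest.NewRecorder()
	_ = err.WriteTo(rec) // sets X-Etcd-Index, Content-Type, and the mapped status

	fmt.Println(rec.Code)                         // 404
	fmt.Println(rec.Header().Get("X-Etcd-Index")) // 7
	fmt.Print(rec.Body.String())                  // JSON-encoded Error
}
```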
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/leader.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/leader.go
new file mode 100644
index 0000000..f17cecc
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/leader.go
@@ -0,0 +1,135 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2stats
+
+import (
+ "encoding/json"
+ "math"
+ "sync"
+ "time"
+
+ "go.uber.org/zap"
+)
+
+// LeaderStats is used by the leader in an etcd cluster, and encapsulates
+// statistics about communication with its followers
+type LeaderStats struct {
+ lg *zap.Logger
+ leaderStats
+ sync.Mutex
+}
+
+type leaderStats struct {
+ // Leader is the ID of the leader in the etcd cluster.
+ // TODO(jonboulle): clarify that these are IDs, not names
+ Leader string `json:"leader"`
+ Followers map[string]*FollowerStats `json:"followers"`
+}
+
+// NewLeaderStats generates a new LeaderStats with the given id as leader
+func NewLeaderStats(lg *zap.Logger, id string) *LeaderStats {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ return &LeaderStats{
+ lg: lg,
+ leaderStats: leaderStats{
+ Leader: id,
+ Followers: make(map[string]*FollowerStats),
+ },
+ }
+}
+
+func (ls *LeaderStats) JSON() []byte {
+ ls.Lock()
+ stats := ls.leaderStats
+ ls.Unlock()
+ b, err := json.Marshal(stats)
+ // TODO(jonboulle): appropriate error handling?
+ if err != nil {
+ ls.lg.Error("failed to marshal leader stats", zap.Error(err))
+ }
+ return b
+}
+
+func (ls *LeaderStats) Follower(name string) *FollowerStats {
+ ls.Lock()
+ defer ls.Unlock()
+ fs, ok := ls.Followers[name]
+ if !ok {
+ fs = &FollowerStats{}
+ fs.Latency.Minimum = 1 << 63
+ ls.Followers[name] = fs
+ }
+ return fs
+}
+
+// FollowerStats encapsulates various statistics about a follower in an etcd cluster
+type FollowerStats struct {
+ Latency LatencyStats `json:"latency"`
+ Counts CountsStats `json:"counts"`
+
+ sync.Mutex
+}
+
+// LatencyStats encapsulates latency statistics.
+type LatencyStats struct {
+ Current float64 `json:"current"`
+ Average float64 `json:"average"`
+ averageSquare float64
+ StandardDeviation float64 `json:"standardDeviation"`
+ Minimum float64 `json:"minimum"`
+ Maximum float64 `json:"maximum"`
+}
+
+// CountsStats encapsulates raft statistics.
+type CountsStats struct {
+ Fail uint64 `json:"fail"`
+ Success uint64 `json:"success"`
+}
+
+// Succ updates the FollowerStats with a successful send
+func (fs *FollowerStats) Succ(d time.Duration) {
+ fs.Lock()
+ defer fs.Unlock()
+
+ total := float64(fs.Counts.Success) * fs.Latency.Average
+ totalSquare := float64(fs.Counts.Success) * fs.Latency.averageSquare
+
+ fs.Counts.Success++
+
+ fs.Latency.Current = float64(d) / (1000000.0)
+
+ if fs.Latency.Current > fs.Latency.Maximum {
+ fs.Latency.Maximum = fs.Latency.Current
+ }
+
+ if fs.Latency.Current < fs.Latency.Minimum {
+ fs.Latency.Minimum = fs.Latency.Current
+ }
+
+ fs.Latency.Average = (total + fs.Latency.Current) / float64(fs.Counts.Success)
+ fs.Latency.averageSquare = (totalSquare + fs.Latency.Current*fs.Latency.Current) / float64(fs.Counts.Success)
+
+ // sdv = sqrt(avg(x^2) - avg(x)^2)
+ fs.Latency.StandardDeviation = math.Sqrt(fs.Latency.averageSquare - fs.Latency.Average*fs.Latency.Average)
+}
+
+// Fail updates the FollowerStats with an unsuccessful send
+func (fs *FollowerStats) Fail() {
+ fs.Lock()
+ defer fs.Unlock()
+ fs.Counts.Fail++
+}
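Succ keeps a running mean and mean-of-squares so the standard deviation can be derived as sqrt(avg(x^2) - avg(x)^2) without storing individual samples. A standalone sketch of the same incremental update:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Latency samples in milliseconds.
	samples := []float64{1.5, 2.0, 4.5, 3.0}

	var n, avg, avgSq float64
	for _, x := range samples {
		// Recover the running sums, fold in the new sample, re-average.
		total, totalSq := n*avg, n*avgSq
		n++
		avg = (total + x) / n
		avgSq = (totalSq + x*x) / n
	}

	// sdv = sqrt(avg(x^2) - avg(x)^2), as in FollowerStats.Succ.
	sd := math.Sqrt(avgSq - avg*avg)
	fmt.Printf("avg=%.3f stddev=%.3f\n", avg, sd)
}
```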
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/queue.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/queue.go
new file mode 100644
index 0000000..e16cec1
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/queue.go
@@ -0,0 +1,109 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2stats
+
+import (
+ "sync"
+ "time"
+)
+
+const (
+ queueCapacity = 200
+)
+
+// RequestStats represents the stats for a request.
+// It encapsulates the sending time and the size of the request.
+type RequestStats struct {
+ SendingTime time.Time
+ Size int
+}
+
+type statsQueue struct {
+ items [queueCapacity]*RequestStats
+ size int
+ front int
+ back int
+ totalReqSize int
+ rwl sync.RWMutex
+}
+
+func (q *statsQueue) Len() int {
+ return q.size
+}
+
+func (q *statsQueue) ReqSize() int {
+ return q.totalReqSize
+}
+
+// frontAndBack returns the front and back elements of the queue.
+// They must be read together under the protection of the read lock.
+func (q *statsQueue) frontAndBack() (*RequestStats, *RequestStats) {
+ q.rwl.RLock()
+ defer q.rwl.RUnlock()
+ if q.size != 0 {
+ return q.items[q.front], q.items[q.back]
+ }
+ return nil, nil
+}
+
+// Insert adds a RequestStats to the queue and updates the running totals.
+func (q *statsQueue) Insert(p *RequestStats) {
+ q.rwl.Lock()
+ defer q.rwl.Unlock()
+
+ q.back = (q.back + 1) % queueCapacity
+
+ if q.size == queueCapacity { // dequeue
+ q.totalReqSize -= q.items[q.front].Size
+ q.front = (q.back + 1) % queueCapacity
+ } else {
+ q.size++
+ }
+
+ q.items[q.back] = p
+ q.totalReqSize += q.items[q.back].Size
+}
+
+// Rate returns the package (request) rate and byte rate, both per second.
+func (q *statsQueue) Rate() (float64, float64) {
+ front, back := q.frontAndBack()
+
+ if front == nil || back == nil {
+ return 0, 0
+ }
+
+ if time.Since(back.SendingTime) > time.Second {
+ q.Clear()
+ return 0, 0
+ }
+
+ sampleDuration := back.SendingTime.Sub(front.SendingTime)
+
+ pr := float64(q.Len()) / float64(sampleDuration) * float64(time.Second)
+
+ br := float64(q.ReqSize()) / float64(sampleDuration) * float64(time.Second)
+
+ return pr, br
+}
+
+// Clear resets the statsQueue to its empty state.
+func (q *statsQueue) Clear() {
+ q.rwl.Lock()
+ defer q.rwl.Unlock()
+ q.back = -1
+ q.front = 0
+ q.size = 0
+ q.totalReqSize = 0
+}
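Rate scales the request and byte counts by the window between the oldest and newest entries to get per-second figures; a quick sketch of that arithmetic with made-up numbers:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	const (
		requests  = 150   // q.Len()
		totalSize = 30000 // q.ReqSize(), in bytes
	)
	// back.SendingTime.Sub(front.SendingTime)
	window := 500 * time.Millisecond

	pr := float64(requests) / float64(window) * float64(time.Second)
	br := float64(totalSize) / float64(window) * float64(time.Second)
	fmt.Printf("%.0f req/s, %.0f B/s\n", pr, br) // 300 req/s, 60000 B/s
}
```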
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/server.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/server.go
new file mode 100644
index 0000000..e8d218a
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2stats/server.go
@@ -0,0 +1,142 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2stats
+
+import (
+ "encoding/json"
+ "log"
+ "sync"
+ "time"
+
+ "go.etcd.io/raft/v3"
+)
+
+// ServerStats encapsulates various statistics about an EtcdServer and its
+// communication with other members of the cluster
+type ServerStats struct {
+ serverStats
+ sync.Mutex
+}
+
+func NewServerStats(name, id string) *ServerStats {
+ ss := &ServerStats{
+ serverStats: serverStats{
+ Name: name,
+ ID: id,
+ },
+ }
+ now := time.Now()
+ ss.StartTime = now
+ ss.LeaderInfo.StartTime = now
+ ss.sendRateQueue = &statsQueue{back: -1}
+ ss.recvRateQueue = &statsQueue{back: -1}
+ return ss
+}
+
+type serverStats struct {
+ Name string `json:"name"`
+ // ID is the raft ID of the node.
+ // TODO(jonboulle): use ID instead of name?
+ ID string `json:"id"`
+ State raft.StateType `json:"state"`
+ StartTime time.Time `json:"startTime"`
+
+ LeaderInfo struct {
+ Name string `json:"leader"`
+ Uptime string `json:"uptime"`
+ StartTime time.Time `json:"startTime"`
+ } `json:"leaderInfo"`
+
+ RecvAppendRequestCnt uint64 `json:"recvAppendRequestCnt"`
+ RecvingPkgRate float64 `json:"recvPkgRate,omitempty"`
+ RecvingBandwidthRate float64 `json:"recvBandwidthRate,omitempty"`
+
+ SendAppendRequestCnt uint64 `json:"sendAppendRequestCnt"`
+ SendingPkgRate float64 `json:"sendPkgRate,omitempty"`
+ SendingBandwidthRate float64 `json:"sendBandwidthRate,omitempty"`
+
+ sendRateQueue *statsQueue
+ recvRateQueue *statsQueue
+}
+
+func (ss *ServerStats) JSON() []byte {
+ ss.Lock()
+ stats := ss.serverStats
+ stats.SendingPkgRate, stats.SendingBandwidthRate = stats.sendRateQueue.Rate()
+ stats.RecvingPkgRate, stats.RecvingBandwidthRate = stats.recvRateQueue.Rate()
+ stats.LeaderInfo.Uptime = time.Since(stats.LeaderInfo.StartTime).String()
+ ss.Unlock()
+ b, err := json.Marshal(stats)
+ // TODO(jonboulle): appropriate error handling?
+ if err != nil {
+ log.Printf("stats: error marshalling server stats: %v", err)
+ }
+ return b
+}
+
+// RecvAppendReq updates the ServerStats in response to an AppendRequest
+// from the given leader being received
+func (ss *ServerStats) RecvAppendReq(leader string, reqSize int) {
+ ss.Lock()
+ defer ss.Unlock()
+
+ now := time.Now()
+
+ ss.State = raft.StateFollower
+ if leader != ss.LeaderInfo.Name {
+ ss.LeaderInfo.Name = leader
+ ss.LeaderInfo.StartTime = now
+ }
+
+ ss.recvRateQueue.Insert(
+ &RequestStats{
+ SendingTime: now,
+ Size: reqSize,
+ },
+ )
+ ss.RecvAppendRequestCnt++
+}
+
+// SendAppendReq updates the ServerStats in response to an AppendRequest
+// being sent by this server
+func (ss *ServerStats) SendAppendReq(reqSize int) {
+ ss.Lock()
+ defer ss.Unlock()
+
+ ss.becomeLeader()
+
+ ss.sendRateQueue.Insert(
+ &RequestStats{
+ SendingTime: time.Now(),
+ Size: reqSize,
+ },
+ )
+
+ ss.SendAppendRequestCnt++
+}
+
+func (ss *ServerStats) BecomeLeader() {
+ ss.Lock()
+ defer ss.Unlock()
+ ss.becomeLeader()
+}
+
+func (ss *ServerStats) becomeLeader() {
+ if ss.State != raft.StateLeader {
+ ss.State = raft.StateLeader
+ ss.LeaderInfo.Name = ss.ID
+ ss.LeaderInfo.StartTime = time.Now()
+ }
+}
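A brief usage sketch (member names are illustrative): record an append sent as leader and one received as follower, then marshal the snapshot, which fills in the derived rate and uptime fields:

```go
package main

import (
	"fmt"

	"go.etcd.io/etcd/server/v3/etcdserver/api/v2stats"
)

func main() {
	ss := v2stats.NewServerStats("infra1", "8e9e05c52164694d")

	ss.SendAppendReq(512)            // marks this member leader
	ss.RecvAppendReq("infra2", 1024) // marks it follower of "infra2"

	fmt.Println(string(ss.JSON()))
}
```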
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/doc.go
new file mode 100644
index 0000000..1933e4c
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/doc.go
@@ -0,0 +1,17 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v2store defines etcd's in-memory key/value store in v2 API.
+// To be deprecated in favor of v3 storage.
+package v2store
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event.go
new file mode 100644
index 0000000..33e9017
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event.go
@@ -0,0 +1,71 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+const (
+ Get = "get"
+ Create = "create"
+ Set = "set"
+ Update = "update"
+ Delete = "delete"
+ CompareAndSwap = "compareAndSwap"
+ CompareAndDelete = "compareAndDelete"
+ Expire = "expire"
+)
+
+type Event struct {
+ Action string `json:"action"`
+ Node *NodeExtern `json:"node,omitempty"`
+ PrevNode *NodeExtern `json:"prevNode,omitempty"`
+ EtcdIndex uint64 `json:"-"`
+ Refresh bool `json:"refresh,omitempty"`
+}
+
+func newEvent(action string, key string, modifiedIndex, createdIndex uint64) *Event {
+ n := &NodeExtern{
+ Key: key,
+ ModifiedIndex: modifiedIndex,
+ CreatedIndex: createdIndex,
+ }
+
+ return &Event{
+ Action: action,
+ Node: n,
+ }
+}
+
+func (e *Event) IsCreated() bool {
+ if e.Action == Create {
+ return true
+ }
+ return e.Action == Set && e.PrevNode == nil
+}
+
+func (e *Event) Index() uint64 {
+ return e.Node.ModifiedIndex
+}
+
+func (e *Event) Clone() *Event {
+ return &Event{
+ Action: e.Action,
+ EtcdIndex: e.EtcdIndex,
+ Node: e.Node.Clone(),
+ PrevNode: e.PrevNode.Clone(),
+ }
+}
+
+func (e *Event) SetRefresh() {
+ e.Refresh = true
+}
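IsCreated treats a Set with no previous node the same as an explicit Create; a quick check of that rule:

```go
package main

import (
	"fmt"

	"go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
)

func main() {
	created := &v2store.Event{Action: v2store.Create}
	setNew := &v2store.Event{Action: v2store.Set} // no PrevNode: a creation
	setOld := &v2store.Event{Action: v2store.Set, PrevNode: &v2store.NodeExtern{Key: "/foo"}}

	fmt.Println(created.IsCreated(), setNew.IsCreated(), setOld.IsCreated()) // true true false
}
```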
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event_history.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event_history.go
new file mode 100644
index 0000000..11c8b01
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event_history.go
@@ -0,0 +1,128 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+import (
+ "fmt"
+ "path"
+ "strings"
+ "sync"
+
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2error"
+)
+
+type EventHistory struct {
+ Queue eventQueue
+ StartIndex uint64
+ LastIndex uint64
+ rwl sync.RWMutex
+}
+
+func newEventHistory(capacity int) *EventHistory {
+ return &EventHistory{
+ Queue: eventQueue{
+ Capacity: capacity,
+ Events: make([]*Event, capacity),
+ },
+ }
+}
+
+// addEvent adds an event to the eventHistory.
+func (eh *EventHistory) addEvent(e *Event) *Event {
+ eh.rwl.Lock()
+ defer eh.rwl.Unlock()
+
+ eh.Queue.insert(e)
+
+ eh.LastIndex = e.Index()
+
+ eh.StartIndex = eh.Queue.Events[eh.Queue.Front].Index()
+
+ return e
+}
+
+// scan enumerates events from the index history and stops at the first point
+// where the key matches.
+func (eh *EventHistory) scan(key string, recursive bool, index uint64) (*Event, *v2error.Error) {
+ eh.rwl.RLock()
+ defer eh.rwl.RUnlock()
+
+ // index should be after the event history's StartIndex
+ if index < eh.StartIndex {
+ return nil,
+ v2error.NewError(v2error.EcodeEventIndexCleared,
+ fmt.Sprintf("the requested history has been cleared [%v/%v]",
+ eh.StartIndex, index), 0)
+ }
+
+ // an index beyond LastIndex refers to an event that has not happened yet
+ if index > eh.LastIndex { // future index
+ return nil, nil
+ }
+
+ offset := index - eh.StartIndex
+ i := (eh.Queue.Front + int(offset)) % eh.Queue.Capacity
+
+ for {
+ e := eh.Queue.Events[i]
+
+ if !e.Refresh {
+ ok := e.Node.Key == key
+
+ if recursive {
+ // add trailing slash
+ nkey := path.Clean(key)
+ if nkey[len(nkey)-1] != '/' {
+ nkey = nkey + "/"
+ }
+
+ ok = ok || strings.HasPrefix(e.Node.Key, nkey)
+ }
+
+ if (e.Action == Delete || e.Action == Expire) && e.PrevNode != nil && e.PrevNode.Dir {
+ ok = ok || strings.HasPrefix(key, e.PrevNode.Key)
+ }
+
+ if ok {
+ return e, nil
+ }
+ }
+
+ i = (i + 1) % eh.Queue.Capacity
+
+ if i == eh.Queue.Back {
+ return nil, nil
+ }
+ }
+}
+
+// clone will be protected by a stop-world lock
+// do not need to obtain internal lock
+func (eh *EventHistory) clone() *EventHistory {
+ clonedQueue := eventQueue{
+ Capacity: eh.Queue.Capacity,
+ Events: make([]*Event, eh.Queue.Capacity),
+ Size: eh.Queue.Size,
+ Front: eh.Queue.Front,
+ Back: eh.Queue.Back,
+ }
+
+ copy(clonedQueue.Events, eh.Queue.Events)
+ return &EventHistory{
+ StartIndex: eh.StartIndex,
+ Queue: clonedQueue,
+ LastIndex: eh.LastIndex,
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event_queue.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event_queue.go
new file mode 100644
index 0000000..aa2a645
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/event_queue.go
@@ -0,0 +1,34 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+type eventQueue struct {
+ Events []*Event
+ Size int
+ Front int
+ Back int
+ Capacity int
+}
+
+func (eq *eventQueue) insert(e *Event) {
+ eq.Events[eq.Back] = e
+ eq.Back = (eq.Back + 1) % eq.Capacity
+
+ if eq.Size == eq.Capacity { // dequeue
+ eq.Front = (eq.Front + 1) % eq.Capacity
+ } else {
+ eq.Size++
+ }
+}
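eventQueue is a fixed-capacity ring: once full, insert advances Front so the oldest event is overwritten rather than the slice growing. A standalone sketch of the same wrap-around arithmetic (eventQueue itself is unexported):

```go
package main

import "fmt"

type ring struct {
	items             []int
	size, front, back int
}

// insert mirrors eventQueue.insert: write at back, advance back modulo
// capacity, and advance front instead of growing once the ring is full.
func (r *ring) insert(v int) {
	r.items[r.back] = v
	r.back = (r.back + 1) % len(r.items)
	if r.size == len(r.items) { // full: overwrite the oldest entry
		r.front = (r.front + 1) % len(r.items)
	} else {
		r.size++
	}
}

func main() {
	r := &ring{items: make([]int, 3)}
	for v := 1; v <= 5; v++ {
		r.insert(v)
	}
	// The oldest surviving entry sits at front.
	fmt.Println(r.items, "front:", r.front) // [4 5 3] front: 2
}
```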
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/metrics.go
new file mode 100644
index 0000000..943457b
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/metrics.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+import "github.com/prometheus/client_golang/prometheus"
+
+// Set of raw Prometheus metrics.
+// Labels
+// * action = declared in event.go
+// * outcome = Outcome
+// Do not increment directly, use Report* methods.
+var (
+ readCounter = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "store",
+ Name: "reads_total",
+ Help: "Total number of reads action by (get/getRecursive), local to this member.",
+ },
+ []string{"action"},
+ )
+
+ writeCounter = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "store",
+ Name: "writes_total",
+ Help: "Total number of writes (e.g. set/compareAndDelete) seen by this member.",
+ },
+ []string{"action"},
+ )
+
+ readFailedCounter = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "store",
+ Name: "reads_failed_total",
+ Help: "Failed read actions by (get/getRecursive), local to this member.",
+ },
+ []string{"action"},
+ )
+
+ writeFailedCounter = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "store",
+ Name: "writes_failed_total",
+ Help: "Failed write actions (e.g. set/compareAndDelete), seen by this member.",
+ },
+ []string{"action"},
+ )
+
+ expireCounter = prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "store",
+ Name: "expires_total",
+ Help: "Total number of expired keys.",
+ },
+ )
+
+ watchRequests = prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "store",
+ Name: "watch_requests_total",
+ Help: "Total number of incoming watch requests (new or reestablished).",
+ },
+ )
+
+ watcherCount = prometheus.NewGauge(
+ prometheus.GaugeOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "store",
+ Name: "watchers",
+ Help: "Count of currently active watchers.",
+ },
+ )
+)
+
+const (
+ GetRecursive = "getRecursive"
+)
+
+func init() {
+ if prometheus.Register(readCounter) != nil {
+ // Tests will try to double register since the tests use both
+ // store and store_test packages; ignore second attempts.
+ return
+ }
+ prometheus.MustRegister(writeCounter)
+ prometheus.MustRegister(expireCounter)
+ prometheus.MustRegister(watchRequests)
+ prometheus.MustRegister(watcherCount)
+}
+
+func reportReadSuccess(readAction string) {
+ readCounter.WithLabelValues(readAction).Inc()
+}
+
+func reportReadFailure(readAction string) {
+ readCounter.WithLabelValues(readAction).Inc()
+ readFailedCounter.WithLabelValues(readAction).Inc()
+}
+
+func reportWriteSuccess(writeAction string) {
+ writeCounter.WithLabelValues(writeAction).Inc()
+}
+
+func reportWriteFailure(writeAction string) {
+ writeCounter.WithLabelValues(writeAction).Inc()
+ writeFailedCounter.WithLabelValues(writeAction).Inc()
+}
+
+func reportExpiredKey() {
+ expireCounter.Inc()
+}
+
+func reportWatchRequest() {
+ watchRequests.Inc()
+}
+
+func reportWatcherAdded() {
+ watcherCount.Inc()
+}
+
+func reportWatcherRemoved() {
+ watcherCount.Dec()
+}
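The init above registers the first collector with prometheus.Register, which reports duplicates as an error, precisely so a second registration pass (e.g. from tests that load both the store and store_test packages) can bail out early. A small sketch of that guard:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	c := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "demo_reads_total", // illustrative metric name
		Help: "Illustrative counter.",
	})

	fmt.Println(prometheus.Register(c) == nil) // true: first registration
	fmt.Println(prometheus.Register(c) == nil) // false: duplicate detected
}
```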
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/node.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/node.go
new file mode 100644
index 0000000..7e5c3e8
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/node.go
@@ -0,0 +1,395 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+import (
+ "path"
+ "sort"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2error"
+)
+
+// explanations of Compare function result
+const (
+ CompareMatch = iota
+ CompareIndexNotMatch
+ CompareValueNotMatch
+ CompareNotMatch
+)
+
+var Permanent time.Time
+
+// node is the basic element in the store system.
+// A key-value pair will have a string value
+// A directory will have a children map
+type node struct {
+ Path string
+
+ CreatedIndex uint64
+ ModifiedIndex uint64
+
+ Parent *node `json:"-"` // should not encode this field! avoid circular dependency.
+
+ ExpireTime time.Time
+ Value string // for key-value pair
+ Children map[string]*node // for directory
+
+ // A reference to the store this node is attached to.
+ store *store
+}
+
+// newKV creates a Key-Value pair
+func newKV(store *store, nodePath string, value string, createdIndex uint64, parent *node, expireTime time.Time) *node {
+ return &node{
+ Path: nodePath,
+ CreatedIndex: createdIndex,
+ ModifiedIndex: createdIndex,
+ Parent: parent,
+ store: store,
+ ExpireTime: expireTime,
+ Value: value,
+ }
+}
+
+// newDir creates a directory
+func newDir(store *store, nodePath string, createdIndex uint64, parent *node, expireTime time.Time) *node {
+ return &node{
+ Path: nodePath,
+ CreatedIndex: createdIndex,
+ ModifiedIndex: createdIndex,
+ Parent: parent,
+ ExpireTime: expireTime,
+ Children: make(map[string]*node),
+ store: store,
+ }
+}
+
+// IsHidden checks whether the node is hidden. A hidden node's name
+// begins with '_', and hidden nodes are not listed when getting a
+// directory. For example, given /foo/_hidden and /foo/notHidden,
+// getting "/foo" returns only /foo/notHidden.
+func (n *node) IsHidden() bool {
+ _, name := path.Split(n.Path)
+
+ return name[0] == '_'
+}
+
+// IsPermanent checks whether the node is permanent.
+func (n *node) IsPermanent() bool {
+ // an uninitialized (zero) time.Time indicates that the node is
+ // permanent.
+ return n.ExpireTime.IsZero()
+}
+
+// IsDir function checks whether the node is a directory.
+// If the node is a directory, the function will return true.
+// Otherwise the function will return false.
+func (n *node) IsDir() bool {
+ return n.Children != nil
+}
+
+// Read returns the value of the node.
+// If the receiver node is not a key-value pair, a "Not A File" error is returned.
+func (n *node) Read() (string, *v2error.Error) {
+ if n.IsDir() {
+ return "", v2error.NewError(v2error.EcodeNotFile, "", n.store.CurrentIndex)
+ }
+
+ return n.Value, nil
+}
+
+// Write sets the value of the node to the given value.
+// If the receiver node is a directory, a "Not A File" error is returned.
+func (n *node) Write(value string, index uint64) *v2error.Error {
+ if n.IsDir() {
+ return v2error.NewError(v2error.EcodeNotFile, "", n.store.CurrentIndex)
+ }
+
+ n.Value = value
+ n.ModifiedIndex = index
+
+ return nil
+}
+
+func (n *node) expirationAndTTL(clock clockwork.Clock) (*time.Time, int64) {
+ if !n.IsPermanent() {
+ /* compute ttl as:
+ ceiling( (expireTime - timeNow) / nanosecondsPerSecond )
+ which ranges from 1..n
+ rather than as:
+ ( (expireTime - timeNow) / nanosecondsPerSecond ) + 1
+ which ranges 1..n+1
+ */
+ ttlN := n.ExpireTime.Sub(clock.Now())
+ ttl := ttlN / time.Second
+ if (ttlN % time.Second) > 0 {
+ ttl++
+ }
+ t := n.ExpireTime.UTC()
+ return &t, int64(ttl)
+ }
+ return nil, 0
+}
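The TTL above is a ceiling division of the remaining lifetime into whole seconds, so a key with any life left always reports at least 1. The same computation in isolation:

```go
package main

import (
	"fmt"
	"time"
)

// ttlSeconds mirrors expirationAndTTL's rounding: ceil(remaining / 1s).
func ttlSeconds(remaining time.Duration) int64 {
	ttl := remaining / time.Second
	if remaining%time.Second > 0 {
		ttl++
	}
	return int64(ttl)
}

func main() {
	fmt.Println(ttlSeconds(1200 * time.Millisecond)) // 2
	fmt.Println(ttlSeconds(3 * time.Second))         // 3
	fmt.Println(ttlSeconds(10 * time.Millisecond))   // 1
}
```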
+
+// List returns a slice of the nodes under the receiver node.
+// If the receiver node is not a directory, a "Not A Directory" error is returned.
+func (n *node) List() ([]*node, *v2error.Error) {
+ if !n.IsDir() {
+ return nil, v2error.NewError(v2error.EcodeNotDir, "", n.store.CurrentIndex)
+ }
+
+ nodes := make([]*node, len(n.Children))
+
+ i := 0
+ for _, node := range n.Children {
+ nodes[i] = node
+ i++
+ }
+
+ return nodes, nil
+}
+
+// GetChild returns the child node with the given name under the directory node.
+// If no such child exists, it returns nil.
+func (n *node) GetChild(name string) (*node, *v2error.Error) {
+ if !n.IsDir() {
+ return nil, v2error.NewError(v2error.EcodeNotDir, n.Path, n.store.CurrentIndex)
+ }
+
+ child, ok := n.Children[name]
+
+ if ok {
+ return child, nil
+ }
+
+ return nil, nil
+}
+
+// Add adds a node to the receiver node.
+// If the receiver is not a directory, a "Not A Directory" error is returned.
+// If a node with the same name already exists under the directory, an
+// "Already Exists" error is returned.
+func (n *node) Add(child *node) *v2error.Error {
+ if !n.IsDir() {
+ return v2error.NewError(v2error.EcodeNotDir, "", n.store.CurrentIndex)
+ }
+
+ _, name := path.Split(child.Path)
+
+ if _, ok := n.Children[name]; ok {
+ return v2error.NewError(v2error.EcodeNodeExist, "", n.store.CurrentIndex)
+ }
+
+ n.Children[name] = child
+
+ return nil
+}
+
+// Remove removes the node.
+func (n *node) Remove(dir, recursive bool, callback func(path string)) *v2error.Error {
+ if !n.IsDir() { // key-value pair
+ _, name := path.Split(n.Path)
+
+ // find its parent and remove the node from the map
+ if n.Parent != nil && n.Parent.Children[name] == n {
+ delete(n.Parent.Children, name)
+ }
+
+ if callback != nil {
+ callback(n.Path)
+ }
+
+ if !n.IsPermanent() {
+ n.store.ttlKeyHeap.remove(n)
+ }
+
+ return nil
+ }
+
+ if !dir {
+ // cannot delete a directory without dir set to true
+ return v2error.NewError(v2error.EcodeNotFile, n.Path, n.store.CurrentIndex)
+ }
+
+ if len(n.Children) != 0 && !recursive {
+ // cannot delete a directory if it is not empty and the operation
+ // is not recursive
+ return v2error.NewError(v2error.EcodeDirNotEmpty, n.Path, n.store.CurrentIndex)
+ }
+
+ for _, child := range n.Children { // delete all children
+ child.Remove(true, true, callback)
+ }
+
+ // delete self
+ _, name := path.Split(n.Path)
+ if n.Parent != nil && n.Parent.Children[name] == n {
+ delete(n.Parent.Children, name)
+
+ if callback != nil {
+ callback(n.Path)
+ }
+
+ if !n.IsPermanent() {
+ n.store.ttlKeyHeap.remove(n)
+ }
+ }
+
+ return nil
+}
+
+func (n *node) Repr(recursive, sorted bool, clock clockwork.Clock) *NodeExtern {
+ if n.IsDir() {
+ node := &NodeExtern{
+ Key: n.Path,
+ Dir: true,
+ ModifiedIndex: n.ModifiedIndex,
+ CreatedIndex: n.CreatedIndex,
+ }
+ node.Expiration, node.TTL = n.expirationAndTTL(clock)
+
+ if !recursive {
+ return node
+ }
+
+ children, _ := n.List()
+ node.Nodes = make(NodeExterns, len(children))
+
+ // we do not use the index in the children slice directly;
+ // we need to skip the hidden nodes
+ i := 0
+
+ for _, child := range children {
+ if child.IsHidden() { // get will not list hidden node
+ continue
+ }
+
+ node.Nodes[i] = child.Repr(recursive, sorted, clock)
+
+ i++
+ }
+
+ // eliminate hidden nodes
+ node.Nodes = node.Nodes[:i]
+ if sorted {
+ sort.Sort(node.Nodes)
+ }
+
+ return node
+ }
+
+ // n.Value could be changed later, so we need to copy the value out
+ value := n.Value
+ node := &NodeExtern{
+ Key: n.Path,
+ Value: &value,
+ ModifiedIndex: n.ModifiedIndex,
+ CreatedIndex: n.CreatedIndex,
+ }
+ node.Expiration, node.TTL = n.expirationAndTTL(clock)
+ return node
+}
+
+func (n *node) UpdateTTL(expireTime time.Time) {
+ if !n.IsPermanent() {
+ if expireTime.IsZero() {
+ // from ttl to permanent
+ n.ExpireTime = expireTime
+ // remove from ttl heap
+ n.store.ttlKeyHeap.remove(n)
+ return
+ }
+
+ // update ttl
+ n.ExpireTime = expireTime
+ // update ttl heap
+ n.store.ttlKeyHeap.update(n)
+ return
+ }
+
+ if expireTime.IsZero() {
+ return
+ }
+
+ // from permanent to ttl
+ n.ExpireTime = expireTime
+ // push into ttl heap
+ n.store.ttlKeyHeap.push(n)
+}
+
+// Compare compares the node's index and value with the provided ones.
+// The second result explains the outcome and equals one of the Compare* constants.
+func (n *node) Compare(prevValue string, prevIndex uint64) (ok bool, which int) {
+ indexMatch := prevIndex == 0 || n.ModifiedIndex == prevIndex
+ valueMatch := prevValue == "" || n.Value == prevValue
+ ok = valueMatch && indexMatch
+ switch {
+ case valueMatch && indexMatch:
+ which = CompareMatch
+ case indexMatch && !valueMatch:
+ which = CompareValueNotMatch
+ case valueMatch && !indexMatch:
+ which = CompareIndexNotMatch
+ default:
+ which = CompareNotMatch
+ }
+ return ok, which
+}
+
+// Clone clones the node recursively and returns the new node.
+// If the node is a directory, it will clone all the content under this directory.
+// If the node is a key-value pair, it will clone the pair.
+func (n *node) Clone() *node {
+ if !n.IsDir() {
+ newkv := newKV(n.store, n.Path, n.Value, n.CreatedIndex, n.Parent, n.ExpireTime)
+ newkv.ModifiedIndex = n.ModifiedIndex
+ return newkv
+ }
+
+ clone := newDir(n.store, n.Path, n.CreatedIndex, n.Parent, n.ExpireTime)
+ clone.ModifiedIndex = n.ModifiedIndex
+
+ for key, child := range n.Children {
+ clone.Children[key] = child.Clone()
+ }
+
+ return clone
+}
+
+// recoverAndclean helps to do recovery.
+// Two things need to be done: 1. recover the structure; 2. delete expired nodes.
+//
+// If the node is a directory, it will help recover children's parent pointer and recursively
+// call this function on its children.
+// We check the expire last since we need to recover the whole structure first and add all the
+// notifications into the event history.
+func (n *node) recoverAndclean() {
+ if n.IsDir() {
+ for _, child := range n.Children {
+ child.Parent = n
+ child.store = n.store
+ child.recoverAndclean()
+ }
+ }
+
+ if !n.ExpireTime.IsZero() {
+ n.store.ttlKeyHeap.push(n)
+ }
+}
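Compare treats an empty prevValue and a zero prevIndex as wildcards, so a compare-and-swap can match on value alone, index alone, or both. Since node is unexported, here is the matching rule restated standalone:

```go
package main

import "fmt"

// compare mirrors node.Compare: empty/zero inputs act as wildcards.
func compare(value string, index uint64, prevValue string, prevIndex uint64) bool {
	indexMatch := prevIndex == 0 || index == prevIndex
	valueMatch := prevValue == "" || value == prevValue
	return indexMatch && valueMatch
}

func main() {
	fmt.Println(compare("bar", 7, "bar", 0)) // true: value-only test
	fmt.Println(compare("bar", 7, "", 7))    // true: index-only test
	fmt.Println(compare("bar", 7, "baz", 7)) // false: value mismatch
}
```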
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/node_extern.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/node_extern.go
new file mode 100644
index 0000000..ff2e13e
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/node_extern.go
@@ -0,0 +1,115 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+import (
+ "sort"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+)
+
+// NodeExtern is the external representation of the
+// internal node with additional fields:
+// PrevValue is the previous value of the node,
+// TTL is time to live in seconds.
+type NodeExtern struct {
+ Key string `json:"key,omitempty"`
+ Value *string `json:"value,omitempty"`
+ Dir bool `json:"dir,omitempty"`
+ Expiration *time.Time `json:"expiration,omitempty"`
+ TTL int64 `json:"ttl,omitempty"`
+ Nodes NodeExterns `json:"nodes,omitempty"`
+ ModifiedIndex uint64 `json:"modifiedIndex,omitempty"`
+ CreatedIndex uint64 `json:"createdIndex,omitempty"`
+}
+
+func (eNode *NodeExtern) loadInternalNode(n *node, recursive, sorted bool, clock clockwork.Clock) {
+ if n.IsDir() { // node is a directory
+ eNode.Dir = true
+
+ children, _ := n.List()
+ eNode.Nodes = make(NodeExterns, len(children))
+
+ // we do not use the index in the children slice directly;
+ // we need to skip the hidden nodes
+ i := 0
+
+ for _, child := range children {
+ if child.IsHidden() { // get will not return hidden nodes
+ continue
+ }
+
+ eNode.Nodes[i] = child.Repr(recursive, sorted, clock)
+ i++
+ }
+
+ // eliminate hidden nodes
+ eNode.Nodes = eNode.Nodes[:i]
+
+ if sorted {
+ sort.Sort(eNode.Nodes)
+ }
+ } else { // node is a file
+ value, _ := n.Read()
+ eNode.Value = &value
+ }
+
+ eNode.Expiration, eNode.TTL = n.expirationAndTTL(clock)
+}
+
+func (eNode *NodeExtern) Clone() *NodeExtern {
+ if eNode == nil {
+ return nil
+ }
+ nn := &NodeExtern{
+ Key: eNode.Key,
+ Dir: eNode.Dir,
+ TTL: eNode.TTL,
+ ModifiedIndex: eNode.ModifiedIndex,
+ CreatedIndex: eNode.CreatedIndex,
+ }
+ if eNode.Value != nil {
+ s := *eNode.Value
+ nn.Value = &s
+ }
+ if eNode.Expiration != nil {
+ t := *eNode.Expiration
+ nn.Expiration = &t
+ }
+ if eNode.Nodes != nil {
+ nn.Nodes = make(NodeExterns, len(eNode.Nodes))
+ for i, n := range eNode.Nodes {
+ nn.Nodes[i] = n.Clone()
+ }
+ }
+ return nn
+}
+
+type NodeExterns []*NodeExtern
+
+// interfaces for sorting
+
+func (ns NodeExterns) Len() int {
+ return len(ns)
+}
+
+func (ns NodeExterns) Less(i, j int) bool {
+ return ns[i].Key < ns[j].Key
+}
+
+func (ns NodeExterns) Swap(i, j int) {
+ ns[i], ns[j] = ns[j], ns[i]
+}
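NodeExterns implements sort.Interface ordered by key, which is what the sorted flag in Repr and loadInternalNode relies on. A usage sketch:

```go
package main

import (
	"fmt"
	"sort"

	"go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
)

func main() {
	nodes := v2store.NodeExterns{
		{Key: "/foo/c"},
		{Key: "/foo/a"},
		{Key: "/foo/b"},
	}
	sort.Sort(nodes) // Less compares keys lexicographically
	for _, n := range nodes {
		fmt.Println(n.Key) // /foo/a, /foo/b, /foo/c
	}
}
```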
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/stats.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/stats.go
new file mode 100644
index 0000000..55ede56
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/stats.go
@@ -0,0 +1,145 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+import (
+ "encoding/json"
+ "sync/atomic"
+)
+
+const (
+ SetSuccess = iota
+ SetFail
+ DeleteSuccess
+ DeleteFail
+ CreateSuccess
+ CreateFail
+ UpdateSuccess
+ UpdateFail
+ CompareAndSwapSuccess
+ CompareAndSwapFail
+ GetSuccess
+ GetFail
+ ExpireCount
+ CompareAndDeleteSuccess
+ CompareAndDeleteFail
+)
+
+type Stats struct {
+ // Number of get requests
+
+ GetSuccess uint64 `json:"getsSuccess"`
+ GetFail uint64 `json:"getsFail"`
+
+ // Number of sets requests
+
+ SetSuccess uint64 `json:"setsSuccess"`
+ SetFail uint64 `json:"setsFail"`
+
+ // Number of delete requests
+
+ DeleteSuccess uint64 `json:"deleteSuccess"`
+ DeleteFail uint64 `json:"deleteFail"`
+
+ // Number of update requests
+
+ UpdateSuccess uint64 `json:"updateSuccess"`
+ UpdateFail uint64 `json:"updateFail"`
+
+ // Number of create requests
+
+ CreateSuccess uint64 `json:"createSuccess"`
+ CreateFail uint64 `json:"createFail"`
+
+ // Number of testAndSet requests
+
+ CompareAndSwapSuccess uint64 `json:"compareAndSwapSuccess"`
+ CompareAndSwapFail uint64 `json:"compareAndSwapFail"`
+
+ // Number of compareAndDelete requests
+
+ CompareAndDeleteSuccess uint64 `json:"compareAndDeleteSuccess"`
+ CompareAndDeleteFail uint64 `json:"compareAndDeleteFail"`
+
+ ExpireCount uint64 `json:"expireCount"`
+
+ Watchers uint64 `json:"watchers"`
+}
+
+func newStats() *Stats {
+ s := new(Stats)
+ return s
+}
+
+func (s *Stats) clone() *Stats {
+ return &Stats{
+ GetSuccess: atomic.LoadUint64(&s.GetSuccess),
+ GetFail: atomic.LoadUint64(&s.GetFail),
+ SetSuccess: atomic.LoadUint64(&s.SetSuccess),
+ SetFail: atomic.LoadUint64(&s.SetFail),
+ DeleteSuccess: atomic.LoadUint64(&s.DeleteSuccess),
+ DeleteFail: atomic.LoadUint64(&s.DeleteFail),
+ UpdateSuccess: atomic.LoadUint64(&s.UpdateSuccess),
+ UpdateFail: atomic.LoadUint64(&s.UpdateFail),
+ CreateSuccess: atomic.LoadUint64(&s.CreateSuccess),
+ CreateFail: atomic.LoadUint64(&s.CreateFail),
+ CompareAndSwapSuccess: atomic.LoadUint64(&s.CompareAndSwapSuccess),
+ CompareAndSwapFail: atomic.LoadUint64(&s.CompareAndSwapFail),
+ CompareAndDeleteSuccess: atomic.LoadUint64(&s.CompareAndDeleteSuccess),
+ CompareAndDeleteFail: atomic.LoadUint64(&s.CompareAndDeleteFail),
+ ExpireCount: atomic.LoadUint64(&s.ExpireCount),
+ Watchers: atomic.LoadUint64(&s.Watchers),
+ }
+}
+
+func (s *Stats) toJSON() []byte {
+ b, _ := json.Marshal(s)
+ return b
+}
+
+func (s *Stats) Inc(field int) {
+ switch field {
+ case SetSuccess:
+ atomic.AddUint64(&s.SetSuccess, 1)
+ case SetFail:
+ atomic.AddUint64(&s.SetFail, 1)
+ case CreateSuccess:
+ atomic.AddUint64(&s.CreateSuccess, 1)
+ case CreateFail:
+ atomic.AddUint64(&s.CreateFail, 1)
+ case DeleteSuccess:
+ atomic.AddUint64(&s.DeleteSuccess, 1)
+ case DeleteFail:
+ atomic.AddUint64(&s.DeleteFail, 1)
+ case GetSuccess:
+ atomic.AddUint64(&s.GetSuccess, 1)
+ case GetFail:
+ atomic.AddUint64(&s.GetFail, 1)
+ case UpdateSuccess:
+ atomic.AddUint64(&s.UpdateSuccess, 1)
+ case UpdateFail:
+ atomic.AddUint64(&s.UpdateFail, 1)
+ case CompareAndSwapSuccess:
+ atomic.AddUint64(&s.CompareAndSwapSuccess, 1)
+ case CompareAndSwapFail:
+ atomic.AddUint64(&s.CompareAndSwapFail, 1)
+ case CompareAndDeleteSuccess:
+ atomic.AddUint64(&s.CompareAndDeleteSuccess, 1)
+ case CompareAndDeleteFail:
+ atomic.AddUint64(&s.CompareAndDeleteFail, 1)
+ case ExpireCount:
+ atomic.AddUint64(&s.ExpireCount, 1)
+ }
+}
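Because Inc and clone go through sync/atomic, counters can be bumped from many goroutines without extra locking. A quick sketch:

```go
package main

import (
	"fmt"
	"sync"

	"go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
)

func main() {
	s := new(v2store.Stats)

	var wg sync.WaitGroup
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			s.Inc(v2store.SetSuccess) // atomic add, no mutex needed
		}()
	}
	wg.Wait()
	fmt.Println(s.SetSuccess) // 100
}
```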
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/store.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/store.go
new file mode 100644
index 0000000..2129bd6
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/store.go
@@ -0,0 +1,794 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+import (
+ "encoding/json"
+ "fmt"
+ "path"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2error"
+)
+
+// The default version to set when the store is first initialized.
+const defaultVersion = 2
+
+var minExpireTime time.Time
+
+func init() {
+ minExpireTime, _ = time.Parse(time.RFC3339, "2000-01-01T00:00:00Z")
+}
+
+type Store interface {
+ Version() int
+ Index() uint64
+
+ Get(nodePath string, recursive, sorted bool) (*Event, error)
+ Set(nodePath string, dir bool, value string, expireOpts TTLOptionSet) (*Event, error)
+ Update(nodePath string, newValue string, expireOpts TTLOptionSet) (*Event, error)
+ Create(nodePath string, dir bool, value string, unique bool,
+ expireOpts TTLOptionSet) (*Event, error)
+ CompareAndSwap(nodePath string, prevValue string, prevIndex uint64,
+ value string, expireOpts TTLOptionSet) (*Event, error)
+ Delete(nodePath string, dir, recursive bool) (*Event, error)
+ CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error)
+
+ Watch(prefix string, recursive, stream bool, sinceIndex uint64) (Watcher, error)
+
+ Save() ([]byte, error)
+ Recovery(state []byte) error
+
+ Clone() Store
+ SaveNoCopy() ([]byte, error)
+
+ JsonStats() []byte
+ DeleteExpiredKeys(cutoff time.Time)
+
+ HasTTLKeys() bool
+}
+
+type TTLOptionSet struct {
+ ExpireTime time.Time
+ Refresh bool
+}
+
+type store struct {
+ Root *node
+ WatcherHub *watcherHub
+ CurrentIndex uint64
+ Stats *Stats
+ CurrentVersion int
+ ttlKeyHeap *ttlKeyHeap // needs to be recovered manually
+ worldLock sync.RWMutex // stop the world lock
+ clock clockwork.Clock
+ readonlySet types.Set
+}
+
+// New creates a store where the given namespaces will be created as initial directories.
+func New(namespaces ...string) Store {
+ s := newStore(namespaces...)
+ s.clock = clockwork.NewRealClock()
+ return s
+}
+
+func newStore(namespaces ...string) *store {
+ s := new(store)
+ s.CurrentVersion = defaultVersion
+ s.Root = newDir(s, "/", s.CurrentIndex, nil, Permanent)
+ for _, namespace := range namespaces {
+ s.Root.Add(newDir(s, namespace, s.CurrentIndex, s.Root, Permanent))
+ }
+ s.Stats = newStats()
+ s.WatcherHub = newWatchHub(1000)
+ s.ttlKeyHeap = newTTLKeyHeap()
+ s.readonlySet = types.NewUnsafeSet(append(namespaces, "/")...)
+ return s
+}
+
+// Version retrieves current version of the store.
+func (s *store) Version() int {
+ return s.CurrentVersion
+}
+
+// Index retrieves the current index of the store.
+func (s *store) Index() uint64 {
+ s.worldLock.RLock()
+ defer s.worldLock.RUnlock()
+ return s.CurrentIndex
+}
+
+// Get returns a get event.
+// If recursive is true, it will return all the content under the node path.
+// If sorted is true, it will sort the content by keys.
+func (s *store) Get(nodePath string, recursive, sorted bool) (*Event, error) {
+ var err *v2error.Error
+
+ s.worldLock.RLock()
+ defer s.worldLock.RUnlock()
+
+ defer func() {
+ if err == nil {
+ s.Stats.Inc(GetSuccess)
+ if recursive {
+ reportReadSuccess(GetRecursive)
+ } else {
+ reportReadSuccess(Get)
+ }
+ return
+ }
+
+ s.Stats.Inc(GetFail)
+ if recursive {
+ reportReadFailure(GetRecursive)
+ } else {
+ reportReadFailure(Get)
+ }
+ }()
+
+ n, err := s.internalGet(nodePath)
+ if err != nil {
+ return nil, err
+ }
+
+ e := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex)
+ e.EtcdIndex = s.CurrentIndex
+ e.Node.loadInternalNode(n, recursive, sorted, s.clock)
+
+ return e, nil
+}
+
+// Create creates the node at nodePath, creating any intermediate directories (with no TTL) as needed.
+// If the node already exists, Create fails.
+// If any node on the path is a file, Create fails.
+func (s *store) Create(nodePath string, dir bool, value string, unique bool, expireOpts TTLOptionSet) (*Event, error) {
+ var err *v2error.Error
+
+ s.worldLock.Lock()
+ defer s.worldLock.Unlock()
+
+ defer func() {
+ if err == nil {
+ s.Stats.Inc(CreateSuccess)
+ reportWriteSuccess(Create)
+ return
+ }
+
+ s.Stats.Inc(CreateFail)
+ reportWriteFailure(Create)
+ }()
+
+ e, err := s.internalCreate(nodePath, dir, value, unique, false, expireOpts.ExpireTime, Create)
+ if err != nil {
+ return nil, err
+ }
+
+ e.EtcdIndex = s.CurrentIndex
+ s.WatcherHub.notify(e)
+
+ return e, nil
+}
+
+// Set creates or replaces the node at nodePath.
+func (s *store) Set(nodePath string, dir bool, value string, expireOpts TTLOptionSet) (*Event, error) {
+ var err *v2error.Error
+
+ s.worldLock.Lock()
+ defer s.worldLock.Unlock()
+
+ defer func() {
+ if err == nil {
+ s.Stats.Inc(SetSuccess)
+ reportWriteSuccess(Set)
+ return
+ }
+
+ s.Stats.Inc(SetFail)
+ reportWriteFailure(Set)
+ }()
+
+ // Get prevNode value
+ n, getErr := s.internalGet(nodePath)
+ if getErr != nil && getErr.ErrorCode != v2error.EcodeKeyNotFound {
+ err = getErr
+ return nil, err
+ }
+
+ if expireOpts.Refresh {
+ if getErr != nil {
+ err = getErr
+ return nil, err
+ }
+ value = n.Value
+ }
+
+ // Set new value
+ e, err := s.internalCreate(nodePath, dir, value, false, true, expireOpts.ExpireTime, Set)
+ if err != nil {
+ return nil, err
+ }
+ e.EtcdIndex = s.CurrentIndex
+
+ // Put prevNode into event
+ if getErr == nil {
+ prev := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex)
+ prev.Node.loadInternalNode(n, false, false, s.clock)
+ e.PrevNode = prev.Node
+ }
+
+ if !expireOpts.Refresh {
+ s.WatcherHub.notify(e)
+ } else {
+ e.SetRefresh()
+ s.WatcherHub.add(e)
+ }
+
+ return e, nil
+}
+
+// getCompareFailCause returns a user-readable cause of the failed comparison.
+func getCompareFailCause(n *node, which int, prevValue string, prevIndex uint64) string {
+ switch which {
+ case CompareIndexNotMatch:
+ return fmt.Sprintf("[%v != %v]", prevIndex, n.ModifiedIndex)
+ case CompareValueNotMatch:
+ return fmt.Sprintf("[%v != %v]", prevValue, n.Value)
+ default:
+ return fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex)
+ }
+}
+
+func (s *store) CompareAndSwap(nodePath string, prevValue string, prevIndex uint64,
+ value string, expireOpts TTLOptionSet,
+) (*Event, error) {
+ var err *v2error.Error
+
+ s.worldLock.Lock()
+ defer s.worldLock.Unlock()
+
+ defer func() {
+ if err == nil {
+ s.Stats.Inc(CompareAndSwapSuccess)
+ reportWriteSuccess(CompareAndSwap)
+ return
+ }
+
+ s.Stats.Inc(CompareAndSwapFail)
+ reportWriteFailure(CompareAndSwap)
+ }()
+
+ nodePath = path.Clean(path.Join("/", nodePath))
+ // we do not allow the user to change "/"
+ if s.readonlySet.Contains(nodePath) {
+ return nil, v2error.NewError(v2error.EcodeRootROnly, "/", s.CurrentIndex)
+ }
+
+ n, err := s.internalGet(nodePath)
+ if err != nil {
+ return nil, err
+ }
+ if n.IsDir() { // can only compare and swap file
+ err = v2error.NewError(v2error.EcodeNotFile, nodePath, s.CurrentIndex)
+ return nil, err
+ }
+
+ // If both prevValue and prevIndex are given, both are tested.
+ // The command is executed only if both tests succeed.
+ if ok, which := n.Compare(prevValue, prevIndex); !ok {
+ cause := getCompareFailCause(n, which, prevValue, prevIndex)
+ err = v2error.NewError(v2error.EcodeTestFailed, cause, s.CurrentIndex)
+ return nil, err
+ }
+
+ if expireOpts.Refresh {
+ value = n.Value
+ }
+
+ // update etcd index
+ s.CurrentIndex++
+
+ e := newEvent(CompareAndSwap, nodePath, s.CurrentIndex, n.CreatedIndex)
+ e.EtcdIndex = s.CurrentIndex
+ e.PrevNode = n.Repr(false, false, s.clock)
+ eNode := e.Node
+
+ // if test succeed, write the value
+ if err := n.Write(value, s.CurrentIndex); err != nil {
+ return nil, err
+ }
+ n.UpdateTTL(expireOpts.ExpireTime)
+
+ // copy the value for safety
+ valueCopy := value
+ eNode.Value = &valueCopy
+ eNode.Expiration, eNode.TTL = n.expirationAndTTL(s.clock)
+
+ if !expireOpts.Refresh {
+ s.WatcherHub.notify(e)
+ } else {
+ e.SetRefresh()
+ s.WatcherHub.add(e)
+ }
+
+ return e, nil
+}
+
+// Delete deletes the node at the given path.
+// If the node is a directory, recursive must be true to delete it.
+func (s *store) Delete(nodePath string, dir, recursive bool) (*Event, error) {
+ var err *v2error.Error
+
+ s.worldLock.Lock()
+ defer s.worldLock.Unlock()
+
+ defer func() {
+ if err == nil {
+ s.Stats.Inc(DeleteSuccess)
+ reportWriteSuccess(Delete)
+ return
+ }
+
+ s.Stats.Inc(DeleteFail)
+ reportWriteFailure(Delete)
+ }()
+
+ nodePath = path.Clean(path.Join("/", nodePath))
+ // we do not allow the user to change "/"
+ if s.readonlySet.Contains(nodePath) {
+ return nil, v2error.NewError(v2error.EcodeRootROnly, "/", s.CurrentIndex)
+ }
+
+ // recursive implies dir
+ if recursive {
+ dir = true
+ }
+
+ n, err := s.internalGet(nodePath)
+ if err != nil { // if the node does not exist, return error
+ return nil, err
+ }
+
+ nextIndex := s.CurrentIndex + 1
+ e := newEvent(Delete, nodePath, nextIndex, n.CreatedIndex)
+ e.EtcdIndex = nextIndex
+ e.PrevNode = n.Repr(false, false, s.clock)
+ eNode := e.Node
+
+ if n.IsDir() {
+ eNode.Dir = true
+ }
+
+ callback := func(path string) { // notify function
+ // notify the watchers with deleted set true
+ s.WatcherHub.notifyWatchers(e, path, true)
+ }
+
+ err = n.Remove(dir, recursive, callback)
+ if err != nil {
+ return nil, err
+ }
+
+ // update etcd index
+ s.CurrentIndex++
+
+ s.WatcherHub.notify(e)
+
+ return e, nil
+}
+
+func (s *store) CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error) {
+ var err *v2error.Error
+
+ s.worldLock.Lock()
+ defer s.worldLock.Unlock()
+
+ defer func() {
+ if err == nil {
+ s.Stats.Inc(CompareAndDeleteSuccess)
+ reportWriteSuccess(CompareAndDelete)
+ return
+ }
+
+ s.Stats.Inc(CompareAndDeleteFail)
+ reportWriteFailure(CompareAndDelete)
+ }()
+
+ nodePath = path.Clean(path.Join("/", nodePath))
+
+ n, err := s.internalGet(nodePath)
+ if err != nil { // if the node does not exist, return error
+ return nil, err
+ }
+ if n.IsDir() { // can only compare and delete file
+ return nil, v2error.NewError(v2error.EcodeNotFile, nodePath, s.CurrentIndex)
+ }
+
+ // If both prevValue and prevIndex are given, we test both of them.
+ // The command is executed only if both tests succeed.
+ if ok, which := n.Compare(prevValue, prevIndex); !ok {
+ cause := getCompareFailCause(n, which, prevValue, prevIndex)
+ return nil, v2error.NewError(v2error.EcodeTestFailed, cause, s.CurrentIndex)
+ }
+
+ // update etcd index
+ s.CurrentIndex++
+
+ e := newEvent(CompareAndDelete, nodePath, s.CurrentIndex, n.CreatedIndex)
+ e.EtcdIndex = s.CurrentIndex
+ e.PrevNode = n.Repr(false, false, s.clock)
+
+ callback := func(path string) { // notify function
+ // notify the watchers with deleted set true
+ s.WatcherHub.notifyWatchers(e, path, true)
+ }
+
+ err = n.Remove(false, false, callback)
+ if err != nil {
+ return nil, err
+ }
+
+ s.WatcherHub.notify(e)
+
+ return e, nil
+}
+
+func (s *store) Watch(key string, recursive, stream bool, sinceIndex uint64) (Watcher, error) {
+ s.worldLock.RLock()
+ defer s.worldLock.RUnlock()
+
+ key = path.Clean(path.Join("/", key))
+ if sinceIndex == 0 {
+ sinceIndex = s.CurrentIndex + 1
+ }
+ // WatcherHub does not know about the current index, so we need to pass it in
+ w, err := s.WatcherHub.watch(key, recursive, stream, sinceIndex, s.CurrentIndex)
+ if err != nil {
+ return nil, err
+ }
+
+ return w, nil
+}
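+
+// For example, Watch("/foo", true, false, 0) returns a one-shot recursive
+// watcher that fires on the first change under "/foo" after the current index.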
+
+// walk walks the nodePath and applies walkFunc on each directory it traverses.
+func (s *store) walk(nodePath string, walkFunc func(prev *node, component string) (*node, *v2error.Error)) (*node, *v2error.Error) {
+ components := strings.Split(nodePath, "/")
+
+ curr := s.Root
+ var err *v2error.Error
+
+ for i := 1; i < len(components); i++ {
+ if len(components[i]) == 0 { // ignore empty string
+ return curr, nil
+ }
+
+ curr, err = walkFunc(curr, components[i])
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return curr, nil
+}
+
+// Update updates the value/ttl of the node.
+// If the node is a file, the value and the ttl can be updated.
+// If the node is a directory, only the ttl can be updated.
+func (s *store) Update(nodePath string, newValue string, expireOpts TTLOptionSet) (*Event, error) {
+ var err *v2error.Error
+
+ s.worldLock.Lock()
+ defer s.worldLock.Unlock()
+
+ defer func() {
+ if err == nil {
+ s.Stats.Inc(UpdateSuccess)
+ reportWriteSuccess(Update)
+ return
+ }
+
+ s.Stats.Inc(UpdateFail)
+ reportWriteFailure(Update)
+ }()
+
+ nodePath = path.Clean(path.Join("/", nodePath))
+ // we do not allow the user to change "/"
+ if s.readonlySet.Contains(nodePath) {
+ return nil, v2error.NewError(v2error.EcodeRootROnly, "/", s.CurrentIndex)
+ }
+
+ currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1
+
+ n, err := s.internalGet(nodePath)
+ if err != nil { // if the node does not exist, return error
+ return nil, err
+ }
+ if n.IsDir() && len(newValue) != 0 {
+ // if the node is a directory, we cannot update value to non-empty
+ return nil, v2error.NewError(v2error.EcodeNotFile, nodePath, currIndex)
+ }
+
+ if expireOpts.Refresh {
+ newValue = n.Value
+ }
+
+ e := newEvent(Update, nodePath, nextIndex, n.CreatedIndex)
+ e.EtcdIndex = nextIndex
+ e.PrevNode = n.Repr(false, false, s.clock)
+ eNode := e.Node
+
+ if err := n.Write(newValue, nextIndex); err != nil {
+ return nil, fmt.Errorf("nodePath %v : %w", nodePath, err)
+ }
+
+ if n.IsDir() {
+ eNode.Dir = true
+ } else {
+ // copy the value for safety
+ newValueCopy := newValue
+ eNode.Value = &newValueCopy
+ }
+
+ // update ttl
+ n.UpdateTTL(expireOpts.ExpireTime)
+
+ eNode.Expiration, eNode.TTL = n.expirationAndTTL(s.clock)
+
+ if !expireOpts.Refresh {
+ s.WatcherHub.notify(e)
+ } else {
+ e.SetRefresh()
+ s.WatcherHub.add(e)
+ }
+
+ s.CurrentIndex = nextIndex
+
+ return e, nil
+}
+
+func (s *store) internalCreate(nodePath string, dir bool, value string, unique, replace bool,
+ expireTime time.Time, action string,
+) (*Event, *v2error.Error) {
+ currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1
+
+ if unique { // append unique item under the node path
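+ // The index is zero-padded to 20 digits so that lexicographic key order
+ // matches numeric creation order (e.g. "00000000000000000042").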
+ nodePath += "/" + fmt.Sprintf("%020s", strconv.FormatUint(nextIndex, 10))
+ }
+
+ nodePath = path.Clean(path.Join("/", nodePath))
+
+ // we do not allow the user to change "/"
+ if s.readonlySet.Contains(nodePath) {
+ return nil, v2error.NewError(v2error.EcodeRootROnly, "/", currIndex)
+ }
+
+ // Assume expire times that are way in the past are invalid and treat the
+ // node as permanent. This can occur when the time is serialized to
+ // JavaScript and deserialized back.
+ if expireTime.Before(minExpireTime) {
+ expireTime = Permanent
+ }
+
+ dirName, nodeName := path.Split(nodePath)
+
+ // walk through the nodePath, create dirs and get the last directory node
+ d, err := s.walk(dirName, s.checkDir)
+ if err != nil {
+ s.Stats.Inc(SetFail)
+ reportWriteFailure(action)
+ err.Index = currIndex
+ return nil, err
+ }
+
+ e := newEvent(action, nodePath, nextIndex, nextIndex)
+ eNode := e.Node
+
+ n, _ := d.GetChild(nodeName)
+
+ // replace will try to replace an existing file
+ if n != nil {
+ if !replace {
+ return nil, v2error.NewError(v2error.EcodeNodeExist, nodePath, currIndex)
+ }
+ if n.IsDir() {
+ return nil, v2error.NewError(v2error.EcodeNotFile, nodePath, currIndex)
+ }
+ e.PrevNode = n.Repr(false, false, s.clock)
+
+ if err := n.Remove(false, false, nil); err != nil {
+ return nil, err
+ }
+ }
+
+ if !dir { // create file
+ // copy the value for safety
+ valueCopy := value
+ eNode.Value = &valueCopy
+
+ n = newKV(s, nodePath, value, nextIndex, d, expireTime)
+ } else { // create directory
+ eNode.Dir = true
+
+ n = newDir(s, nodePath, nextIndex, d, expireTime)
+ }
+
+ // we are sure d is a directory and does not have a child named n.Name
+ if err := d.Add(n); err != nil {
+ return nil, err
+ }
+
+ // node with TTL
+ if !n.IsPermanent() {
+ s.ttlKeyHeap.push(n)
+
+ eNode.Expiration, eNode.TTL = n.expirationAndTTL(s.clock)
+ }
+
+ s.CurrentIndex = nextIndex
+
+ return e, nil
+}
+
+// internalGet gets the node at the given nodePath.
+func (s *store) internalGet(nodePath string) (*node, *v2error.Error) {
+ nodePath = path.Clean(path.Join("/", nodePath))
+
+ walkFunc := func(parent *node, name string) (*node, *v2error.Error) {
+ if !parent.IsDir() {
+ err := v2error.NewError(v2error.EcodeNotDir, parent.Path, s.CurrentIndex)
+ return nil, err
+ }
+
+ child, ok := parent.Children[name]
+ if ok {
+ return child, nil
+ }
+
+ return nil, v2error.NewError(v2error.EcodeKeyNotFound, path.Join(parent.Path, name), s.CurrentIndex)
+ }
+
+ f, err := s.walk(nodePath, walkFunc)
+ if err != nil {
+ return nil, err
+ }
+ return f, nil
+}
+
+// DeleteExpiredKeys will delete all expired keys
+func (s *store) DeleteExpiredKeys(cutoff time.Time) {
+ s.worldLock.Lock()
+ defer s.worldLock.Unlock()
+
+ for {
+ node := s.ttlKeyHeap.top()
+ if node == nil || node.ExpireTime.After(cutoff) {
+ break
+ }
+
+ s.CurrentIndex++
+ e := newEvent(Expire, node.Path, s.CurrentIndex, node.CreatedIndex)
+ e.EtcdIndex = s.CurrentIndex
+ e.PrevNode = node.Repr(false, false, s.clock)
+ if node.IsDir() {
+ e.Node.Dir = true
+ }
+
+ callback := func(path string) { // notify function
+ // notify the watchers with deleted set true
+ s.WatcherHub.notifyWatchers(e, path, true)
+ }
+
+ s.ttlKeyHeap.pop()
+ node.Remove(true, true, callback)
+
+ reportExpiredKey()
+ s.Stats.Inc(ExpireCount)
+
+ s.WatcherHub.notify(e)
+ }
+}
+
+// checkDir checks whether the component is a directory under the parent node.
+// If it is a directory, this function returns the pointer to that node.
+// If it does not exist, this function creates a new directory and returns the pointer to that node.
+// If it is a file, this function returns an error.
+func (s *store) checkDir(parent *node, dirName string) (*node, *v2error.Error) {
+ node, ok := parent.Children[dirName]
+
+ if ok {
+ if node.IsDir() {
+ return node, nil
+ }
+
+ return nil, v2error.NewError(v2error.EcodeNotDir, node.Path, s.CurrentIndex)
+ }
+
+ n := newDir(s, path.Join(parent.Path, dirName), s.CurrentIndex+1, parent, Permanent)
+
+ parent.Children[dirName] = n
+
+ return n, nil
+}
+
+// Save saves the static state of the store system.
+// It cannot save the state of the watchers.
+// It does not save the parent field of the nodes; otherwise there would be
+// a cyclic dependency issue for the json package.
+func (s *store) Save() ([]byte, error) {
+ b, err := json.Marshal(s.Clone())
+ if err != nil {
+ return nil, err
+ }
+
+ return b, nil
+}
+
+func (s *store) SaveNoCopy() ([]byte, error) {
+ b, err := json.Marshal(s)
+ if err != nil {
+ return nil, err
+ }
+
+ return b, nil
+}
+
+func (s *store) Clone() Store {
+ s.worldLock.RLock()
+
+ clonedStore := newStore()
+ clonedStore.CurrentIndex = s.CurrentIndex
+ clonedStore.Root = s.Root.Clone()
+ clonedStore.WatcherHub = s.WatcherHub.clone()
+ clonedStore.Stats = s.Stats.clone()
+ clonedStore.CurrentVersion = s.CurrentVersion
+
+ s.worldLock.RUnlock()
+ return clonedStore
+}
+
+// Recovery recovers the store system from a static state.
+// It needs to recover the parent field of the nodes.
+// It also needs to delete the nodes that expired since the saved time and
+// to recreate the monitoring structures (such as the TTL key heap).
+func (s *store) Recovery(state []byte) error {
+ s.worldLock.Lock()
+ defer s.worldLock.Unlock()
+ err := json.Unmarshal(state, s)
+ if err != nil {
+ return err
+ }
+
+ s.ttlKeyHeap = newTTLKeyHeap()
+
+ s.Root.recoverAndclean()
+ return nil
+}
+
+//revive:disable:var-naming
+func (s *store) JsonStats() []byte {
+ //revive:enable:var-naming
+ s.Stats.Watchers = uint64(s.WatcherHub.count)
+ return s.Stats.toJSON()
+}
+
+func (s *store) HasTTLKeys() bool {
+ s.worldLock.RLock()
+ defer s.worldLock.RUnlock()
+ return s.ttlKeyHeap.Len() != 0
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/ttl_key_heap.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/ttl_key_heap.go
new file mode 100644
index 0000000..77ca8e9
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/ttl_key_heap.go
@@ -0,0 +1,97 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+import "container/heap"
+
+// A ttlKeyHeap is a min-heap of nodes ordered by expiration time.
+type ttlKeyHeap struct {
+ array []*node
+ keyMap map[*node]int
+}
+
+func newTTLKeyHeap() *ttlKeyHeap {
+ h := &ttlKeyHeap{keyMap: make(map[*node]int)}
+ heap.Init(h)
+ return h
+}
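+
+// The Len/Less/Swap/Push/Pop methods below implement heap.Interface and are
+// driven through container/heap; code in this package uses the typed helpers
+// further down (push/pop/top/update/remove) so it never handles `any` values
+// directly.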
+
+func (h ttlKeyHeap) Len() int {
+ return len(h.array)
+}
+
+func (h ttlKeyHeap) Less(i, j int) bool {
+ return h.array[i].ExpireTime.Before(h.array[j].ExpireTime)
+}
+
+func (h ttlKeyHeap) Swap(i, j int) {
+ // swap node
+ h.array[i], h.array[j] = h.array[j], h.array[i]
+
+ // update map
+ h.keyMap[h.array[i]] = i
+ h.keyMap[h.array[j]] = j
+}
+
+func (h *ttlKeyHeap) Push(x any) {
+ n, _ := x.(*node)
+ h.keyMap[n] = len(h.array)
+ h.array = append(h.array, n)
+}
+
+func (h *ttlKeyHeap) Pop() any {
+ old := h.array
+ n := len(old)
+ x := old[n-1]
+ // Set the slice element to nil so the GC can recycle the node.
+ // This is needed because the Go GC does not support partially recycling
+ // the underlying array: https://github.com/golang/go/issues/9618
+ old[n-1] = nil
+ h.array = old[0 : n-1]
+ delete(h.keyMap, x)
+ return x
+}
+
+func (h *ttlKeyHeap) top() *node {
+ if h.Len() != 0 {
+ return h.array[0]
+ }
+ return nil
+}
+
+func (h *ttlKeyHeap) pop() *node {
+ x := heap.Pop(h)
+ n, _ := x.(*node)
+ return n
+}
+
+func (h *ttlKeyHeap) push(x any) {
+ heap.Push(h, x)
+}
+
+func (h *ttlKeyHeap) update(n *node) {
+ index, ok := h.keyMap[n]
+ if ok {
+ heap.Remove(h, index)
+ heap.Push(h, n)
+ }
+}
+
+func (h *ttlKeyHeap) remove(n *node) {
+ index, ok := h.keyMap[n]
+ if ok {
+ heap.Remove(h, index)
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/watcher.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/watcher.go
new file mode 100644
index 0000000..4b1e846
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/watcher.go
@@ -0,0 +1,95 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+type Watcher interface {
+ EventChan() chan *Event
+ StartIndex() uint64 // The EtcdIndex at which the Watcher was created
+ Remove()
+}
+
+type watcher struct {
+ eventChan chan *Event
+ stream bool
+ recursive bool
+ sinceIndex uint64
+ startIndex uint64
+ hub *watcherHub
+ removed bool
+ remove func()
+}
+
+func (w *watcher) EventChan() chan *Event {
+ return w.eventChan
+}
+
+func (w *watcher) StartIndex() uint64 {
+ return w.startIndex
+}
+
+// notify notifies the watcher. If the watcher is interested in the given
+// path, the function returns true.
+func (w *watcher) notify(e *Event, originalPath bool, deleted bool) bool {
+ // The watcher is interested in the path in three cases, all under one
+ // condition: the event must happen at or after the watcher's sinceIndex.
+
+ // 1. The path at which the event happens is exactly the path the watcher
+ // is watching. For example, if the watcher is watching "/foo" and the
+ // event happens at "/foo", the watcher must be interested in the event.
+
+ // 2. The watcher is a recursive watcher, so it is also interested in
+ // events under its watch path. For example, if watcher A recursively
+ // watches "/foo", it is interested in an event at "/foo/bar".
+
+ // 3. When we delete a directory, we must force-notify all watchers on the
+ // files we are deleting. For example, if a watcher is watching "/foo/bar"
+ // and we delete "/foo", the watcher should be notified even though "/foo"
+ // is not the path it is watching.
+ if (w.recursive || originalPath || deleted) && e.Index() >= w.sinceIndex {
+ // We cannot block here if the eventChan capacity is full; otherwise
+ // etcd would hang. The eventChan fills up when the rate of
+ // notifications is higher than our send rate.
+ // If this happens, we remove the watcher, which also closes the channel.
+ select {
+ case w.eventChan <- e:
+ default:
+ // We have missed a notification. Remove the watcher.
+ // Removing the watcher also closes the eventChan.
+ w.remove()
+ }
+ return true
+ }
+ return false
+}
+
+// Remove removes the watcher from the watcherHub.
+// The actual remove function is guaranteed to be executed only once.
+func (w *watcher) Remove() {
+ w.hub.mutex.Lock()
+ defer w.hub.mutex.Unlock()
+
+ close(w.eventChan)
+ if w.remove != nil {
+ w.remove()
+ }
+}
+
+// nopWatcher is a watcher that receives nothing, always blocking.
+type nopWatcher struct{}
+
+func NewNopWatcher() Watcher { return &nopWatcher{} }
+func (w *nopWatcher) EventChan() chan *Event { return nil }
+func (w *nopWatcher) StartIndex() uint64 { return 0 }
+func (w *nopWatcher) Remove() {}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/watcher_hub.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/watcher_hub.go
new file mode 100644
index 0000000..df5ae78
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v2store/watcher_hub.go
@@ -0,0 +1,199 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v2store
+
+import (
+ "container/list"
+ "path"
+ "strings"
+ "sync"
+ "sync/atomic"
+
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2error"
+)
+
+// A watcherHub contains all subscribed watchers.
+// watchers is a map keyed by the watched path, with a list of watchers as the value.
+// EventHistory keeps old events for the watcherHub. It helps a watcher get a
+// continuous event history; without it, a watcher might miss events that
+// happen between the end of the first watch command and the start of the
+// second command.
+type watcherHub struct {
+ // count must be the first element to keep 64-bit alignment for atomic
+ // access
+
+ count int64 // current number of watchers.
+
+ mutex sync.Mutex
+ watchers map[string]*list.List
+ EventHistory *EventHistory
+}
+
+// newWatchHub creates a watcherHub. The capacity determines how many events we
+// will keep in the eventHistory.
+// Typically, we only need to keep a small history (smaller than 20K entries).
+// Ideally, it should be smaller than 20K/s (max throughput) * 2 * 50ms (RTT) = 2000.
+func newWatchHub(capacity int) *watcherHub {
+ return &watcherHub{
+ watchers: make(map[string]*list.List),
+ EventHistory: newEventHistory(capacity),
+ }
+}
+
+// Watch function returns a Watcher.
+// If recursive is true, the first change after index under key will be sent to the event channel of the watcher.
+// If recursive is false, the first change after index at key will be sent to the event channel of the watcher.
+// If index is zero, watch will start from the current index + 1.
+func (wh *watcherHub) watch(key string, recursive, stream bool, index, storeIndex uint64) (Watcher, *v2error.Error) {
+ reportWatchRequest()
+ event, err := wh.EventHistory.scan(key, recursive, index)
+ if err != nil {
+ err.Index = storeIndex
+ return nil, err
+ }
+
+ w := &watcher{
+ eventChan: make(chan *Event, 100), // use a buffered channel
+ recursive: recursive,
+ stream: stream,
+ sinceIndex: index,
+ startIndex: storeIndex,
+ hub: wh,
+ }
+
+ wh.mutex.Lock()
+ defer wh.mutex.Unlock()
+ // If the event exists in the known history, deliver it immediately with the current EtcdIndex attached
+ if event != nil {
+ ne := event.Clone()
+ ne.EtcdIndex = storeIndex
+ w.eventChan <- ne
+ return w, nil
+ }
+
+ l, ok := wh.watchers[key]
+
+ var elem *list.Element
+
+ if ok { // add the new watcher to the back of the list
+ elem = l.PushBack(w)
+ } else { // create a new list and add the new watcher
+ l = list.New()
+ elem = l.PushBack(w)
+ wh.watchers[key] = l
+ }
+
+ w.remove = func() {
+ if w.removed { // avoid removing it twice
+ return
+ }
+ w.removed = true
+ l.Remove(elem)
+ atomic.AddInt64(&wh.count, -1)
+ reportWatcherRemoved()
+ if l.Len() == 0 {
+ delete(wh.watchers, key)
+ }
+ }
+
+ atomic.AddInt64(&wh.count, 1)
+ reportWatcherAdded()
+
+ return w, nil
+}
+
+func (wh *watcherHub) add(e *Event) {
+ wh.EventHistory.addEvent(e)
+}
+
+// notify accepts an event and notifies the watchers.
+func (wh *watcherHub) notify(e *Event) {
+ e = wh.EventHistory.addEvent(e) // add event into the eventHistory
+
+ segments := strings.Split(e.Node.Key, "/")
+
+ currPath := "/"
+
+ // Walk through all the segments of the path and notify the watchers.
+ // If the path is "/foo/bar", it will notify the watchers with paths "/",
+ // "/foo" and "/foo/bar".
+
+ for _, segment := range segments {
+ currPath = path.Join(currPath, segment)
+ // notify the watchers who are interested in changes to the current path
+ wh.notifyWatchers(e, currPath, false)
+ }
+}
+
+func (wh *watcherHub) notifyWatchers(e *Event, nodePath string, deleted bool) {
+ wh.mutex.Lock()
+ defer wh.mutex.Unlock()
+
+ l, ok := wh.watchers[nodePath]
+ if ok {
+ curr := l.Front()
+
+ for curr != nil {
+ next := curr.Next() // save reference to the next one in the list
+
+ w, _ := curr.Value.(*watcher)
+
+ originalPath := e.Node.Key == nodePath
+ if (originalPath || !isHidden(nodePath, e.Node.Key)) && w.notify(e, originalPath, deleted) {
+ if !w.stream { // do not remove the stream watcher
+ // if we successfully notify a watcher
+ // we need to remove the watcher from the list
+ // and decrease the counter
+ w.removed = true
+ l.Remove(curr)
+ atomic.AddInt64(&wh.count, -1)
+ reportWatcherRemoved()
+ }
+ }
+
+ curr = next // update current to the next element in the list
+ }
+
+ if l.Len() == 0 {
+ // if we have notified all watchers in the list
+ // we can delete the list
+ delete(wh.watchers, nodePath)
+ }
+ }
+}
+
+// clone clones the watcherHub and returns the cloned one.
+// It only clones the static content; it does not clone the current watchers.
+func (wh *watcherHub) clone() *watcherHub {
+ clonedHistory := wh.EventHistory.clone()
+
+ return &watcherHub{
+ EventHistory: clonedHistory,
+ }
+}
+
+// isHidden checks whether the key path is considered hidden relative to the
+// watch path, i.e. the last element is hidden or it is within a hidden directory.
+func isHidden(watchPath, keyPath string) bool {
+ // When deleting a directory, watchPath might be deeper than the actual keyPath
+ // For example, when deleting /foo we also need to notify watchers on /foo/bar.
+ if len(watchPath) > len(keyPath) {
+ return false
+ }
+ // if watch path is just a "/", after path will start without "/"
+ // add a "/" to deal with the special case when watchPath is "/"
+ afterPath := path.Clean("/" + keyPath[len(watchPath):])
+ return strings.Contains(afterPath, "/_")
+}
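+
+// For example, with watchPath "/foo" and keyPath "/foo/_bar", afterPath is
+// "/_bar", so the event is hidden; with keyPath "/foo/bar/_baz" it is
+// "/bar/_baz", which is likewise hidden from a recursive watcher on "/foo".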
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3alarm/alarms.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3alarm/alarms.go
new file mode 100644
index 0000000..bf17929
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3alarm/alarms.go
@@ -0,0 +1,128 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v3alarm manages health status alarms in etcd.
+package v3alarm
+
+import (
+ "sync"
+
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/storage/backend"
+ "go.etcd.io/etcd/server/v3/storage/schema"
+)
+
+type BackendGetter interface {
+ Backend() backend.Backend
+}
+
+type alarmSet map[types.ID]*pb.AlarmMember
+
+// AlarmStore persists alarms to the backend.
+type AlarmStore struct {
+ lg *zap.Logger
+ mu sync.Mutex
+ types map[pb.AlarmType]alarmSet
+
+ be schema.AlarmBackend
+}
+
+func NewAlarmStore(lg *zap.Logger, be schema.AlarmBackend) (*AlarmStore, error) {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ ret := &AlarmStore{lg: lg, types: make(map[pb.AlarmType]alarmSet), be: be}
+ err := ret.restore()
+ return ret, err
+}
+
+func (a *AlarmStore) Activate(id types.ID, at pb.AlarmType) *pb.AlarmMember {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+
+ newAlarm := &pb.AlarmMember{MemberID: uint64(id), Alarm: at}
+ if m := a.addToMap(newAlarm); m != newAlarm {
+ return m
+ }
+
+ a.be.MustPutAlarm(newAlarm)
+ return newAlarm
+}
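+
+// Note that Activate is idempotent per (member, alarm type): re-activating an
+// existing alarm returns the already-stored member without another backend
+// write.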
+
+func (a *AlarmStore) Deactivate(id types.ID, at pb.AlarmType) *pb.AlarmMember {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+
+ t := a.types[at]
+ if t == nil {
+ t = make(alarmSet)
+ a.types[at] = t
+ }
+ m := t[id]
+ if m == nil {
+ return nil
+ }
+
+ delete(t, id)
+
+ a.be.MustDeleteAlarm(m)
+ return m
+}
+
+func (a *AlarmStore) Get(at pb.AlarmType) (ret []*pb.AlarmMember) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ if at == pb.AlarmType_NONE {
+ for _, t := range a.types {
+ for _, m := range t {
+ ret = append(ret, m)
+ }
+ }
+ return ret
+ }
+ for _, m := range a.types[at] {
+ ret = append(ret, m)
+ }
+ return ret
+}
+
+func (a *AlarmStore) restore() error {
+ a.be.CreateAlarmBucket()
+ ms, err := a.be.GetAllAlarms()
+ if err != nil {
+ return err
+ }
+ for _, m := range ms {
+ a.addToMap(m)
+ }
+ a.be.ForceCommit()
+ return err
+}
+
+func (a *AlarmStore) addToMap(newAlarm *pb.AlarmMember) *pb.AlarmMember {
+ t := a.types[newAlarm.Alarm]
+ if t == nil {
+ t = make(alarmSet)
+ a.types[newAlarm.Alarm] = t
+ }
+ m := t[types.ID(newAlarm.MemberID)]
+ if m != nil {
+ return m
+ }
+ t[types.ID(newAlarm.MemberID)] = newAlarm
+ return newAlarm
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3client/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3client/doc.go
new file mode 100644
index 0000000..a6a4d7e
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3client/doc.go
@@ -0,0 +1,44 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v3client provides clientv3 interfaces from an etcdserver.
+//
+// Use v3client by creating an EtcdServer instance, then wrapping it with v3client.New:
+//
+// import (
+// "context"
+//
+// "go.etcd.io/etcd/server/v3/embed"
+// "go.etcd.io/etcd/server/v3/etcdserver/api/v3client"
+// )
+//
+// ...
+//
+// // create an embedded EtcdServer from the default configuration
+// cfg := embed.NewConfig()
+// cfg.Dir = "default.etcd"
+// e, err := embed.StartEtcd(cfg)
+// if err != nil {
+// // handle error!
+// }
+//
+// // wrap the EtcdServer with v3client
+// cli := v3client.New(e.Server)
+//
+// // use like an ordinary clientv3
+// resp, err := cli.Put(context.TODO(), "some-key", "it works!")
+// if err != nil {
+// // handle error!
+// }
+package v3client
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3client/v3client.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3client/v3client.go
new file mode 100644
index 0000000..b9d1839
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3client/v3client.go
@@ -0,0 +1,67 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3client
+
+import (
+ "context"
+ "time"
+
+ clientv3 "go.etcd.io/etcd/client/v3"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc"
+ "go.etcd.io/etcd/server/v3/proxy/grpcproxy/adapter"
+)
+
+// New creates a clientv3 client that wraps an in-process EtcdServer. Instead
+// of making gRPC calls through sockets, the client makes direct function calls
+// to the etcd server through its api/v3rpc function interfaces.
+func New(s *etcdserver.EtcdServer) *clientv3.Client {
+ c := clientv3.NewCtxClient(context.Background(), clientv3.WithZapLogger(s.Logger()))
+
+ kvc := adapter.KvServerToKvClient(v3rpc.NewQuotaKVServer(s))
+ c.KV = clientv3.NewKVFromKVClient(kvc, c)
+
+ lc := adapter.LeaseServerToLeaseClient(v3rpc.NewQuotaLeaseServer(s))
+ c.Lease = clientv3.NewLeaseFromLeaseClient(lc, c, time.Second)
+
+ wc := adapter.WatchServerToWatchClient(v3rpc.NewWatchServer(s))
+ c.Watcher = &watchWrapper{clientv3.NewWatchFromWatchClient(wc, c)}
+
+ mc := adapter.MaintenanceServerToMaintenanceClient(v3rpc.NewMaintenanceServer(s, nil))
+ c.Maintenance = clientv3.NewMaintenanceFromMaintenanceClient(mc, c)
+
+ clc := adapter.ClusterServerToClusterClient(v3rpc.NewClusterServer(s))
+ c.Cluster = clientv3.NewClusterFromClusterClient(clc, c)
+
+ a := adapter.AuthServerToAuthClient(v3rpc.NewAuthServer(s))
+ c.Auth = clientv3.NewAuthFromAuthClient(a, c)
+
+ return c
+}
+
+// blankContext implements Stringer on a context so the ctx string doesn't
+// depend on the context's WithValue data, which tends to be unsynchronized
+// (e.g., x/net/trace), causing ctx.String() to trigger data races.
+type blankContext struct{ context.Context }
+
+func (*blankContext) String() string { return "(blankCtx)" }
+
+// watchWrapper wraps clientv3 watch calls to blank out the context
+// to avoid races on trace data.
+type watchWrapper struct{ clientv3.Watcher }
+
+func (ww *watchWrapper) Watch(ctx context.Context, key string, opts ...clientv3.OpOption) clientv3.WatchChan {
+ return ww.Watcher.Watch(&blankContext{ctx}, key, opts...)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/compactor.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/compactor.go
new file mode 100644
index 0000000..f916e71
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/compactor.go
@@ -0,0 +1,73 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3compactor
+
+import (
+ "context"
+ "fmt"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+)
+
+const (
+ ModePeriodic = "periodic"
+ ModeRevision = "revision"
+)
+
+// Compactor purges old log from the storage periodically.
+type Compactor interface {
+ // Run starts the main loop of the compactor in the background.
+ // Use Stop() to halt the loop and release the resources.
+ Run()
+ // Stop halts the main loop of the compactor.
+ Stop()
+ // Pause temporarily suspends the compactor so that no compaction runs. Use Resume() to unpause.
+ Pause()
+ // Resume restarts the compactor suspended by Pause().
+ Resume()
+}
+
+type Compactable interface {
+ Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
+}
+
+type RevGetter interface {
+ Rev() int64
+}
+
+// New returns a new Compactor based on given "mode".
+func New(
+ lg *zap.Logger,
+ mode string,
+ retention time.Duration,
+ rg RevGetter,
+ c Compactable,
+) (Compactor, error) {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ switch mode {
+ case ModePeriodic:
+ return newPeriodic(lg, clockwork.NewRealClock(), retention, rg, c), nil
+ case ModeRevision:
+ return newRevision(lg, clockwork.NewRealClock(), int64(retention), rg, c), nil
+ default:
+ return nil, fmt.Errorf("unsupported compaction mode %s", mode)
+ }
+}
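+
+// A minimal usage sketch (assuming lg is a *zap.Logger and rg/c are the
+// server's RevGetter and Compactable implementations):
+//
+//	cp, err := v3compactor.New(lg, v3compactor.ModePeriodic, time.Hour, rg, c)
+//	if err != nil {
+//		// handle error
+//	}
+//	cp.Run()
+//	defer cp.Stop()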
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/doc.go
new file mode 100644
index 0000000..bb28046
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v3compactor implements automated policies for compacting etcd's mvcc storage.
+package v3compactor
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/periodic.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/periodic.go
new file mode 100644
index 0000000..7468d23
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/periodic.go
@@ -0,0 +1,208 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3compactor
+
+import (
+ "context"
+ "errors"
+ "sync"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+)
+
+// Periodic compacts the log by purging revisions older than
+// the configured retention time.
+type Periodic struct {
+ lg *zap.Logger
+ clock clockwork.Clock
+ period time.Duration
+
+ rg RevGetter
+ c Compactable
+
+ revs []int64
+ ctx context.Context
+ cancel context.CancelFunc
+
+ // mu protects paused
+ mu sync.RWMutex
+ paused bool
+}
+
+// newPeriodic creates a new instance of the Periodic compactor that purges
+// the log older than the duration h.
+func newPeriodic(lg *zap.Logger, clock clockwork.Clock, h time.Duration, rg RevGetter, c Compactable) *Periodic {
+ pc := &Periodic{
+ lg: lg,
+ clock: clock,
+ period: h,
+ rg: rg,
+ c: c,
+ }
+ // revs will never grow beyond the number of retentions.
+ pc.revs = make([]int64, 0, pc.getRetentions())
+ pc.ctx, pc.cancel = context.WithCancel(context.Background())
+ return pc
+}
+
+/*
+Compaction period 1-hour:
+ 1. compute compaction period, which is 1-hour
+ 2. record revisions for every 1/10 of 1-hour (6-minute)
+ 3. keep recording revisions with no compaction for first 1-hour
+ 4. do compact with revs[0]
+ - success? continue on for-loop and move sliding window; revs = revs[1:]
+ - failure? update revs, and retry after 1/10 of 1-hour (6-minute)
+
+Compaction period 24-hour:
+ 1. compute compaction period, which is 24-hour
+ 2. record revisions for every 1/10 of 24-hour (144-minute)
+ 3. keep recording revisions with no compaction for first 24-hour
+ 4. do compact with revs[0]
+ - success? continue on for-loop and move sliding window; revs = revs[1:]
+ - failure? update revs, and retry after 1/10 of 24-hour (144-minute)
+
+Compaction period 59-min:
+ 1. compute compaction period, which is 59-min
+ 2. record revisions for every 1/10 of 59-min (5.9-min)
+ 3. keep recording revisions with no compaction for first 59-min
+ 4. do compact with revs[0]
+ - success? continue on for-loop and move sliding window; revs = revs[1:]
+ - failure? update revs, and retry after 1/10 of 59-min (5.9-min)
+
+Compaction period 5-sec:
+ 1. compute compaction period, which is 5-sec
+ 2. record revisions for every 1/10 of 5-sec (0.5-sec)
+ 3. keep recording revisions with no compaction for first 5-sec
+ 4. do compact with revs[0]
+ - success? continue on for-loop and move sliding window; revs = revs[1:]
+ - failure? update revs, and retry after 1/10 of 5-sec (0.5-sec)
+*/
+
+// Run runs periodic compactor.
+func (pc *Periodic) Run() {
+ compactInterval := pc.getCompactInterval()
+ retryInterval := pc.getRetryInterval()
+ retentions := pc.getRetentions()
+
+ go func() {
+ lastRevision := int64(0)
+ lastSuccess := pc.clock.Now()
+ baseInterval := pc.period
+ for {
+ pc.revs = append(pc.revs, pc.rg.Rev())
+ if len(pc.revs) > retentions {
+ pc.revs = pc.revs[1:] // pc.revs[0] is always the rev at pc.period ago
+ }
+
+ select {
+ case <-pc.ctx.Done():
+ return
+ case <-pc.clock.After(retryInterval):
+ pc.mu.RLock()
+ p := pc.paused
+ pc.mu.RUnlock()
+ if p {
+ continue
+ }
+ }
+ rev := pc.revs[0]
+ if pc.clock.Now().Sub(lastSuccess) < baseInterval || rev == lastRevision {
+ continue
+ }
+
+ // wait up to the initially given period before the first compaction; afterwards use compactInterval
+ if baseInterval == pc.period {
+ baseInterval = compactInterval
+ }
+
+ pc.lg.Info(
+ "starting auto periodic compaction",
+ zap.Int64("revision", rev),
+ zap.Duration("compact-period", pc.period),
+ )
+ startTime := pc.clock.Now()
+ _, err := pc.c.Compact(pc.ctx, &pb.CompactionRequest{Revision: rev})
+ if err == nil || errors.Is(err, mvcc.ErrCompacted) {
+ pc.lg.Info(
+ "completed auto periodic compaction",
+ zap.Int64("revision", rev),
+ zap.Duration("compact-period", pc.period),
+ zap.Duration("took", pc.clock.Now().Sub(startTime)),
+ )
+ lastRevision = rev
+ lastSuccess = pc.clock.Now()
+ } else {
+ pc.lg.Warn(
+ "failed auto periodic compaction",
+ zap.Int64("revision", rev),
+ zap.Duration("compact-period", pc.period),
+ zap.Duration("retry-interval", retryInterval),
+ zap.Error(err),
+ )
+ }
+ }
+ }()
+}
+
+// If the given compaction period x is <1-hour, compact every x duration.
+// (e.g. --auto-compaction-mode 'periodic' --auto-compaction-retention='10m', then compact every 10 minutes)
+// If the given compaction period x is >1-hour, compact every hour.
+// (e.g. --auto-compaction-mode 'periodic' --auto-compaction-retention='2h', then compact every 1 hour)
+func (pc *Periodic) getCompactInterval() time.Duration {
+ itv := pc.period
+ if itv > time.Hour {
+ itv = time.Hour
+ }
+ return itv
+}
+
+func (pc *Periodic) getRetentions() int {
+ return int(pc.period/pc.getRetryInterval()) + 1
+}
+
+const retryDivisor = 10
+
+func (pc *Periodic) getRetryInterval() time.Duration {
+ itv := pc.period
+ if itv > time.Hour {
+ itv = time.Hour
+ }
+ return itv / retryDivisor
+}
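+
+// For example, a 10m period gives a 1m retry interval and 11 retentions,
+// while a 24h period is capped at a 1h compact interval, giving a 6m retry
+// interval and 241 retentions.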
+
+// Stop stops periodic compactor.
+func (pc *Periodic) Stop() {
+ pc.cancel()
+}
+
+// Pause pauses periodic compactor.
+func (pc *Periodic) Pause() {
+ pc.mu.Lock()
+ pc.paused = true
+ pc.mu.Unlock()
+}
+
+// Resume resumes periodic compactor.
+func (pc *Periodic) Resume() {
+ pc.mu.Lock()
+ pc.paused = false
+ pc.mu.Unlock()
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/revision.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/revision.go
new file mode 100644
index 0000000..4174861
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor/revision.go
@@ -0,0 +1,131 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3compactor
+
+import (
+ "context"
+ "errors"
+ "sync"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+)
+
+// Revision compacts the log by purging revisions older than
+// the configured revision number. Compaction happens every 5 minutes.
+type Revision struct {
+ lg *zap.Logger
+
+ clock clockwork.Clock
+ retention int64
+
+ rg RevGetter
+ c Compactable
+
+ ctx context.Context
+ cancel context.CancelFunc
+
+ mu sync.Mutex
+ paused bool
+}
+
+// newRevision creates a new instance of the Revision compactor that purges
+// the log older than retention revisions from the current revision.
+func newRevision(lg *zap.Logger, clock clockwork.Clock, retention int64, rg RevGetter, c Compactable) *Revision {
+ rc := &Revision{
+ lg: lg,
+ clock: clock,
+ retention: retention,
+ rg: rg,
+ c: c,
+ }
+ rc.ctx, rc.cancel = context.WithCancel(context.Background())
+ return rc
+}
+
+const revInterval = 5 * time.Minute
+
+// Run runs revision-based compactor.
+func (rc *Revision) Run() {
+ prev := int64(0)
+ go func() {
+ for {
+ select {
+ case <-rc.ctx.Done():
+ return
+ case <-rc.clock.After(revInterval):
+ rc.mu.Lock()
+ p := rc.paused
+ rc.mu.Unlock()
+ if p {
+ continue
+ }
+ }
+
+ rev := rc.rg.Rev() - rc.retention
+ if rev <= 0 || rev == prev {
+ continue
+ }
+
+ now := time.Now()
+ rc.lg.Info(
+ "starting auto revision compaction",
+ zap.Int64("revision", rev),
+ zap.Int64("revision-compaction-retention", rc.retention),
+ )
+ _, err := rc.c.Compact(rc.ctx, &pb.CompactionRequest{Revision: rev})
+ if err == nil || errors.Is(err, mvcc.ErrCompacted) {
+ prev = rev
+ rc.lg.Info(
+ "completed auto revision compaction",
+ zap.Int64("revision", rev),
+ zap.Int64("revision-compaction-retention", rc.retention),
+ zap.Duration("took", time.Since(now)),
+ )
+ } else {
+ rc.lg.Warn(
+ "failed auto revision compaction",
+ zap.Int64("revision", rev),
+ zap.Int64("revision-compaction-retention", rc.retention),
+ zap.Duration("retry-interval", revInterval),
+ zap.Error(err),
+ )
+ }
+ }
+ }()
+}
+
+// Stop stops revision-based compactor.
+func (rc *Revision) Stop() {
+ rc.cancel()
+}
+
+// Pause pauses revision-based compactor.
+func (rc *Revision) Pause() {
+ rc.mu.Lock()
+ rc.paused = true
+ rc.mu.Unlock()
+}
+
+// Resume resumes revision-based compactor.
+func (rc *Revision) Resume() {
+ rc.mu.Lock()
+ rc.paused = false
+ rc.mu.Unlock()
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3discovery/discovery.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3discovery/discovery.go
new file mode 100644
index 0000000..7fe231c
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3discovery/discovery.go
@@ -0,0 +1,509 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v3discovery provides an implementation of the cluster discovery that
+// is used by etcd with v3 client.
+package v3discovery
+
+import (
+ "context"
+ "errors"
+ "math"
+ "path"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ clientv3 "go.etcd.io/etcd/client/v3"
+)
+
+const (
+ discoveryPrefix = "/_etcd/registry"
+)
+
+var (
+ ErrInvalidURL = errors.New("discovery: invalid peer URL")
+ ErrBadSizeKey = errors.New("discovery: size key is bad")
+ ErrSizeNotFound = errors.New("discovery: size key not found")
+ ErrFullCluster = errors.New("discovery: cluster is full")
+ ErrTooManyRetries = errors.New("discovery: too many retries")
+)
+
+var (
+ // Number of retries discovery will attempt before giving up and erroring out.
+ nRetries = uint(math.MaxUint32)
+ maxExponentialRetries = uint(8)
+)
+
+type DiscoveryConfig struct {
+ clientv3.ConfigSpec `json:"client"`
+ Token string `json:"token"`
+}
+
+type memberInfo struct {
+ // peerRegKey is the key used by the member when registering in the
+ // discovery service.
+ // Format: "/_etcd/registry/<ClusterToken>/members/<memberID>".
+ peerRegKey string
+ // peerURLsMap format: "peerName=peerURLs", i.e., "member1=http://127.0.0.1:2380".
+ peerURLsMap string
+ // createRev is the member's CreateRevision in the etcd cluster backing
+ // the discovery service.
+ createRev int64
+}
+
+type clusterInfo struct {
+ clusterToken string
+ members []memberInfo
+}
+
+// key prefix for each cluster: "/_etcd/registry/<ClusterToken>".
+func getClusterKeyPrefix(cluster string) string {
+ return path.Join(discoveryPrefix, cluster)
+}
+
+// key format for cluster size: "/_etcd/registry/<ClusterToken>/_config/size".
+func getClusterSizeKey(cluster string) string {
+ return path.Join(getClusterKeyPrefix(cluster), "_config/size")
+}
+
+// key prefix for each member: "/_etcd/registry/<ClusterToken>/members".
+func getMemberKeyPrefix(clusterToken string) string {
+ return path.Join(getClusterKeyPrefix(clusterToken), "members")
+}
+
+// key format for each member: "/_etcd/registry/<ClusterToken>/members/<memberID>".
+func getMemberKey(cluster, memberID string) string {
+ return path.Join(getMemberKeyPrefix(cluster), memberID)
+}
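+
+// For example, a (hypothetical) token "abc123" and member ID
+// "8e9e05c52164694d" map to the registry key
+// "/_etcd/registry/abc123/members/8e9e05c52164694d".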
+
+// GetCluster will connect to the discovery service at the given endpoints and
+// retrieve a string describing the cluster
+func GetCluster(lg *zap.Logger, cfg *DiscoveryConfig) (cs string, rerr error) {
+ d, err := newDiscovery(lg, cfg, 0)
+ if err != nil {
+ return "", err
+ }
+
+ defer d.close()
+ defer func() {
+ if rerr != nil {
+ d.lg.Error(
+ "discovery failed to get cluster",
+ zap.String("cluster", cs),
+ zap.Error(rerr),
+ )
+ } else {
+ d.lg.Info(
+ "discovery got cluster successfully",
+ zap.String("cluster", cs),
+ )
+ }
+ }()
+
+ return d.getCluster()
+}
+
+// JoinCluster will connect to the discovery service at the endpoints, and
+// register the server represented by the given id and config to the cluster.
+// The parameter `config` is supposed to be in the format "memberName=peerURLs",
+// such as "member1=http://127.0.0.1:2380".
+//
+// The final returned string has the same format as "--initial-cluster", such as
+// "infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380".
+func JoinCluster(lg *zap.Logger, cfg *DiscoveryConfig, id types.ID, config string) (cs string, rerr error) {
+ d, err := newDiscovery(lg, cfg, id)
+ if err != nil {
+ return "", err
+ }
+
+ defer d.close()
+ defer func() {
+ if rerr != nil {
+ d.lg.Error(
+ "discovery failed to join cluster",
+ zap.String("cluster", cs),
+ zap.Error(rerr),
+ )
+ } else {
+ d.lg.Info(
+ "discovery joined cluster successfully",
+ zap.String("cluster", cs),
+ )
+ }
+ }()
+
+ return d.joinCluster(config)
+}
+
+type discovery struct {
+ lg *zap.Logger
+ clusterToken string
+ memberID types.ID
+ c *clientv3.Client
+ retries uint
+
+ cfg *DiscoveryConfig
+
+ clock clockwork.Clock
+}
+
+func newDiscovery(lg *zap.Logger, dcfg *DiscoveryConfig, id types.ID) (*discovery, error) {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+
+ lg = lg.With(zap.String("discovery-token", dcfg.Token), zap.String("discovery-endpoints", strings.Join(dcfg.Endpoints, ",")))
+ cfg, err := clientv3.NewClientConfig(&dcfg.ConfigSpec, lg)
+ if err != nil {
+ return nil, err
+ }
+
+ c, err := clientv3.New(*cfg)
+ if err != nil {
+ return nil, err
+ }
+ return &discovery{
+ lg: lg,
+ clusterToken: dcfg.Token,
+ memberID: id,
+ c: c,
+ cfg: dcfg,
+ clock: clockwork.NewRealClock(),
+ }, nil
+}
+
+func (d *discovery) getCluster() (string, error) {
+ cls, clusterSize, rev, err := d.checkCluster()
+ if err != nil {
+ if errors.Is(err, ErrFullCluster) {
+ return cls.getInitClusterStr(clusterSize)
+ }
+ return "", err
+ }
+
+ for cls.Len() < clusterSize {
+ d.waitPeers(cls, clusterSize, rev)
+ }
+
+ return cls.getInitClusterStr(clusterSize)
+}
+
+func (d *discovery) joinCluster(config string) (string, error) {
+ _, _, _, err := d.checkCluster()
+ if err != nil {
+ return "", err
+ }
+
+ if err = d.registerSelf(config); err != nil {
+ return "", err
+ }
+
+ cls, clusterSize, rev, err := d.checkCluster()
+ if err != nil {
+ return "", err
+ }
+
+ for cls.Len() < clusterSize {
+ d.waitPeers(cls, clusterSize, rev)
+ }
+
+ return cls.getInitClusterStr(clusterSize)
+}
+
+func (d *discovery) getClusterSize() (int, error) {
+ configKey := getClusterSizeKey(d.clusterToken)
+ ctx, cancel := context.WithTimeout(context.Background(), d.cfg.RequestTimeout)
+ defer cancel()
+
+ resp, err := d.c.Get(ctx, configKey)
+ if err != nil {
+ d.lg.Warn(
+ "failed to get cluster size from discovery service",
+ zap.String("clusterSizeKey", configKey),
+ zap.Error(err),
+ )
+ return 0, err
+ }
+
+ if len(resp.Kvs) == 0 {
+ return 0, ErrSizeNotFound
+ }
+
+ clusterSize, err := strconv.ParseInt(string(resp.Kvs[0].Value), 10, 0)
+ if err != nil || clusterSize <= 0 {
+ return 0, ErrBadSizeKey
+ }
+
+ return int(clusterSize), nil
+}
+
+func (d *discovery) getClusterMembers() (*clusterInfo, int64, error) {
+ membersKeyPrefix := getMemberKeyPrefix(d.clusterToken)
+ ctx, cancel := context.WithTimeout(context.Background(), d.cfg.RequestTimeout)
+ defer cancel()
+
+ resp, err := d.c.Get(ctx, membersKeyPrefix, clientv3.WithPrefix())
+ if err != nil {
+ d.lg.Warn(
+ "failed to get cluster members from discovery service",
+ zap.String("membersKeyPrefix", membersKeyPrefix),
+ zap.Error(err),
+ )
+ return nil, 0, err
+ }
+
+ cls := &clusterInfo{clusterToken: d.clusterToken}
+ for _, kv := range resp.Kvs {
+ mKey := strings.TrimSpace(string(kv.Key))
+ mValue := strings.TrimSpace(string(kv.Value))
+
+ if err := cls.add(mKey, mValue, kv.CreateRevision); err != nil {
+ d.lg.Warn(
+ err.Error(),
+ zap.String("memberKey", mKey),
+ zap.String("memberInfo", mValue),
+ )
+ } else {
+ d.lg.Info(
+ "found peer from discovery service",
+ zap.String("memberKey", mKey),
+ zap.String("memberInfo", mValue),
+ )
+ }
+ }
+
+ return cls, resp.Header.Revision, nil
+}
+
+func (d *discovery) checkClusterRetry() (*clusterInfo, int, int64, error) {
+ if d.retries < nRetries {
+ d.logAndBackoffForRetry("cluster status check")
+ return d.checkCluster()
+ }
+ return nil, 0, 0, ErrTooManyRetries
+}
+
+func (d *discovery) checkCluster() (*clusterInfo, int, int64, error) {
+ clusterSize, err := d.getClusterSize()
+ if err != nil {
+ if errors.Is(err, ErrSizeNotFound) || errors.Is(err, ErrBadSizeKey) {
+ return nil, 0, 0, err
+ }
+
+ return d.checkClusterRetry()
+ }
+
+ cls, rev, err := d.getClusterMembers()
+ if err != nil {
+ return d.checkClusterRetry()
+ }
+ d.retries = 0
+
+ // find self position
+ memberSelfID := getMemberKey(d.clusterToken, d.memberID.String())
+ idx := 0
+ for _, m := range cls.members {
+ if m.peerRegKey == memberSelfID {
+ break
+ }
+ if idx >= clusterSize-1 {
+ return cls, clusterSize, rev, ErrFullCluster
+ }
+ idx++
+ }
+ return cls, clusterSize, rev, nil
+}
+
+func (d *discovery) registerSelfRetry(contents string) error {
+ if d.retries < nRetries {
+ d.logAndBackoffForRetry("register member itself")
+ return d.registerSelf(contents)
+ }
+ return ErrTooManyRetries
+}
+
+func (d *discovery) registerSelf(contents string) error {
+ ctx, cancel := context.WithTimeout(context.Background(), d.cfg.RequestTimeout)
+ memberKey := getMemberKey(d.clusterToken, d.memberID.String())
+ _, err := d.c.Put(ctx, memberKey, contents)
+ cancel()
+
+ if err != nil {
+ d.lg.Warn(
+ "failed to register members itself to the discovery service",
+ zap.String("memberKey", memberKey),
+ zap.Error(err),
+ )
+ return d.registerSelfRetry(contents)
+ }
+ d.retries = 0
+
+ d.lg.Info(
+ "register member itself successfully",
+ zap.String("memberKey", memberKey),
+ zap.String("memberInfo", contents),
+ )
+
+ return nil
+}
+
+func (d *discovery) waitPeers(cls *clusterInfo, clusterSize int, rev int64) {
+ // watch from the next revision
+ membersKeyPrefix := getMemberKeyPrefix(d.clusterToken)
+ w := d.c.Watch(context.Background(), membersKeyPrefix, clientv3.WithPrefix(), clientv3.WithRev(rev+1))
+
+ d.lg.Info(
+ "waiting for peers from discovery service",
+ zap.Int("clusterSize", clusterSize),
+ zap.Int("found-peers", cls.Len()),
+ )
+
+ // waiting for peers until all needed peers are returned
+ for wresp := range w {
+ for _, ev := range wresp.Events {
+ mKey := strings.TrimSpace(string(ev.Kv.Key))
+ mValue := strings.TrimSpace(string(ev.Kv.Value))
+
+ if err := cls.add(mKey, mValue, ev.Kv.CreateRevision); err != nil {
+ d.lg.Warn(
+ err.Error(),
+ zap.String("memberKey", mKey),
+ zap.String("memberInfo", mValue),
+ )
+ } else {
+ d.lg.Info(
+ "found peer from discovery service",
+ zap.String("memberKey", mKey),
+ zap.String("memberInfo", mValue),
+ )
+ }
+ }
+
+ if cls.Len() >= clusterSize {
+ break
+ }
+ }
+
+ d.lg.Info(
+ "found all needed peers from discovery service",
+ zap.Int("clusterSize", clusterSize),
+ zap.Int("found-peers", cls.Len()),
+ )
+}
+
+func (d *discovery) logAndBackoffForRetry(step string) {
+ d.retries++
+ // logAndBackoffForRetry stops increasing the exponential backoff once the
+ // retries exceed maxExponentialRetries, switching to a constant backoff.
+ retries := d.retries
+ if retries > maxExponentialRetries {
+ retries = maxExponentialRetries
+ }
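+ // e.g. the 3rd retry backs off 8s; from the 8th retry on, the backoff
+ // stays constant at 256s.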
+ retryTimeInSecond := time.Duration(0x1<<retries) * time.Second
+ d.lg.Warn(
+ "retry connecting to discovery service",
+ zap.String("reason", step),
+ zap.Duration("backoff", retryTimeInSecond),
+ )
+ d.clock.Sleep(retryTimeInSecond)
+}
+
+func (d *discovery) close() error {
+ if d.c != nil {
+ return d.c.Close()
+ }
+ return nil
+}
+
+func (cls *clusterInfo) Len() int { return len(cls.members) }
+func (cls *clusterInfo) Less(i, j int) bool {
+ return cls.members[i].createRev < cls.members[j].createRev
+}
+
+func (cls *clusterInfo) Swap(i, j int) {
+ cls.members[i], cls.members[j] = cls.members[j], cls.members[i]
+}
+
+func (cls *clusterInfo) add(memberKey, memberValue string, rev int64) error {
+ membersKeyPrefix := getMemberKeyPrefix(cls.clusterToken)
+
+ if !strings.HasPrefix(memberKey, membersKeyPrefix) {
+ // This should never happen, because exactly the same
+ // ${membersKeyPrefix} was used above to get and watch the member list.
+ return errors.New("invalid peer registry key")
+ }
+
+ if !strings.ContainsRune(memberValue, '=') {
+ // It must be in the format "member1=http://127.0.0.1:2380".
+ return errors.New("invalid peer info returned from discovery service")
+ }
+
+ if cls.exist(memberKey) {
+ return errors.New("found duplicate peer from discovery service")
+ }
+
+ cls.members = append(cls.members, memberInfo{
+ peerRegKey: memberKey,
+ peerURLsMap: memberValue,
+ createRev: rev,
+ })
+
+ // When multiple members register at the same time, the number of
+ // registered members may be larger than the configured cluster size.
+ // So we sort all the members on the CreateRevision in ascending order,
+ // and get the first ${clusterSize} members in this case.
+ sort.Sort(cls)
+
+ return nil
+}
+
+func (cls *clusterInfo) exist(mKey string) bool {
+ // Usually there are just a couple of members, so performance shouldn't be a problem.
+ for _, m := range cls.members {
+ if mKey == m.peerRegKey {
+ return true
+ }
+ }
+ return false
+}
+
+func (cls *clusterInfo) getInitClusterStr(clusterSize int) (string, error) {
+ peerURLs := cls.getPeerURLs()
+
+ if len(peerURLs) > clusterSize {
+ peerURLs = peerURLs[:clusterSize]
+ }
+
+ us := strings.Join(peerURLs, ",")
+ _, err := types.NewURLsMap(us)
+ if err != nil {
+ return us, ErrInvalidURL
+ }
+
+ return us, nil
+}
+
+func (cls *clusterInfo) getPeerURLs() []string {
+ var peerURLs []string
+ for _, peer := range cls.members {
+ peerURLs = append(peerURLs, peer.peerURLsMap)
+ }
+ return peerURLs
+}
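
The discovery helpers above sort concurrent registrants by their create revision and keep only the first clusterSize of them when building the initial-cluster string. A minimal standalone sketch of that selection logic follows (editor's illustration, not part of this diff; the member names and revisions are made up):

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

// member mirrors the relevant fields of memberInfo above.
type member struct {
	urlsMap   string // e.g. "infra1=http://10.0.0.1:2380"
	createRev int64  // CreateRevision of the registration key
}

// initClusterStr sorts registrants by create revision and joins the first
// clusterSize entries, analogous to getInitClusterStr over clusterInfo.
func initClusterStr(members []member, clusterSize int) string {
	sort.Slice(members, func(i, j int) bool { return members[i].createRev < members[j].createRev })
	if len(members) > clusterSize {
		members = members[:clusterSize] // registrants that arrived late are dropped
	}
	urls := make([]string, 0, len(members))
	for _, m := range members {
		urls = append(urls, m.urlsMap)
	}
	return strings.Join(urls, ",")
}

func main() {
	ms := []member{
		{"infra3=http://10.0.0.3:2380", 12},
		{"infra1=http://10.0.0.1:2380", 5},
		{"infra2=http://10.0.0.2:2380", 7},
	}
	// Prints: infra1=http://10.0.0.1:2380,infra2=http://10.0.0.2:2380
	fmt.Println(initClusterStr(ms, 2))
}
```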
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/doc.go
new file mode 100644
index 0000000..d6fefd7
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v3election provides a v3 election service from an etcdserver.
+package v3election
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/election.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/election.go
new file mode 100644
index 0000000..77a9c4b
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/election.go
@@ -0,0 +1,134 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3election
+
+import (
+ "context"
+ "errors"
+
+ clientv3 "go.etcd.io/etcd/client/v3"
+ "go.etcd.io/etcd/client/v3/concurrency"
+ epb "go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb"
+)
+
+// ErrMissingLeaderKey is returned when an election API request
+// is missing the "leader" field.
+var ErrMissingLeaderKey = errors.New(`"leader" field must be provided`)
+
+type electionServer struct {
+ c *clientv3.Client
+}
+
+func NewElectionServer(c *clientv3.Client) epb.ElectionServer {
+ return &electionServer{c}
+}
+
+func (es *electionServer) Campaign(ctx context.Context, req *epb.CampaignRequest) (*epb.CampaignResponse, error) {
+ s, err := es.session(ctx, req.Lease)
+ if err != nil {
+ return nil, err
+ }
+ e := concurrency.NewElection(s, string(req.Name))
+ if err = e.Campaign(ctx, string(req.Value)); err != nil {
+ return nil, err
+ }
+ return &epb.CampaignResponse{
+ Header: e.Header(),
+ Leader: &epb.LeaderKey{
+ Name: req.Name,
+ Key: []byte(e.Key()),
+ Rev: e.Rev(),
+ Lease: int64(s.Lease()),
+ },
+ }, nil
+}
+
+func (es *electionServer) Proclaim(ctx context.Context, req *epb.ProclaimRequest) (*epb.ProclaimResponse, error) {
+ if req.Leader == nil {
+ return nil, ErrMissingLeaderKey
+ }
+ s, err := es.session(ctx, req.Leader.Lease)
+ if err != nil {
+ return nil, err
+ }
+ e := concurrency.ResumeElection(s, string(req.Leader.Name), string(req.Leader.Key), req.Leader.Rev)
+ if err := e.Proclaim(ctx, string(req.Value)); err != nil {
+ return nil, err
+ }
+ return &epb.ProclaimResponse{Header: e.Header()}, nil
+}
+
+func (es *electionServer) Observe(req *epb.LeaderRequest, stream epb.Election_ObserveServer) error {
+ s, err := es.session(stream.Context(), -1)
+ if err != nil {
+ return err
+ }
+ e := concurrency.NewElection(s, string(req.Name))
+ ch := e.Observe(stream.Context())
+ for stream.Context().Err() == nil {
+ select {
+ case <-stream.Context().Done():
+ case resp, ok := <-ch:
+ if !ok {
+ return nil
+ }
+ lresp := &epb.LeaderResponse{Header: resp.Header, Kv: resp.Kvs[0]}
+ if err := stream.Send(lresp); err != nil {
+ return err
+ }
+ }
+ }
+ return stream.Context().Err()
+}
+
+func (es *electionServer) Leader(ctx context.Context, req *epb.LeaderRequest) (*epb.LeaderResponse, error) {
+ s, err := es.session(ctx, -1)
+ if err != nil {
+ return nil, err
+ }
+ l, lerr := concurrency.NewElection(s, string(req.Name)).Leader(ctx)
+ if lerr != nil {
+ return nil, lerr
+ }
+ return &epb.LeaderResponse{Header: l.Header, Kv: l.Kvs[0]}, nil
+}
+
+func (es *electionServer) Resign(ctx context.Context, req *epb.ResignRequest) (*epb.ResignResponse, error) {
+ if req.Leader == nil {
+ return nil, ErrMissingLeaderKey
+ }
+ s, err := es.session(ctx, req.Leader.Lease)
+ if err != nil {
+ return nil, err
+ }
+ e := concurrency.ResumeElection(s, string(req.Leader.Name), string(req.Leader.Key), req.Leader.Rev)
+ if err := e.Resign(ctx); err != nil {
+ return nil, err
+ }
+ return &epb.ResignResponse{Header: e.Header()}, nil
+}
+
+func (es *electionServer) session(ctx context.Context, lease int64) (*concurrency.Session, error) {
+ s, err := concurrency.NewSession(
+ es.c,
+ concurrency.WithLease(clientv3.LeaseID(lease)),
+ concurrency.WithContext(ctx),
+ )
+ if err != nil {
+ return nil, err
+ }
+ s.Orphan()
+ return s, nil
+}
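
The election service above is a thin gRPC adapter: each RPC opens a concurrency.Session and delegates to concurrency.Election. A minimal sketch of driving that client-side package directly (editor's illustration; the endpoint and election name are assumptions):

```go
package main

import (
	"context"
	"log"

	clientv3 "go.etcd.io/etcd/client/v3"
	"go.etcd.io/etcd/client/v3/concurrency"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()

	// A session holds a lease that keeps the campaign alive.
	s, err := concurrency.NewSession(cli)
	if err != nil {
		log.Fatal(err)
	}
	defer s.Close()

	e := concurrency.NewElection(s, "/my-election")
	ctx := context.Background()
	if err := e.Campaign(ctx, "v1"); err != nil { // blocks until leadership is won
		log.Fatal(err)
	}
	if err := e.Proclaim(ctx, "v2"); err != nil { // overwrite the proclaimed value
		log.Fatal(err)
	}
	if err := e.Resign(ctx); err != nil { // hand leadership to the next campaigner
		log.Fatal(err)
	}
}
```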
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/gw/v3election.pb.gw.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/gw/v3election.pb.gw.go
new file mode 100644
index 0000000..912149f
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/gw/v3election.pb.gw.go
@@ -0,0 +1,395 @@
+// Code generated by protoc-gen-grpc-gateway. DO NOT EDIT.
+// source: server/etcdserver/api/v3election/v3electionpb/v3election.proto
+
+/*
+Package gw is a reverse proxy.
+
+It translates gRPC into RESTful JSON APIs.
+*/
+package gw
+
+import (
+ protov1 "github.com/golang/protobuf/proto"
+
+ "context"
+ "errors"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb"
+ "io"
+ "net/http"
+
+ "github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
+ "github.com/grpc-ecosystem/grpc-gateway/v2/utilities"
+ "google.golang.org/grpc"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/grpclog"
+ "google.golang.org/grpc/metadata"
+ "google.golang.org/grpc/status"
+ "google.golang.org/protobuf/proto"
+)
+
+// Suppress "imported and not used" errors
+var (
+ _ codes.Code
+ _ io.Reader
+ _ status.Status
+ _ = errors.New
+ _ = runtime.String
+ _ = utilities.NewDoubleArray
+ _ = metadata.Join
+)
+
+func request_Election_Campaign_0(ctx context.Context, marshaler runtime.Marshaler, client v3electionpb.ElectionClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.CampaignRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := client.Campaign(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD))
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func local_request_Election_Campaign_0(ctx context.Context, marshaler runtime.Marshaler, server v3electionpb.ElectionServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.CampaignRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := server.Campaign(ctx, &protoReq)
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func request_Election_Proclaim_0(ctx context.Context, marshaler runtime.Marshaler, client v3electionpb.ElectionClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.ProclaimRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := client.Proclaim(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD))
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func local_request_Election_Proclaim_0(ctx context.Context, marshaler runtime.Marshaler, server v3electionpb.ElectionServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.ProclaimRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := server.Proclaim(ctx, &protoReq)
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func request_Election_Leader_0(ctx context.Context, marshaler runtime.Marshaler, client v3electionpb.ElectionClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.LeaderRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := client.Leader(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD))
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func local_request_Election_Leader_0(ctx context.Context, marshaler runtime.Marshaler, server v3electionpb.ElectionServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.LeaderRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := server.Leader(ctx, &protoReq)
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func request_Election_Observe_0(ctx context.Context, marshaler runtime.Marshaler, client v3electionpb.ElectionClient, req *http.Request, pathParams map[string]string) (v3electionpb.Election_ObserveClient, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.LeaderRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ stream, err := client.Observe(ctx, &protoReq)
+ if err != nil {
+ return nil, metadata, err
+ }
+ header, err := stream.Header()
+ if err != nil {
+ return nil, metadata, err
+ }
+ metadata.HeaderMD = header
+ return stream, metadata, nil
+}
+
+func request_Election_Resign_0(ctx context.Context, marshaler runtime.Marshaler, client v3electionpb.ElectionClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.ResignRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := client.Resign(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD))
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func local_request_Election_Resign_0(ctx context.Context, marshaler runtime.Marshaler, server v3electionpb.ElectionServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3electionpb.ResignRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := server.Resign(ctx, &protoReq)
+ return protov1.MessageV2(msg), metadata, err
+}
+
+// RegisterElectionHandlerServer registers the http handlers for service Election to "mux".
+// UnaryRPC: calls v3electionpb.ElectionServer directly.
+// StreamingRPC: currently unsupported pending https://github.com/grpc/grpc-go/issues/906.
+// Note that using this registration option will cause many gRPC library features to stop working. Consider using RegisterElectionHandlerFromEndpoint instead.
+// GRPC interceptors will not work for this type of registration. To use interceptors, you must use the "runtime.WithMiddlewares" option in the "runtime.NewServeMux" call.
+func RegisterElectionHandlerServer(ctx context.Context, mux *runtime.ServeMux, server v3electionpb.ElectionServer) error {
+ mux.Handle(http.MethodPost, pattern_Election_Campaign_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ var stream runtime.ServerTransportStream
+ ctx = grpc.NewContextWithServerTransportStream(ctx, &stream)
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/v3electionpb.Election/Campaign", runtime.WithHTTPPathPattern("/v3/election/campaign"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := local_request_Election_Campaign_0(annotatedContext, inboundMarshaler, server, req, pathParams)
+ md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer())
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Campaign_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ mux.Handle(http.MethodPost, pattern_Election_Proclaim_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ var stream runtime.ServerTransportStream
+ ctx = grpc.NewContextWithServerTransportStream(ctx, &stream)
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/v3electionpb.Election/Proclaim", runtime.WithHTTPPathPattern("/v3/election/proclaim"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := local_request_Election_Proclaim_0(annotatedContext, inboundMarshaler, server, req, pathParams)
+ md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer())
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Proclaim_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ mux.Handle(http.MethodPost, pattern_Election_Leader_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ var stream runtime.ServerTransportStream
+ ctx = grpc.NewContextWithServerTransportStream(ctx, &stream)
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/v3electionpb.Election/Leader", runtime.WithHTTPPathPattern("/v3/election/leader"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := local_request_Election_Leader_0(annotatedContext, inboundMarshaler, server, req, pathParams)
+ md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer())
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Leader_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+
+ mux.Handle(http.MethodPost, pattern_Election_Observe_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ err := status.Error(codes.Unimplemented, "streaming calls are not yet supported in the in-process transport")
+ _, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ })
+ mux.Handle(http.MethodPost, pattern_Election_Resign_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ var stream runtime.ServerTransportStream
+ ctx = grpc.NewContextWithServerTransportStream(ctx, &stream)
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/v3electionpb.Election/Resign", runtime.WithHTTPPathPattern("/v3/election/resign"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := local_request_Election_Resign_0(annotatedContext, inboundMarshaler, server, req, pathParams)
+ md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer())
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Resign_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+
+ return nil
+}
+
+// RegisterElectionHandlerFromEndpoint is the same as RegisterElectionHandler but
+// automatically dials "endpoint" and closes the connection when "ctx" is done.
+func RegisterElectionHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
+ conn, err := grpc.NewClient(endpoint, opts...)
+ if err != nil {
+ return err
+ }
+ defer func() {
+ if err != nil {
+ if cerr := conn.Close(); cerr != nil {
+ grpclog.Errorf("Failed to close conn to %s: %v", endpoint, cerr)
+ }
+ return
+ }
+ go func() {
+ <-ctx.Done()
+ if cerr := conn.Close(); cerr != nil {
+ grpclog.Errorf("Failed to close conn to %s: %v", endpoint, cerr)
+ }
+ }()
+ }()
+ return RegisterElectionHandler(ctx, mux, conn)
+}
+
+// RegisterElectionHandler registers the http handlers for service Election to "mux".
+// The handlers forward requests to the grpc endpoint over "conn".
+func RegisterElectionHandler(ctx context.Context, mux *runtime.ServeMux, conn *grpc.ClientConn) error {
+ return RegisterElectionHandlerClient(ctx, mux, v3electionpb.NewElectionClient(conn))
+}
+
+// RegisterElectionHandlerClient registers the http handlers for service Election
+// to "mux". The handlers forward requests to the grpc endpoint over the given implementation of "ElectionClient".
+// Note: the gRPC framework executes interceptors within the gRPC handler. If the passed in "ElectionClient"
+// doesn't go through the normal gRPC flow (creating a gRPC client etc.) then it will be up to the passed in
+// "ElectionClient" to call the correct interceptors. This client ignores the HTTP middlewares.
+func RegisterElectionHandlerClient(ctx context.Context, mux *runtime.ServeMux, client v3electionpb.ElectionClient) error {
+ mux.Handle(http.MethodPost, pattern_Election_Campaign_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/v3electionpb.Election/Campaign", runtime.WithHTTPPathPattern("/v3/election/campaign"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := request_Election_Campaign_0(annotatedContext, inboundMarshaler, client, req, pathParams)
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Campaign_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ mux.Handle(http.MethodPost, pattern_Election_Proclaim_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/v3electionpb.Election/Proclaim", runtime.WithHTTPPathPattern("/v3/election/proclaim"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := request_Election_Proclaim_0(annotatedContext, inboundMarshaler, client, req, pathParams)
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Proclaim_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ mux.Handle(http.MethodPost, pattern_Election_Leader_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/v3electionpb.Election/Leader", runtime.WithHTTPPathPattern("/v3/election/leader"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := request_Election_Leader_0(annotatedContext, inboundMarshaler, client, req, pathParams)
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Leader_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ mux.Handle(http.MethodPost, pattern_Election_Observe_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/v3electionpb.Election/Observe", runtime.WithHTTPPathPattern("/v3/election/observe"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := request_Election_Observe_0(annotatedContext, inboundMarshaler, client, req, pathParams)
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Observe_0(annotatedContext, mux, outboundMarshaler, w, req, func() (proto.Message, error) {
+ m1, err := resp.Recv()
+ return protov1.MessageV2(m1), err
+ }, mux.GetForwardResponseOptions()...)
+ })
+ mux.Handle(http.MethodPost, pattern_Election_Resign_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/v3electionpb.Election/Resign", runtime.WithHTTPPathPattern("/v3/election/resign"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := request_Election_Resign_0(annotatedContext, inboundMarshaler, client, req, pathParams)
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Election_Resign_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ return nil
+}
+
+var (
+ pattern_Election_Campaign_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"v3", "election", "campaign"}, ""))
+ pattern_Election_Proclaim_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"v3", "election", "proclaim"}, ""))
+ pattern_Election_Leader_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"v3", "election", "leader"}, ""))
+ pattern_Election_Observe_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"v3", "election", "observe"}, ""))
+ pattern_Election_Resign_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"v3", "election", "resign"}, ""))
+)
+
+var (
+ forward_Election_Campaign_0 = runtime.ForwardResponseMessage
+ forward_Election_Proclaim_0 = runtime.ForwardResponseMessage
+ forward_Election_Leader_0 = runtime.ForwardResponseMessage
+ forward_Election_Observe_0 = runtime.ForwardResponseStream
+ forward_Election_Resign_0 = runtime.ForwardResponseMessage
+)
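
The generated gateway above exposes the Election RPCs as JSON routes under /v3/election/*. A minimal sketch of mounting it on an HTTP server (editor's illustration; the endpoint, port, and insecure credentials are assumptions):

```go
package main

import (
	"context"
	"log"
	"net/http"

	"github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	gw "go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/gw"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	mux := runtime.NewServeMux()
	opts := []grpc.DialOption{grpc.WithTransportCredentials(insecure.NewCredentials())}
	// Dials the etcd gRPC endpoint and mounts the /v3/election/* handlers on mux.
	if err := gw.RegisterElectionHandlerFromEndpoint(ctx, mux, "127.0.0.1:2379", opts); err != nil {
		log.Fatal(err)
	}
	// e.g. POST http://localhost:8080/v3/election/leader with a JSON LeaderRequest body.
	log.Fatal(http.ListenAndServe(":8080", mux))
}
```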
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/v3election.pb.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/v3election.pb.go
new file mode 100644
index 0000000..02369cd
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/v3election.pb.go
@@ -0,0 +1,2542 @@
+// Code generated by protoc-gen-gogo. DO NOT EDIT.
+// source: v3election.proto
+
+package v3electionpb
+
+import (
+ context "context"
+ fmt "fmt"
+ io "io"
+ math "math"
+ math_bits "math/bits"
+
+ _ "github.com/gogo/protobuf/gogoproto"
+ proto "github.com/golang/protobuf/proto"
+ etcdserverpb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ mvccpb "go.etcd.io/etcd/api/v3/mvccpb"
+ _ "google.golang.org/genproto/googleapis/api/annotations"
+ grpc "google.golang.org/grpc"
+ codes "google.golang.org/grpc/codes"
+ status "google.golang.org/grpc/status"
+)
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ = proto.Marshal
+var _ = fmt.Errorf
+var _ = math.Inf
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the proto package it is being compiled against.
+// A compilation error at this line likely means your copy of the
+// proto package needs to be updated.
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
+
+type CampaignRequest struct {
+ // name is the election's identifier for the campaign.
+ Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
+ // lease is the ID of the lease attached to leadership of the election. If the
+ // lease expires or is revoked before resigning leadership, then the
+ // leadership is transferred to the next campaigner, if any.
+ Lease int64 `protobuf:"varint,2,opt,name=lease,proto3" json:"lease,omitempty"`
+ // value is the initial proclaimed value set when the campaigner wins the
+ // election.
+ Value []byte `protobuf:"bytes,3,opt,name=value,proto3" json:"value,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *CampaignRequest) Reset() { *m = CampaignRequest{} }
+func (m *CampaignRequest) String() string { return proto.CompactTextString(m) }
+func (*CampaignRequest) ProtoMessage() {}
+func (*CampaignRequest) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{0}
+}
+func (m *CampaignRequest) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *CampaignRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_CampaignRequest.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *CampaignRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CampaignRequest.Merge(m, src)
+}
+func (m *CampaignRequest) XXX_Size() int {
+ return m.Size()
+}
+func (m *CampaignRequest) XXX_DiscardUnknown() {
+ xxx_messageInfo_CampaignRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_CampaignRequest proto.InternalMessageInfo
+
+func (m *CampaignRequest) GetName() []byte {
+ if m != nil {
+ return m.Name
+ }
+ return nil
+}
+
+func (m *CampaignRequest) GetLease() int64 {
+ if m != nil {
+ return m.Lease
+ }
+ return 0
+}
+
+func (m *CampaignRequest) GetValue() []byte {
+ if m != nil {
+ return m.Value
+ }
+ return nil
+}
+
+type CampaignResponse struct {
+ Header *etcdserverpb.ResponseHeader `protobuf:"bytes,1,opt,name=header,proto3" json:"header,omitempty"`
+ // leader describes the resources used for holding leadership of the election.
+ Leader *LeaderKey `protobuf:"bytes,2,opt,name=leader,proto3" json:"leader,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *CampaignResponse) Reset() { *m = CampaignResponse{} }
+func (m *CampaignResponse) String() string { return proto.CompactTextString(m) }
+func (*CampaignResponse) ProtoMessage() {}
+func (*CampaignResponse) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{1}
+}
+func (m *CampaignResponse) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *CampaignResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_CampaignResponse.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *CampaignResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CampaignResponse.Merge(m, src)
+}
+func (m *CampaignResponse) XXX_Size() int {
+ return m.Size()
+}
+func (m *CampaignResponse) XXX_DiscardUnknown() {
+ xxx_messageInfo_CampaignResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_CampaignResponse proto.InternalMessageInfo
+
+func (m *CampaignResponse) GetHeader() *etcdserverpb.ResponseHeader {
+ if m != nil {
+ return m.Header
+ }
+ return nil
+}
+
+func (m *CampaignResponse) GetLeader() *LeaderKey {
+ if m != nil {
+ return m.Leader
+ }
+ return nil
+}
+
+type LeaderKey struct {
+ // name is the election identifier that corresponds to the leadership key.
+ Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
+ // key is an opaque key representing the ownership of the election. If the key
+ // is deleted, then leadership is lost.
+ Key []byte `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"`
+ // rev is the creation revision of the key. It can be used to test for ownership
+ // of an election during transactions by testing that the key's creation
+ // revision matches rev.
+ Rev int64 `protobuf:"varint,3,opt,name=rev,proto3" json:"rev,omitempty"`
+ // lease is the lease ID of the election leader.
+ Lease int64 `protobuf:"varint,4,opt,name=lease,proto3" json:"lease,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *LeaderKey) Reset() { *m = LeaderKey{} }
+func (m *LeaderKey) String() string { return proto.CompactTextString(m) }
+func (*LeaderKey) ProtoMessage() {}
+func (*LeaderKey) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{2}
+}
+func (m *LeaderKey) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *LeaderKey) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_LeaderKey.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *LeaderKey) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LeaderKey.Merge(m, src)
+}
+func (m *LeaderKey) XXX_Size() int {
+ return m.Size()
+}
+func (m *LeaderKey) XXX_DiscardUnknown() {
+ xxx_messageInfo_LeaderKey.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_LeaderKey proto.InternalMessageInfo
+
+func (m *LeaderKey) GetName() []byte {
+ if m != nil {
+ return m.Name
+ }
+ return nil
+}
+
+func (m *LeaderKey) GetKey() []byte {
+ if m != nil {
+ return m.Key
+ }
+ return nil
+}
+
+func (m *LeaderKey) GetRev() int64 {
+ if m != nil {
+ return m.Rev
+ }
+ return 0
+}
+
+func (m *LeaderKey) GetLease() int64 {
+ if m != nil {
+ return m.Lease
+ }
+ return 0
+}
+
+type LeaderRequest struct {
+ // name is the election identifier for the leadership information.
+ Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *LeaderRequest) Reset() { *m = LeaderRequest{} }
+func (m *LeaderRequest) String() string { return proto.CompactTextString(m) }
+func (*LeaderRequest) ProtoMessage() {}
+func (*LeaderRequest) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{3}
+}
+func (m *LeaderRequest) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *LeaderRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_LeaderRequest.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *LeaderRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LeaderRequest.Merge(m, src)
+}
+func (m *LeaderRequest) XXX_Size() int {
+ return m.Size()
+}
+func (m *LeaderRequest) XXX_DiscardUnknown() {
+ xxx_messageInfo_LeaderRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_LeaderRequest proto.InternalMessageInfo
+
+func (m *LeaderRequest) GetName() []byte {
+ if m != nil {
+ return m.Name
+ }
+ return nil
+}
+
+type LeaderResponse struct {
+ Header *etcdserverpb.ResponseHeader `protobuf:"bytes,1,opt,name=header,proto3" json:"header,omitempty"`
+ // kv is the key-value pair representing the latest leader update.
+ Kv *mvccpb.KeyValue `protobuf:"bytes,2,opt,name=kv,proto3" json:"kv,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *LeaderResponse) Reset() { *m = LeaderResponse{} }
+func (m *LeaderResponse) String() string { return proto.CompactTextString(m) }
+func (*LeaderResponse) ProtoMessage() {}
+func (*LeaderResponse) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{4}
+}
+func (m *LeaderResponse) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *LeaderResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_LeaderResponse.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *LeaderResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LeaderResponse.Merge(m, src)
+}
+func (m *LeaderResponse) XXX_Size() int {
+ return m.Size()
+}
+func (m *LeaderResponse) XXX_DiscardUnknown() {
+ xxx_messageInfo_LeaderResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_LeaderResponse proto.InternalMessageInfo
+
+func (m *LeaderResponse) GetHeader() *etcdserverpb.ResponseHeader {
+ if m != nil {
+ return m.Header
+ }
+ return nil
+}
+
+func (m *LeaderResponse) GetKv() *mvccpb.KeyValue {
+ if m != nil {
+ return m.Kv
+ }
+ return nil
+}
+
+type ResignRequest struct {
+ // leader is the leadership to relinquish by resignation.
+ Leader *LeaderKey `protobuf:"bytes,1,opt,name=leader,proto3" json:"leader,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *ResignRequest) Reset() { *m = ResignRequest{} }
+func (m *ResignRequest) String() string { return proto.CompactTextString(m) }
+func (*ResignRequest) ProtoMessage() {}
+func (*ResignRequest) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{5}
+}
+func (m *ResignRequest) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *ResignRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_ResignRequest.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *ResignRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ResignRequest.Merge(m, src)
+}
+func (m *ResignRequest) XXX_Size() int {
+ return m.Size()
+}
+func (m *ResignRequest) XXX_DiscardUnknown() {
+ xxx_messageInfo_ResignRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_ResignRequest proto.InternalMessageInfo
+
+func (m *ResignRequest) GetLeader() *LeaderKey {
+ if m != nil {
+ return m.Leader
+ }
+ return nil
+}
+
+type ResignResponse struct {
+ Header *etcdserverpb.ResponseHeader `protobuf:"bytes,1,opt,name=header,proto3" json:"header,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *ResignResponse) Reset() { *m = ResignResponse{} }
+func (m *ResignResponse) String() string { return proto.CompactTextString(m) }
+func (*ResignResponse) ProtoMessage() {}
+func (*ResignResponse) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{6}
+}
+func (m *ResignResponse) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *ResignResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_ResignResponse.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *ResignResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ResignResponse.Merge(m, src)
+}
+func (m *ResignResponse) XXX_Size() int {
+ return m.Size()
+}
+func (m *ResignResponse) XXX_DiscardUnknown() {
+ xxx_messageInfo_ResignResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_ResignResponse proto.InternalMessageInfo
+
+func (m *ResignResponse) GetHeader() *etcdserverpb.ResponseHeader {
+ if m != nil {
+ return m.Header
+ }
+ return nil
+}
+
+type ProclaimRequest struct {
+ // leader is the leadership held on the election.
+ Leader *LeaderKey `protobuf:"bytes,1,opt,name=leader,proto3" json:"leader,omitempty"`
+ // value is an update meant to overwrite the leader's current value.
+ Value []byte `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *ProclaimRequest) Reset() { *m = ProclaimRequest{} }
+func (m *ProclaimRequest) String() string { return proto.CompactTextString(m) }
+func (*ProclaimRequest) ProtoMessage() {}
+func (*ProclaimRequest) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{7}
+}
+func (m *ProclaimRequest) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *ProclaimRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_ProclaimRequest.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *ProclaimRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProclaimRequest.Merge(m, src)
+}
+func (m *ProclaimRequest) XXX_Size() int {
+ return m.Size()
+}
+func (m *ProclaimRequest) XXX_DiscardUnknown() {
+ xxx_messageInfo_ProclaimRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_ProclaimRequest proto.InternalMessageInfo
+
+func (m *ProclaimRequest) GetLeader() *LeaderKey {
+ if m != nil {
+ return m.Leader
+ }
+ return nil
+}
+
+func (m *ProclaimRequest) GetValue() []byte {
+ if m != nil {
+ return m.Value
+ }
+ return nil
+}
+
+type ProclaimResponse struct {
+ Header *etcdserverpb.ResponseHeader `protobuf:"bytes,1,opt,name=header,proto3" json:"header,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *ProclaimResponse) Reset() { *m = ProclaimResponse{} }
+func (m *ProclaimResponse) String() string { return proto.CompactTextString(m) }
+func (*ProclaimResponse) ProtoMessage() {}
+func (*ProclaimResponse) Descriptor() ([]byte, []int) {
+ return fileDescriptor_c9b1f26cc432a035, []int{8}
+}
+func (m *ProclaimResponse) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *ProclaimResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_ProclaimResponse.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *ProclaimResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProclaimResponse.Merge(m, src)
+}
+func (m *ProclaimResponse) XXX_Size() int {
+ return m.Size()
+}
+func (m *ProclaimResponse) XXX_DiscardUnknown() {
+ xxx_messageInfo_ProclaimResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_ProclaimResponse proto.InternalMessageInfo
+
+func (m *ProclaimResponse) GetHeader() *etcdserverpb.ResponseHeader {
+ if m != nil {
+ return m.Header
+ }
+ return nil
+}
+
+func init() {
+ proto.RegisterType((*CampaignRequest)(nil), "v3electionpb.CampaignRequest")
+ proto.RegisterType((*CampaignResponse)(nil), "v3electionpb.CampaignResponse")
+ proto.RegisterType((*LeaderKey)(nil), "v3electionpb.LeaderKey")
+ proto.RegisterType((*LeaderRequest)(nil), "v3electionpb.LeaderRequest")
+ proto.RegisterType((*LeaderResponse)(nil), "v3electionpb.LeaderResponse")
+ proto.RegisterType((*ResignRequest)(nil), "v3electionpb.ResignRequest")
+ proto.RegisterType((*ResignResponse)(nil), "v3electionpb.ResignResponse")
+ proto.RegisterType((*ProclaimRequest)(nil), "v3electionpb.ProclaimRequest")
+ proto.RegisterType((*ProclaimResponse)(nil), "v3electionpb.ProclaimResponse")
+}
+
+func init() { proto.RegisterFile("v3election.proto", fileDescriptor_c9b1f26cc432a035) }
+
+var fileDescriptor_c9b1f26cc432a035 = []byte{
+ // 556 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x94, 0x41, 0x6f, 0xd3, 0x30,
+ 0x14, 0xc7, 0x71, 0x5a, 0xca, 0x78, 0x74, 0x5b, 0x15, 0x8a, 0x28, 0xa5, 0x64, 0x95, 0xb9, 0x4c,
+ 0x3d, 0xc4, 0x68, 0xe5, 0xd4, 0xd3, 0x04, 0x02, 0x4d, 0x1a, 0x12, 0xe0, 0x03, 0x02, 0x6e, 0x6e,
+ 0xf6, 0x94, 0x55, 0x4d, 0xe3, 0x90, 0x74, 0x91, 0x7a, 0xe5, 0x2b, 0x70, 0x80, 0x8f, 0xc4, 0x11,
+ 0x89, 0x2f, 0x80, 0x0a, 0x1f, 0x04, 0xd9, 0x4e, 0x9a, 0x34, 0x6a, 0x11, 0x5a, 0x6f, 0x8e, 0xdf,
+ 0xdf, 0xef, 0xf7, 0xfe, 0xcf, 0x2f, 0x86, 0x56, 0x3a, 0xc4, 0x00, 0xbd, 0xf9, 0x44, 0x86, 0x6e,
+ 0x14, 0xcb, 0xb9, 0xb4, 0x9b, 0xc5, 0x4e, 0x34, 0xee, 0xb6, 0x7d, 0xe9, 0x4b, 0x1d, 0x60, 0x6a,
+ 0x65, 0x34, 0xdd, 0x23, 0x9c, 0x7b, 0x17, 0x4c, 0x44, 0x13, 0xa6, 0x16, 0x09, 0xc6, 0x29, 0xc6,
+ 0xd1, 0x98, 0xc5, 0x91, 0x97, 0x09, 0x3a, 0x2b, 0xc1, 0x2c, 0xf5, 0xbc, 0x68, 0xcc, 0xa6, 0x69,
+ 0x16, 0xe9, 0xf9, 0x52, 0xfa, 0x01, 0xea, 0x98, 0x08, 0x43, 0x39, 0x17, 0x8a, 0x94, 0x98, 0x28,
+ 0x7d, 0x0b, 0x87, 0xcf, 0xc5, 0x2c, 0x12, 0x13, 0x3f, 0xe4, 0xf8, 0xe9, 0x0a, 0x93, 0xb9, 0x6d,
+ 0x43, 0x3d, 0x14, 0x33, 0xec, 0x90, 0x3e, 0x39, 0x6e, 0x72, 0xbd, 0xb6, 0xdb, 0x70, 0x33, 0x40,
+ 0x91, 0x60, 0xc7, 0xea, 0x93, 0xe3, 0x1a, 0x37, 0x1f, 0x6a, 0x37, 0x15, 0xc1, 0x15, 0x76, 0x6a,
+ 0x5a, 0x6a, 0x3e, 0xe8, 0x02, 0x5a, 0x45, 0xca, 0x24, 0x92, 0x61, 0x82, 0xf6, 0x53, 0x68, 0x5c,
+ 0xa2, 0xb8, 0xc0, 0x58, 0x67, 0xbd, 0x73, 0xd2, 0x73, 0xcb, 0x3e, 0xdc, 0x5c, 0x77, 0xa6, 0x35,
+ 0x3c, 0xd3, 0xda, 0x0c, 0x1a, 0x81, 0x39, 0x65, 0xe9, 0x53, 0xf7, 0xdd, 0x72, 0xab, 0xdc, 0x57,
+ 0x3a, 0x76, 0x8e, 0x0b, 0x9e, 0xc9, 0xe8, 0x07, 0xb8, 0xbd, 0xda, 0xdc, 0xe8, 0xa3, 0x05, 0xb5,
+ 0x29, 0x2e, 0x74, 0xba, 0x26, 0x57, 0x4b, 0xb5, 0x13, 0x63, 0xaa, 0x1d, 0xd4, 0xb8, 0x5a, 0x16,
+ 0x5e, 0xeb, 0x25, 0xaf, 0xf4, 0x31, 0xec, 0x9b, 0xd4, 0xff, 0x68, 0x13, 0xbd, 0x84, 0x83, 0x5c,
+ 0xb4, 0x93, 0xf1, 0x3e, 0x58, 0xd3, 0x34, 0x33, 0xdd, 0x72, 0xcd, 0x8d, 0xba, 0xe7, 0xb8, 0x78,
+ 0xa7, 0x1a, 0xcc, 0xad, 0x69, 0x4a, 0x4f, 0x61, 0x9f, 0x63, 0x52, 0xba, 0xb5, 0xa2, 0x57, 0xe4,
+ 0xff, 0x7a, 0xf5, 0x12, 0x0e, 0xf2, 0x0c, 0xbb, 0xd4, 0x4a, 0xdf, 0xc3, 0xe1, 0x9b, 0x58, 0x7a,
+ 0x81, 0x98, 0xcc, 0xae, 0x5b, 0x4b, 0x31, 0x48, 0x56, 0x79, 0x90, 0xce, 0xa0, 0x55, 0x64, 0xde,
+ 0xa5, 0xc6, 0x93, 0xaf, 0x75, 0xd8, 0x7b, 0x91, 0x15, 0x60, 0x4f, 0x61, 0x2f, 0x9f, 0x4f, 0xfb,
+ 0xd1, 0x7a, 0x65, 0x95, 0x5f, 0xa1, 0xeb, 0x6c, 0x0b, 0x1b, 0x0a, 0xed, 0x7f, 0xfe, 0xf9, 0xe7,
+ 0x8b, 0xd5, 0xa5, 0xf7, 0x58, 0x3a, 0x64, 0xb9, 0x90, 0x79, 0x99, 0x6c, 0x44, 0x06, 0x0a, 0x96,
+ 0x7b, 0xa8, 0xc2, 0x2a, 0x5d, 0xab, 0xc2, 0xaa, 0xd6, 0xb7, 0xc0, 0xa2, 0x4c, 0xa6, 0x60, 0x1e,
+ 0x34, 0x4c, 0x6f, 0xed, 0x87, 0x9b, 0x3a, 0x9e, 0x83, 0x7a, 0x9b, 0x83, 0x19, 0xc6, 0xd1, 0x98,
+ 0x0e, 0xbd, 0xbb, 0x86, 0x31, 0x17, 0xa5, 0x20, 0x3e, 0xdc, 0x7a, 0x3d, 0xd6, 0x0d, 0xdf, 0x85,
+ 0x72, 0xa4, 0x29, 0x0f, 0x68, 0x7b, 0x8d, 0x22, 0x4d, 0xe2, 0x11, 0x19, 0x3c, 0x21, 0xca, 0x8d,
+ 0x19, 0xd0, 0x2a, 0x67, 0x6d, 0xf0, 0xab, 0x9c, 0xf5, 0x99, 0xde, 0xe2, 0x26, 0xd6, 0xa2, 0x11,
+ 0x19, 0x3c, 0xe3, 0xdf, 0x97, 0x0e, 0xf9, 0xb1, 0x74, 0xc8, 0xaf, 0xa5, 0x43, 0xbe, 0xfd, 0x76,
+ 0x6e, 0x7c, 0x3c, 0xf5, 0xa5, 0x9e, 0x29, 0x77, 0x22, 0xf5, 0x63, 0xcb, 0xcc, 0x70, 0xe9, 0xf3,
+ 0xab, 0x51, 0xd3, 0xaf, 0x69, 0xc1, 0x65, 0xe5, 0x12, 0xc6, 0x0d, 0xfd, 0xb4, 0x0e, 0xff, 0x06,
+ 0x00, 0x00, 0xff, 0xff, 0xcd, 0x58, 0x82, 0xe2, 0xeb, 0x05, 0x00, 0x00,
+}
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ context.Context
+var _ grpc.ClientConn
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the grpc package it is being compiled against.
+const _ = grpc.SupportPackageIsVersion4
+
+// ElectionClient is the client API for Election service.
+//
+// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
+type ElectionClient interface {
+ // Campaign waits to acquire leadership in an election, returning a LeaderKey
+ // representing the leadership if successful. The LeaderKey can then be used
+ // to issue new values on the election, transactionally guard API requests on
+ // leadership still being held, and resign from the election.
+ Campaign(ctx context.Context, in *CampaignRequest, opts ...grpc.CallOption) (*CampaignResponse, error)
+ // Proclaim updates the leader's posted value with a new value.
+ Proclaim(ctx context.Context, in *ProclaimRequest, opts ...grpc.CallOption) (*ProclaimResponse, error)
+ // Leader returns the current election proclamation, if any.
+ Leader(ctx context.Context, in *LeaderRequest, opts ...grpc.CallOption) (*LeaderResponse, error)
+ // Observe streams election proclamations in-order as made by the election's
+ // elected leaders.
+ Observe(ctx context.Context, in *LeaderRequest, opts ...grpc.CallOption) (Election_ObserveClient, error)
+ // Resign releases election leadership so other campaigners may acquire
+ // leadership on the election.
+ Resign(ctx context.Context, in *ResignRequest, opts ...grpc.CallOption) (*ResignResponse, error)
+}
+
+type electionClient struct {
+ cc *grpc.ClientConn
+}
+
+func NewElectionClient(cc *grpc.ClientConn) ElectionClient {
+ return &electionClient{cc}
+}
+
+func (c *electionClient) Campaign(ctx context.Context, in *CampaignRequest, opts ...grpc.CallOption) (*CampaignResponse, error) {
+ out := new(CampaignResponse)
+ err := c.cc.Invoke(ctx, "/v3electionpb.Election/Campaign", in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+func (c *electionClient) Proclaim(ctx context.Context, in *ProclaimRequest, opts ...grpc.CallOption) (*ProclaimResponse, error) {
+ out := new(ProclaimResponse)
+ err := c.cc.Invoke(ctx, "/v3electionpb.Election/Proclaim", in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+func (c *electionClient) Leader(ctx context.Context, in *LeaderRequest, opts ...grpc.CallOption) (*LeaderResponse, error) {
+ out := new(LeaderResponse)
+ err := c.cc.Invoke(ctx, "/v3electionpb.Election/Leader", in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+func (c *electionClient) Observe(ctx context.Context, in *LeaderRequest, opts ...grpc.CallOption) (Election_ObserveClient, error) {
+ stream, err := c.cc.NewStream(ctx, &_Election_serviceDesc.Streams[0], "/v3electionpb.Election/Observe", opts...)
+ if err != nil {
+ return nil, err
+ }
+ x := &electionObserveClient{stream}
+ if err := x.ClientStream.SendMsg(in); err != nil {
+ return nil, err
+ }
+ if err := x.ClientStream.CloseSend(); err != nil {
+ return nil, err
+ }
+ return x, nil
+}
+
+type Election_ObserveClient interface {
+ Recv() (*LeaderResponse, error)
+ grpc.ClientStream
+}
+
+type electionObserveClient struct {
+ grpc.ClientStream
+}
+
+func (x *electionObserveClient) Recv() (*LeaderResponse, error) {
+ m := new(LeaderResponse)
+ if err := x.ClientStream.RecvMsg(m); err != nil {
+ return nil, err
+ }
+ return m, nil
+}
+
+func (c *electionClient) Resign(ctx context.Context, in *ResignRequest, opts ...grpc.CallOption) (*ResignResponse, error) {
+ out := new(ResignResponse)
+ err := c.cc.Invoke(ctx, "/v3electionpb.Election/Resign", in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+// ElectionServer is the server API for Election service.
+type ElectionServer interface {
+ // Campaign waits to acquire leadership in an election, returning a LeaderKey
+ // representing the leadership if successful. The LeaderKey can then be used
+ // to issue new values on the election, transactionally guard API requests on
+ // leadership still being held, and resign from the election.
+ Campaign(context.Context, *CampaignRequest) (*CampaignResponse, error)
+ // Proclaim updates the leader's posted value with a new value.
+ Proclaim(context.Context, *ProclaimRequest) (*ProclaimResponse, error)
+ // Leader returns the current election proclamation, if any.
+ Leader(context.Context, *LeaderRequest) (*LeaderResponse, error)
+ // Observe streams election proclamations in-order as made by the election's
+ // elected leaders.
+ Observe(*LeaderRequest, Election_ObserveServer) error
+ // Resign releases election leadership so other campaigners may acquire
+ // leadership on the election.
+ Resign(context.Context, *ResignRequest) (*ResignResponse, error)
+}
+
+// UnimplementedElectionServer can be embedded to have forward compatible implementations.
+type UnimplementedElectionServer struct {
+}
+
+func (*UnimplementedElectionServer) Campaign(ctx context.Context, req *CampaignRequest) (*CampaignResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Campaign not implemented")
+}
+func (*UnimplementedElectionServer) Proclaim(ctx context.Context, req *ProclaimRequest) (*ProclaimResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Proclaim not implemented")
+}
+func (*UnimplementedElectionServer) Leader(ctx context.Context, req *LeaderRequest) (*LeaderResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Leader not implemented")
+}
+func (*UnimplementedElectionServer) Observe(req *LeaderRequest, srv Election_ObserveServer) error {
+ return status.Errorf(codes.Unimplemented, "method Observe not implemented")
+}
+func (*UnimplementedElectionServer) Resign(ctx context.Context, req *ResignRequest) (*ResignResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Resign not implemented")
+}
+
+func RegisterElectionServer(s *grpc.Server, srv ElectionServer) {
+ s.RegisterService(&_Election_serviceDesc, srv)
+}
+
+func _Election_Campaign_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(CampaignRequest)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(ElectionServer).Campaign(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: "/v3electionpb.Election/Campaign",
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(ElectionServer).Campaign(ctx, req.(*CampaignRequest))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+func _Election_Proclaim_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(ProclaimRequest)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(ElectionServer).Proclaim(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: "/v3electionpb.Election/Proclaim",
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(ElectionServer).Proclaim(ctx, req.(*ProclaimRequest))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+func _Election_Leader_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(LeaderRequest)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(ElectionServer).Leader(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: "/v3electionpb.Election/Leader",
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(ElectionServer).Leader(ctx, req.(*LeaderRequest))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+func _Election_Observe_Handler(srv interface{}, stream grpc.ServerStream) error {
+ m := new(LeaderRequest)
+ if err := stream.RecvMsg(m); err != nil {
+ return err
+ }
+ return srv.(ElectionServer).Observe(m, &electionObserveServer{stream})
+}
+
+type Election_ObserveServer interface {
+ Send(*LeaderResponse) error
+ grpc.ServerStream
+}
+
+type electionObserveServer struct {
+ grpc.ServerStream
+}
+
+func (x *electionObserveServer) Send(m *LeaderResponse) error {
+ return x.ServerStream.SendMsg(m)
+}
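
For the streaming side, the `electionObserveServer` wrapper above is what a handler implementation sends on. A hedged sketch, extending the hypothetical `myElectionServer` from the earlier example and assuming an `updates` channel that does not exist in this patch:

```go
// Sketch only: push proclamations to the client until the stream or the
// source channel closes. s.updates is an assumed <-chan *LeaderResponse.
func (s *myElectionServer) Observe(req *LeaderRequest, srv Election_ObserveServer) error {
	for resp := range s.updates {
		if err := srv.Send(resp); err != nil { // Send forwards to ServerStream.SendMsg
			return err
		}
	}
	return nil
}
```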
+
+func _Election_Resign_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(ResignRequest)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(ElectionServer).Resign(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: "/v3electionpb.Election/Resign",
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(ElectionServer).Resign(ctx, req.(*ResignRequest))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+var _Election_serviceDesc = grpc.ServiceDesc{
+ ServiceName: "v3electionpb.Election",
+ HandlerType: (*ElectionServer)(nil),
+ Methods: []grpc.MethodDesc{
+ {
+ MethodName: "Campaign",
+ Handler: _Election_Campaign_Handler,
+ },
+ {
+ MethodName: "Proclaim",
+ Handler: _Election_Proclaim_Handler,
+ },
+ {
+ MethodName: "Leader",
+ Handler: _Election_Leader_Handler,
+ },
+ {
+ MethodName: "Resign",
+ Handler: _Election_Resign_Handler,
+ },
+ },
+ Streams: []grpc.StreamDesc{
+ {
+ StreamName: "Observe",
+ Handler: _Election_Observe_Handler,
+ ServerStreams: true,
+ },
+ },
+ Metadata: "v3election.proto",
+}
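
Each unary handler above either calls the implementation directly or routes through a `grpc.UnaryServerInterceptor`, passing the hard-coded `FullMethod` string. A small illustrative interceptor (assumed, not from this patch; uses the standard `log` package) makes that flow concrete:

```go
// Illustrative interceptor: logs the "/v3electionpb.Election/..." method name
// that each generated handler places in grpc.UnaryServerInfo, then delegates.
func loggingInterceptor(ctx context.Context, req interface{},
	info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
	log.Printf("rpc %s", info.FullMethod)
	return handler(ctx, req)
}

// Installed at server construction time:
//   gs := grpc.NewServer(grpc.UnaryInterceptor(loggingInterceptor))
```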
+
+func (m *CampaignRequest) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *CampaignRequest) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *CampaignRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if len(m.Value) > 0 {
+ i -= len(m.Value)
+ copy(dAtA[i:], m.Value)
+ i = encodeVarintV3Election(dAtA, i, uint64(len(m.Value)))
+ i--
+ dAtA[i] = 0x1a
+ }
+ if m.Lease != 0 {
+ i = encodeVarintV3Election(dAtA, i, uint64(m.Lease))
+ i--
+ dAtA[i] = 0x10
+ }
+ if len(m.Name) > 0 {
+ i -= len(m.Name)
+ copy(dAtA[i:], m.Name)
+ i = encodeVarintV3Election(dAtA, i, uint64(len(m.Name)))
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
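
Note that `MarshalToSizedBuffer` fills the buffer from the end backwards (the index `i` only decreases), which is why fields are emitted in descending tag order yet come out ascending on the wire. A worked example with assumed values:

```go
// Assumed values, shown only to make the wire layout concrete.
req := &CampaignRequest{Name: []byte("e"), Lease: 1, Value: []byte("v")}
b, _ := req.Marshal()
// b == []byte{0x0a, 0x01, 'e', 0x10, 0x01, 0x1a, 0x01, 'v'}
//   0x0a = field 1 (Name),  wire type 2 (length-delimited)
//   0x10 = field 2 (Lease), wire type 0 (varint)
//   0x1a = field 3 (Value), wire type 2 (length-delimited)
```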
+
+func (m *CampaignResponse) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *CampaignResponse) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *CampaignResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Leader != nil {
+ {
+ size, err := m.Leader.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Election(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0x12
+ }
+ if m.Header != nil {
+ {
+ size, err := m.Header.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Election(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *LeaderKey) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *LeaderKey) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *LeaderKey) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Lease != 0 {
+ i = encodeVarintV3Election(dAtA, i, uint64(m.Lease))
+ i--
+ dAtA[i] = 0x20
+ }
+ if m.Rev != 0 {
+ i = encodeVarintV3Election(dAtA, i, uint64(m.Rev))
+ i--
+ dAtA[i] = 0x18
+ }
+ if len(m.Key) > 0 {
+ i -= len(m.Key)
+ copy(dAtA[i:], m.Key)
+ i = encodeVarintV3Election(dAtA, i, uint64(len(m.Key)))
+ i--
+ dAtA[i] = 0x12
+ }
+ if len(m.Name) > 0 {
+ i -= len(m.Name)
+ copy(dAtA[i:], m.Name)
+ i = encodeVarintV3Election(dAtA, i, uint64(len(m.Name)))
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *LeaderRequest) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *LeaderRequest) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *LeaderRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if len(m.Name) > 0 {
+ i -= len(m.Name)
+ copy(dAtA[i:], m.Name)
+ i = encodeVarintV3Election(dAtA, i, uint64(len(m.Name)))
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *LeaderResponse) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *LeaderResponse) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *LeaderResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Kv != nil {
+ {
+ size, err := m.Kv.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Election(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0x12
+ }
+ if m.Header != nil {
+ {
+ size, err := m.Header.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Election(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *ResignRequest) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *ResignRequest) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *ResignRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Leader != nil {
+ {
+ size, err := m.Leader.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Election(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *ResignResponse) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *ResignResponse) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *ResignResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Header != nil {
+ {
+ size, err := m.Header.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Election(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *ProclaimRequest) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *ProclaimRequest) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *ProclaimRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if len(m.Value) > 0 {
+ i -= len(m.Value)
+ copy(dAtA[i:], m.Value)
+ i = encodeVarintV3Election(dAtA, i, uint64(len(m.Value)))
+ i--
+ dAtA[i] = 0x12
+ }
+ if m.Leader != nil {
+ {
+ size, err := m.Leader.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Election(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *ProclaimResponse) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *ProclaimResponse) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *ProclaimResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Header != nil {
+ {
+ size, err := m.Header.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Election(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func encodeVarintV3Election(dAtA []byte, offset int, v uint64) int {
+ offset -= sovV3Election(v)
+ base := offset
+ for v >= 1<<7 {
+ dAtA[offset] = uint8(v&0x7f | 0x80)
+ v >>= 7
+ offset++
+ }
+ dAtA[offset] = uint8(v)
+ return base
+}
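
`encodeVarintV3Election` emits the standard protobuf base-128 varint: seven payload bits per byte, with the high bit set on every byte except the last. A worked example (value assumed):

```go
// 300 = 0b1_0010_1100 needs two varint bytes: 0xAC (low 7 bits plus the
// continuation flag) followed by 0x02 (the remaining bits).
buf := make([]byte, sovV3Election(300))
encodeVarintV3Election(buf, len(buf), 300)
// buf == []byte{0xAC, 0x02}
```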
+func (m *CampaignRequest) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ l = len(m.Name)
+ if l > 0 {
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.Lease != 0 {
+ n += 1 + sovV3Election(uint64(m.Lease))
+ }
+ l = len(m.Value)
+ if l > 0 {
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *CampaignResponse) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ if m.Header != nil {
+ l = m.Header.Size()
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.Leader != nil {
+ l = m.Leader.Size()
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *LeaderKey) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ l = len(m.Name)
+ if l > 0 {
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ l = len(m.Key)
+ if l > 0 {
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.Rev != 0 {
+ n += 1 + sovV3Election(uint64(m.Rev))
+ }
+ if m.Lease != 0 {
+ n += 1 + sovV3Election(uint64(m.Lease))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *LeaderRequest) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ l = len(m.Name)
+ if l > 0 {
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *LeaderResponse) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ if m.Header != nil {
+ l = m.Header.Size()
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.Kv != nil {
+ l = m.Kv.Size()
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *ResignRequest) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ if m.Leader != nil {
+ l = m.Leader.Size()
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *ResignResponse) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ if m.Header != nil {
+ l = m.Header.Size()
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *ProclaimRequest) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ if m.Leader != nil {
+ l = m.Leader.Size()
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ l = len(m.Value)
+ if l > 0 {
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *ProclaimResponse) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ if m.Header != nil {
+ l = m.Header.Size()
+ n += 1 + l + sovV3Election(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func sovV3Election(x uint64) (n int) {
+ return (math_bits.Len64(x|1) + 6) / 7
+}
+func sozV3Election(x uint64) (n int) {
+ return sovV3Election(uint64((x << 1) ^ uint64((int64(x) >> 63))))
+}
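
`sovV3Election` is the size companion to the encoder above: `math_bits.Len64(x|1)` is the bit length (the `|1` makes zero count as one bit), and adding 6 before dividing by 7 rounds up to whole varint bytes. `sozV3Election` zigzag-folds signed values so small negatives encode small. Illustrative values:

```go
_ = sovV3Election(1)                 // == 1: 1 bit  -> (1+6)/7 = 1 byte
_ = sovV3Election(300)               // == 2: 9 bits -> (9+6)/7 = 2 bytes
_ = sozV3Election(uint64(int64(-1))) // == 1: zigzag maps -1 -> 1, 1 -> 2, -2 -> 3
```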
+func (m *CampaignRequest) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: CampaignRequest: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: CampaignRequest: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Name = append(m.Name[:0], dAtA[iNdEx:postIndex]...)
+ if m.Name == nil {
+ m.Name = []byte{}
+ }
+ iNdEx = postIndex
+ case 2:
+ if wireType != 0 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Lease", wireType)
+ }
+ m.Lease = 0
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ m.Lease |= int64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ case 3:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Value", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Value = append(m.Value[:0], dAtA[iNdEx:postIndex]...)
+ if m.Value == nil {
+ m.Value = []byte{}
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
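
The decoder is the mirror image of the marshaler: each iteration reads a tag varint, splits it into `fieldNum` (`wire >> 3`) and `wireType` (`wire & 0x7`), and dispatches; unknown fields fall through to `skipV3Election` further below. Round-tripping the bytes from the marshal example above (values assumed):

```go
var out CampaignRequest
if err := out.Unmarshal([]byte{0x0a, 0x01, 'e', 0x10, 0x01}); err != nil {
	panic(err) // sketch only
}
// out.Name == []byte("e"), out.Lease == 1, out.Value is nil (field absent)
```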
+func (m *CampaignResponse) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: CampaignResponse: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: CampaignResponse: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Header", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Header == nil {
+ m.Header = &etcdserverpb.ResponseHeader{}
+ }
+ if err := m.Header.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ case 2:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Leader", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Leader == nil {
+ m.Leader = &LeaderKey{}
+ }
+ if err := m.Leader.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *LeaderKey) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: LeaderKey: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: LeaderKey: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Name = append(m.Name[:0], dAtA[iNdEx:postIndex]...)
+ if m.Name == nil {
+ m.Name = []byte{}
+ }
+ iNdEx = postIndex
+ case 2:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Key", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Key = append(m.Key[:0], dAtA[iNdEx:postIndex]...)
+ if m.Key == nil {
+ m.Key = []byte{}
+ }
+ iNdEx = postIndex
+ case 3:
+ if wireType != 0 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Rev", wireType)
+ }
+ m.Rev = 0
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ m.Rev |= int64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ case 4:
+ if wireType != 0 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Lease", wireType)
+ }
+ m.Lease = 0
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ m.Lease |= int64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *LeaderRequest) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: LeaderRequest: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: LeaderRequest: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Name = append(m.Name[:0], dAtA[iNdEx:postIndex]...)
+ if m.Name == nil {
+ m.Name = []byte{}
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *LeaderResponse) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: LeaderResponse: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: LeaderResponse: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Header", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Header == nil {
+ m.Header = &etcdserverpb.ResponseHeader{}
+ }
+ if err := m.Header.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ case 2:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Kv", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Kv == nil {
+ m.Kv = &mvccpb.KeyValue{}
+ }
+ if err := m.Kv.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *ResignRequest) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: ResignRequest: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: ResignRequest: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Leader", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Leader == nil {
+ m.Leader = &LeaderKey{}
+ }
+ if err := m.Leader.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *ResignResponse) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: ResignResponse: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: ResignResponse: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Header", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Header == nil {
+ m.Header = &etcdserverpb.ResponseHeader{}
+ }
+ if err := m.Header.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *ProclaimRequest) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: ProclaimRequest: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: ProclaimRequest: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Leader", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Leader == nil {
+ m.Leader = &LeaderKey{}
+ }
+ if err := m.Leader.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ case 2:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Value", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Value = append(m.Value[:0], dAtA[iNdEx:postIndex]...)
+ if m.Value == nil {
+ m.Value = []byte{}
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *ProclaimResponse) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: ProclaimResponse: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: ProclaimResponse: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Header", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Header == nil {
+ m.Header = &etcdserverpb.ResponseHeader{}
+ }
+ if err := m.Header.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Election(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Election
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func skipV3Election(dAtA []byte) (n int, err error) {
+ l := len(dAtA)
+ iNdEx := 0
+ depth := 0
+ for iNdEx < l {
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= (uint64(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ wireType := int(wire & 0x7)
+ switch wireType {
+ case 0:
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ iNdEx++
+ if dAtA[iNdEx-1] < 0x80 {
+ break
+ }
+ }
+ case 1:
+ iNdEx += 8
+ case 2:
+ var length int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowV3Election
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ length |= (int(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if length < 0 {
+ return 0, ErrInvalidLengthV3Election
+ }
+ iNdEx += length
+ case 3:
+ depth++
+ case 4:
+ if depth == 0 {
+ return 0, ErrUnexpectedEndOfGroupV3Election
+ }
+ depth--
+ case 5:
+ iNdEx += 4
+ default:
+ return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
+ }
+ if iNdEx < 0 {
+ return 0, ErrInvalidLengthV3Election
+ }
+ if depth == 0 {
+ return iNdEx, nil
+ }
+ }
+ return 0, io.ErrUnexpectedEOF
+}
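
`skipV3Election` is what gives these messages forward compatibility: it walks an unknown field by wire type (varint, fixed64, length-delimited, group start/end, fixed32) and reports how many bytes the caller should copy into `XXX_unrecognized`. A sketch of the resulting round-trip behavior, with an assumed unknown field number 15:

```go
// Tag 0x78 = field 15, wire type 0 (varint); 0x2a = value 42. Field 15 is not
// defined on CampaignRequest, so Unmarshal stashes both bytes verbatim.
var msg CampaignRequest
_ = msg.Unmarshal([]byte{0x78, 0x2a})
out, _ := msg.Marshal()
// out == []byte{0x78, 0x2a}: the unknown field survives re-encoding.
```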
+
+var (
+ ErrInvalidLengthV3Election = fmt.Errorf("proto: negative length found during unmarshaling")
+ ErrIntOverflowV3Election = fmt.Errorf("proto: integer overflow")
+ ErrUnexpectedEndOfGroupV3Election = fmt.Errorf("proto: unexpected end of group")
+)
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/v3election.proto b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/v3election.proto
new file mode 100644
index 0000000..6042776
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb/v3election.proto
@@ -0,0 +1,121 @@
+syntax = "proto3";
+package v3electionpb;
+
+import "gogoproto/gogo.proto";
+import "etcd/api/etcdserverpb/rpc.proto";
+import "etcd/api/mvccpb/kv.proto";
+
+// for grpc-gateway
+import "google/api/annotations.proto";
+
+option go_package = "go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb";
+
+option (gogoproto.marshaler_all) = true;
+option (gogoproto.unmarshaler_all) = true;
+
+// The election service exposes client-side election facilities as a gRPC interface.
+service Election {
+ // Campaign waits to acquire leadership in an election, returning a LeaderKey
+ // representing the leadership if successful. The LeaderKey can then be used
+ // to issue new values on the election, transactionally guard API requests on
+ // leadership still being held, and resign from the election.
+ rpc Campaign(CampaignRequest) returns (CampaignResponse) {
+ option (google.api.http) = {
+ post: "/v3/election/campaign"
+ body: "*"
+ };
+ }
+ // Proclaim updates the leader's posted value with a new value.
+ rpc Proclaim(ProclaimRequest) returns (ProclaimResponse) {
+ option (google.api.http) = {
+ post: "/v3/election/proclaim"
+ body: "*"
+ };
+ }
+ // Leader returns the current election proclamation, if any.
+ rpc Leader(LeaderRequest) returns (LeaderResponse) {
+ option (google.api.http) = {
+ post: "/v3/election/leader"
+ body: "*"
+ };
+ }
+ // Observe streams election proclamations in-order as made by the election's
+ // elected leaders.
+ rpc Observe(LeaderRequest) returns (stream LeaderResponse) {
+ option (google.api.http) = {
+ post: "/v3/election/observe"
+ body: "*"
+ };
+ }
+ // Resign releases election leadership so other campaigners may acquire
+ // leadership on the election.
+ rpc Resign(ResignRequest) returns (ResignResponse) {
+ option (google.api.http) = {
+ post: "/v3/election/resign"
+ body: "*"
+ };
+ }
+}
+
+message CampaignRequest {
+ // name is the election's identifier for the campaign.
+ bytes name = 1;
+ // lease is the ID of the lease attached to leadership of the election. If the
+ // lease expires or is revoked before resigning leadership, then the
+ // leadership is transferred to the next campaigner, if any.
+ int64 lease = 2;
+ // value is the initial proclaimed value set when the campaigner wins the
+ // election.
+ bytes value = 3;
+}
+
+message CampaignResponse {
+ etcdserverpb.ResponseHeader header = 1;
+ // leader describes the resources used for holding leadership of the election.
+ LeaderKey leader = 2;
+}
+
+message LeaderKey {
+ // name is the election identifier that corresponds to the leadership key.
+ bytes name = 1;
+ // key is an opaque key representing the ownership of the election. If the key
+ // is deleted, then leadership is lost.
+ bytes key = 2;
+ // rev is the creation revision of the key. It can be used to test for ownership
+ // of an election during transactions by checking that the key's creation
+ // revision matches rev.
+ int64 rev = 3;
+ // lease is the lease ID of the election leader.
+ int64 lease = 4;
+}
+
+message LeaderRequest {
+ // name is the election identifier for the leadership information.
+ bytes name = 1;
+}
+
+message LeaderResponse {
+ etcdserverpb.ResponseHeader header = 1;
+ // kv is the key-value pair representing the latest leader update.
+ mvccpb.KeyValue kv = 2;
+}
+
+message ResignRequest {
+ // leader is the leadership to relinquish by resignation.
+ LeaderKey leader = 1;
+}
+
+message ResignResponse {
+ etcdserverpb.ResponseHeader header = 1;
+}
+
+message ProclaimRequest {
+ // leader is the leadership held on the election.
+ LeaderKey leader = 1;
+ // value is an update meant to overwrite the leader's current value.
+ bytes value = 2;
+}
+
+message ProclaimResponse {
+ etcdserverpb.ResponseHeader header = 1;
+}
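
Because each RPC above carries a `google.api.http` binding, grpc-gateway exposes the service as JSON-over-HTTP as well. A hedged Go sketch of campaigning through that REST surface (the endpoint and lease ID are assumptions; `bytes` fields such as `name` travel base64-encoded in the JSON mapping, and the lease must come from a prior lease grant):

```go
// "bXktZWxlY3Rpb24=" is base64 for "my-election"; "123" is an assumed lease ID.
body := strings.NewReader(`{"name": "bXktZWxlY3Rpb24=", "lease": "123"}`)
resp, err := http.Post("http://localhost:2379/v3/election/campaign",
	"application/json", body)
if err == nil {
	defer resp.Body.Close()
}
```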
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/doc.go
new file mode 100644
index 0000000..e0a1008
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v3lock provides a v3 locking service from an etcdserver.
+package v3lock
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/lock.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/lock.go
new file mode 100644
index 0000000..c8ef56e
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/lock.go
@@ -0,0 +1,56 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3lock
+
+import (
+ "context"
+
+ clientv3 "go.etcd.io/etcd/client/v3"
+ "go.etcd.io/etcd/client/v3/concurrency"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb"
+)
+
+type lockServer struct {
+ c *clientv3.Client
+}
+
+func NewLockServer(c *clientv3.Client) v3lockpb.LockServer {
+ return &lockServer{c}
+}
+
+func (ls *lockServer) Lock(ctx context.Context, req *v3lockpb.LockRequest) (*v3lockpb.LockResponse, error) {
+ s, err := concurrency.NewSession(
+ ls.c,
+ concurrency.WithLease(clientv3.LeaseID(req.Lease)),
+ concurrency.WithContext(ctx),
+ )
+ if err != nil {
+ return nil, err
+ }
+ s.Orphan()
+ m := concurrency.NewMutex(s, string(req.Name))
+ if err = m.Lock(ctx); err != nil {
+ return nil, err
+ }
+ return &v3lockpb.LockResponse{Header: m.Header(), Key: []byte(m.Key())}, nil
+}
+
+func (ls *lockServer) Unlock(ctx context.Context, req *v3lockpb.UnlockRequest) (*v3lockpb.UnlockResponse, error) {
+ resp, err := ls.c.Delete(ctx, string(req.Key))
+ if err != nil {
+ return nil, err
+ }
+ return &v3lockpb.UnlockResponse{Header: resp.Header}, nil
+}
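
Server-side, `Lock` builds a `concurrency.Session` around the caller-supplied lease, orphans it so the lease's own lifetime (rather than a server-side keepalive) bounds ownership, and then takes a `concurrency.Mutex`. A client can use the same primitives directly; a minimal sketch with assumed endpoint and lock name:

```go
cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"localhost:2379"}})
if err != nil {
	panic(err) // sketch only
}
defer cli.Close()

sess, err := concurrency.NewSession(cli) // grants and keeps alive a lease
if err != nil {
	panic(err)
}
defer sess.Close()

mu := concurrency.NewMutex(sess, "my-lock")
if err := mu.Lock(context.TODO()); err != nil { // blocks until the lock is held
	panic(err)
}
defer mu.Unlock(context.TODO())
```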
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/gw/v3lock.pb.gw.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/gw/v3lock.pb.gw.go
new file mode 100644
index 0000000..5efb759
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/gw/v3lock.pb.gw.go
@@ -0,0 +1,220 @@
+// Code generated by protoc-gen-grpc-gateway. DO NOT EDIT.
+// source: server/etcdserver/api/v3lock/v3lockpb/v3lock.proto
+
+/*
+Package v3lockpb is a reverse proxy.
+
+It translates gRPC into RESTful JSON APIs.
+*/
+package gw
+
+import (
+ protov1 "github.com/golang/protobuf/proto"
+
+ "context"
+ "errors"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb"
+ "io"
+ "net/http"
+
+ "github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
+ "github.com/grpc-ecosystem/grpc-gateway/v2/utilities"
+ "google.golang.org/grpc"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/grpclog"
+ "google.golang.org/grpc/metadata"
+ "google.golang.org/grpc/status"
+ "google.golang.org/protobuf/proto"
+)
+
+// Suppress "imported and not used" errors
+var (
+ _ codes.Code
+ _ io.Reader
+ _ status.Status
+ _ = errors.New
+ _ = runtime.String
+ _ = utilities.NewDoubleArray
+ _ = metadata.Join
+)
+
+func request_Lock_Lock_0(ctx context.Context, marshaler runtime.Marshaler, client v3lockpb.LockClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3lockpb.LockRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := client.Lock(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD))
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func local_request_Lock_Lock_0(ctx context.Context, marshaler runtime.Marshaler, server v3lockpb.LockServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3lockpb.LockRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := server.Lock(ctx, &protoReq)
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func request_Lock_Unlock_0(ctx context.Context, marshaler runtime.Marshaler, client v3lockpb.LockClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3lockpb.UnlockRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := client.Unlock(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD))
+ return protov1.MessageV2(msg), metadata, err
+}
+
+func local_request_Lock_Unlock_0(ctx context.Context, marshaler runtime.Marshaler, server v3lockpb.LockServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
+ var (
+ protoReq v3lockpb.UnlockRequest
+ metadata runtime.ServerMetadata
+ )
+ if err := marshaler.NewDecoder(req.Body).Decode(protov1.MessageV2(&protoReq)); err != nil && !errors.Is(err, io.EOF) {
+ return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
+ }
+ msg, err := server.Unlock(ctx, &protoReq)
+ return protov1.MessageV2(msg), metadata, err
+}
+
+// v3lockpb.RegisterLockHandlerServer registers the http handlers for service Lock to "mux".
+// UnaryRPC :call v3lockpb.LockServer directly.
+// StreamingRPC :currently unsupported pending https://github.com/grpc/grpc-go/issues/906.
+// Note that using this registration option will cause many gRPC library features to stop working. Consider using RegisterLockHandlerFromEndpoint instead.
+// GRPC interceptors will not work for this type of registration. To use interceptors, you must use the "runtime.WithMiddlewares" option in the "runtime.NewServeMux" call.
+func RegisterLockHandlerServer(ctx context.Context, mux *runtime.ServeMux, server v3lockpb.LockServer) error {
+ mux.Handle(http.MethodPost, pattern_Lock_Lock_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ var stream runtime.ServerTransportStream
+ ctx = grpc.NewContextWithServerTransportStream(ctx, &stream)
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/v3lockpb.Lock/Lock", runtime.WithHTTPPathPattern("/v3/lock/lock"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := local_request_Lock_Lock_0(annotatedContext, inboundMarshaler, server, req, pathParams)
+ md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer())
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Lock_Lock_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ mux.Handle(http.MethodPost, pattern_Lock_Unlock_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ var stream runtime.ServerTransportStream
+ ctx = grpc.NewContextWithServerTransportStream(ctx, &stream)
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/v3lockpb.Lock/Unlock", runtime.WithHTTPPathPattern("/v3/lock/unlock"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := local_request_Lock_Unlock_0(annotatedContext, inboundMarshaler, server, req, pathParams)
+ md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer())
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Lock_Unlock_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+
+ return nil
+}
+
+// RegisterLockHandlerFromEndpoint is the same as RegisterLockHandler but
+// automatically dials "endpoint" and closes the connection when "ctx" is done.
+func RegisterLockHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
+ conn, err := grpc.NewClient(endpoint, opts...)
+ if err != nil {
+ return err
+ }
+ defer func() {
+ if err != nil {
+ if cerr := conn.Close(); cerr != nil {
+ grpclog.Errorf("Failed to close conn to %s: %v", endpoint, cerr)
+ }
+ return
+ }
+ go func() {
+ <-ctx.Done()
+ if cerr := conn.Close(); cerr != nil {
+ grpclog.Errorf("Failed to close conn to %s: %v", endpoint, cerr)
+ }
+ }()
+ }()
+ return RegisterLockHandler(ctx, mux, conn)
+}
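
Putting the pieces together, a typical (assumed) wiring of this gateway dials the gRPC endpoint and serves the generated mux over HTTP; `insecure` here is `google.golang.org/grpc/credentials/insecure`, and all addresses are placeholders:

```go
// Sketch with assumed addresses; insecure credentials are for illustration only.
mux := runtime.NewServeMux()
opts := []grpc.DialOption{grpc.WithTransportCredentials(insecure.NewCredentials())}
if err := RegisterLockHandlerFromEndpoint(context.Background(), mux, "localhost:2379", opts); err != nil {
	log.Fatal(err)
}
log.Fatal(http.ListenAndServe(":8080", mux))
```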
+
+// RegisterLockHandler registers the http handlers for service Lock to "mux".
+// The handlers forward requests to the grpc endpoint over "conn".
+func RegisterLockHandler(ctx context.Context, mux *runtime.ServeMux, conn *grpc.ClientConn) error {
+ return RegisterLockHandlerClient(ctx, mux, v3lockpb.NewLockClient(conn))
+}
+
+// v3lockpb.RegisterLockHandlerClient registers the http handlers for service Lock
+// to "mux". The handlers forward requests to the grpc endpoint over the given implementation of "LockClient".
+// Note: the gRPC framework executes interceptors within the gRPC handler. If the passed in "LockClient"
+// doesn't go through the normal gRPC flow (creating a gRPC client etc.) then it will be up to the passed in
+// "LockClient" to call the correct interceptors. This client ignores the HTTP middlewares.
+func RegisterLockHandlerClient(ctx context.Context, mux *runtime.ServeMux, client v3lockpb.LockClient) error {
+ mux.Handle(http.MethodPost, pattern_Lock_Lock_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/v3lockpb.Lock/Lock", runtime.WithHTTPPathPattern("/v3/lock/lock"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := request_Lock_Lock_0(annotatedContext, inboundMarshaler, client, req, pathParams)
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Lock_Lock_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ mux.Handle(http.MethodPost, pattern_Lock_Unlock_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
+ ctx, cancel := context.WithCancel(req.Context())
+ defer cancel()
+ inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
+ annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/v3lockpb.Lock/Unlock", runtime.WithHTTPPathPattern("/v3/lock/unlock"))
+ if err != nil {
+ runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ resp, md, err := request_Lock_Unlock_0(annotatedContext, inboundMarshaler, client, req, pathParams)
+ annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
+ if err != nil {
+ runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
+ return
+ }
+ forward_Lock_Unlock_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
+ })
+ return nil
+}
+
+var (
+ pattern_Lock_Lock_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 1}, []string{"v3", "lock"}, ""))
+ pattern_Lock_Unlock_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"v3", "lock", "unlock"}, ""))
+)
+
+var (
+ forward_Lock_Lock_0 = runtime.ForwardResponseMessage
+ forward_Lock_Unlock_0 = runtime.ForwardResponseMessage
+)
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/v3lock.pb.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/v3lock.pb.go
new file mode 100644
index 0000000..39c04ab
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/v3lock.pb.go
@@ -0,0 +1,1143 @@
+// Code generated by protoc-gen-gogo. DO NOT EDIT.
+// source: v3lock.proto
+
+package v3lockpb
+
+import (
+ context "context"
+ fmt "fmt"
+ io "io"
+ math "math"
+ math_bits "math/bits"
+
+ _ "github.com/gogo/protobuf/gogoproto"
+ proto "github.com/golang/protobuf/proto"
+ etcdserverpb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ _ "google.golang.org/genproto/googleapis/api/annotations"
+ grpc "google.golang.org/grpc"
+ codes "google.golang.org/grpc/codes"
+ status "google.golang.org/grpc/status"
+)
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ = proto.Marshal
+var _ = fmt.Errorf
+var _ = math.Inf
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the proto package it is being compiled against.
+// A compilation error at this line likely means your copy of the
+// proto package needs to be updated.
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
+
+type LockRequest struct {
+ // name is the identifier for the distributed shared lock to be acquired.
+ Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
+ // lease is the ID of the lease that will be attached to ownership of the
+ // lock. If the lease expires or is revoked while it holds the lock, the
+ // lock is automatically released. Calls to Lock with the same lease will
+ // be treated as a single acquisition; locking twice with the same lease is a
+ // no-op.
+ Lease int64 `protobuf:"varint,2,opt,name=lease,proto3" json:"lease,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *LockRequest) Reset() { *m = LockRequest{} }
+func (m *LockRequest) String() string { return proto.CompactTextString(m) }
+func (*LockRequest) ProtoMessage() {}
+func (*LockRequest) Descriptor() ([]byte, []int) {
+ return fileDescriptor_52389b3e2f253201, []int{0}
+}
+func (m *LockRequest) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *LockRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_LockRequest.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *LockRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LockRequest.Merge(m, src)
+}
+func (m *LockRequest) XXX_Size() int {
+ return m.Size()
+}
+func (m *LockRequest) XXX_DiscardUnknown() {
+ xxx_messageInfo_LockRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_LockRequest proto.InternalMessageInfo
+
+func (m *LockRequest) GetName() []byte {
+ if m != nil {
+ return m.Name
+ }
+ return nil
+}
+
+func (m *LockRequest) GetLease() int64 {
+ if m != nil {
+ return m.Lease
+ }
+ return 0
+}
+
+type LockResponse struct {
+ Header *etcdserverpb.ResponseHeader `protobuf:"bytes,1,opt,name=header,proto3" json:"header,omitempty"`
+ // key is a key that will exist on etcd for the duration that the Lock caller
+ // owns the lock. Users should not modify this key or the lock may exhibit
+ // undefined behavior.
+ Key []byte `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *LockResponse) Reset() { *m = LockResponse{} }
+func (m *LockResponse) String() string { return proto.CompactTextString(m) }
+func (*LockResponse) ProtoMessage() {}
+func (*LockResponse) Descriptor() ([]byte, []int) {
+ return fileDescriptor_52389b3e2f253201, []int{1}
+}
+func (m *LockResponse) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *LockResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_LockResponse.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *LockResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LockResponse.Merge(m, src)
+}
+func (m *LockResponse) XXX_Size() int {
+ return m.Size()
+}
+func (m *LockResponse) XXX_DiscardUnknown() {
+ xxx_messageInfo_LockResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_LockResponse proto.InternalMessageInfo
+
+func (m *LockResponse) GetHeader() *etcdserverpb.ResponseHeader {
+ if m != nil {
+ return m.Header
+ }
+ return nil
+}
+
+func (m *LockResponse) GetKey() []byte {
+ if m != nil {
+ return m.Key
+ }
+ return nil
+}
+
+type UnlockRequest struct {
+ // key is the lock ownership key granted by Lock.
+ Key []byte `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *UnlockRequest) Reset() { *m = UnlockRequest{} }
+func (m *UnlockRequest) String() string { return proto.CompactTextString(m) }
+func (*UnlockRequest) ProtoMessage() {}
+func (*UnlockRequest) Descriptor() ([]byte, []int) {
+ return fileDescriptor_52389b3e2f253201, []int{2}
+}
+func (m *UnlockRequest) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *UnlockRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_UnlockRequest.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *UnlockRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_UnlockRequest.Merge(m, src)
+}
+func (m *UnlockRequest) XXX_Size() int {
+ return m.Size()
+}
+func (m *UnlockRequest) XXX_DiscardUnknown() {
+ xxx_messageInfo_UnlockRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_UnlockRequest proto.InternalMessageInfo
+
+func (m *UnlockRequest) GetKey() []byte {
+ if m != nil {
+ return m.Key
+ }
+ return nil
+}
+
+type UnlockResponse struct {
+ Header *etcdserverpb.ResponseHeader `protobuf:"bytes,1,opt,name=header,proto3" json:"header,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *UnlockResponse) Reset() { *m = UnlockResponse{} }
+func (m *UnlockResponse) String() string { return proto.CompactTextString(m) }
+func (*UnlockResponse) ProtoMessage() {}
+func (*UnlockResponse) Descriptor() ([]byte, []int) {
+ return fileDescriptor_52389b3e2f253201, []int{3}
+}
+func (m *UnlockResponse) XXX_Unmarshal(b []byte) error {
+ return m.Unmarshal(b)
+}
+func (m *UnlockResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ if deterministic {
+ return xxx_messageInfo_UnlockResponse.Marshal(b, m, deterministic)
+ } else {
+ b = b[:cap(b)]
+ n, err := m.MarshalToSizedBuffer(b)
+ if err != nil {
+ return nil, err
+ }
+ return b[:n], nil
+ }
+}
+func (m *UnlockResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_UnlockResponse.Merge(m, src)
+}
+func (m *UnlockResponse) XXX_Size() int {
+ return m.Size()
+}
+func (m *UnlockResponse) XXX_DiscardUnknown() {
+ xxx_messageInfo_UnlockResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_UnlockResponse proto.InternalMessageInfo
+
+func (m *UnlockResponse) GetHeader() *etcdserverpb.ResponseHeader {
+ if m != nil {
+ return m.Header
+ }
+ return nil
+}
+
+func init() {
+ proto.RegisterType((*LockRequest)(nil), "v3lockpb.LockRequest")
+ proto.RegisterType((*LockResponse)(nil), "v3lockpb.LockResponse")
+ proto.RegisterType((*UnlockRequest)(nil), "v3lockpb.UnlockRequest")
+ proto.RegisterType((*UnlockResponse)(nil), "v3lockpb.UnlockResponse")
+}
+
+func init() { proto.RegisterFile("v3lock.proto", fileDescriptor_52389b3e2f253201) }
+
+var fileDescriptor_52389b3e2f253201 = []byte{
+ // 356 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x92, 0xcd, 0x4a, 0xc3, 0x40,
+ 0x10, 0xc7, 0xdd, 0xb6, 0x16, 0xd9, 0xa6, 0x2a, 0x4b, 0xd5, 0x10, 0x4a, 0xac, 0x39, 0x15, 0x0f,
+ 0x59, 0x68, 0x05, 0xc5, 0xa3, 0x07, 0x11, 0x11, 0x84, 0x80, 0x0a, 0xde, 0xd2, 0x74, 0x88, 0xa5,
+ 0x71, 0x27, 0x26, 0x69, 0xc1, 0xab, 0xaf, 0xe0, 0xc5, 0xc7, 0xf0, 0x31, 0x3c, 0x0a, 0xbe, 0x80,
+ 0x54, 0x1f, 0x44, 0x32, 0x9b, 0xd8, 0xaa, 0x47, 0x2f, 0xc9, 0xec, 0xce, 0x6f, 0xfe, 0xf3, 0xb1,
+ 0xc3, 0x8d, 0x69, 0x3f, 0xc2, 0x60, 0xec, 0xc6, 0x09, 0x66, 0x28, 0x56, 0xf4, 0x29, 0x1e, 0x58,
+ 0xad, 0x10, 0x43, 0xa4, 0x4b, 0x99, 0x5b, 0xda, 0x6f, 0x6d, 0x43, 0x16, 0x0c, 0xa5, 0x1f, 0x8f,
+ 0x64, 0x6e, 0xa4, 0x90, 0x4c, 0x21, 0x89, 0x07, 0x32, 0x89, 0x83, 0x02, 0x68, 0x87, 0x88, 0x61,
+ 0x04, 0x84, 0xf8, 0x4a, 0x61, 0xe6, 0x67, 0x23, 0x54, 0xa9, 0xf6, 0x3a, 0xfb, 0xbc, 0x71, 0x86,
+ 0xc1, 0xd8, 0x83, 0xbb, 0x09, 0xa4, 0x99, 0x10, 0xbc, 0xa6, 0xfc, 0x5b, 0x30, 0x59, 0x87, 0x75,
+ 0x0d, 0x8f, 0x6c, 0xd1, 0xe2, 0xcb, 0x11, 0xf8, 0x29, 0x98, 0x95, 0x0e, 0xeb, 0x56, 0x3d, 0x7d,
+ 0x70, 0x2e, 0xb9, 0xa1, 0x03, 0xd3, 0x18, 0x55, 0x0a, 0x62, 0x8f, 0xd7, 0x6f, 0xc0, 0x1f, 0x42,
+ 0x42, 0xb1, 0x8d, 0x5e, 0xdb, 0x5d, 0xac, 0xc7, 0x2d, 0xb9, 0x13, 0x62, 0xbc, 0x82, 0x15, 0xeb,
+ 0xbc, 0x3a, 0x86, 0x7b, 0x52, 0x36, 0xbc, 0xdc, 0x74, 0x76, 0x78, 0xf3, 0x42, 0x45, 0x0b, 0x25,
+ 0x15, 0x08, 0x9b, 0x23, 0xc7, 0x7c, 0xb5, 0x44, 0xfe, 0x93, 0xbc, 0xf7, 0xcc, 0x78, 0x2d, 0xef,
+ 0x41, 0x9c, 0x17, 0xff, 0x0d, 0xb7, 0x1c, 0xb6, 0xbb, 0x30, 0x14, 0x6b, 0xf3, 0xf7, 0xb5, 0x56,
+ 0x73, 0xcc, 0x87, 0xb7, 0xcf, 0xc7, 0x8a, 0x70, 0x9a, 0x72, 0xda, 0x97, 0x39, 0x40, 0x9f, 0x43,
+ 0xb6, 0x2b, 0xae, 0x78, 0x5d, 0x57, 0x28, 0xb6, 0xe6, 0xb1, 0x3f, 0xda, 0xb2, 0xcc, 0xbf, 0x8e,
+ 0x42, 0xd6, 0x22, 0xd9, 0x96, 0xb3, 0xf6, 0x2d, 0x3b, 0x51, 0x85, 0xf0, 0xd1, 0xe9, 0xcb, 0xcc,
+ 0x66, 0xaf, 0x33, 0x9b, 0xbd, 0xcf, 0x6c, 0xf6, 0xf4, 0x61, 0x2f, 0x5d, 0x1f, 0x84, 0x48, 0xcd,
+ 0xba, 0x23, 0xa4, 0x0d, 0x90, 0xba, 0xeb, 0x3c, 0x76, 0x3e, 0x03, 0x7a, 0x7c, 0x9d, 0x4f, 0x96,
+ 0x69, 0x07, 0x75, 0xda, 0x80, 0xfe, 0x57, 0x00, 0x00, 0x00, 0xff, 0xff, 0xcb, 0x48, 0x31, 0x4a,
+ 0x70, 0x02, 0x00, 0x00,
+}
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ context.Context
+var _ grpc.ClientConn
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the grpc package it is being compiled against.
+const _ = grpc.SupportPackageIsVersion4
+
+// LockClient is the client API for Lock service.
+//
+// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
+type LockClient interface {
+ // Lock acquires a distributed shared lock on a given named lock.
+ // On success, it will return a unique key that exists so long as the
+ // lock is held by the caller. This key can be used in conjunction with
+ // transactions to safely ensure updates to etcd only occur while holding
+ // lock ownership. The lock is held until Unlock is called on the key or the
+ // lease associated with the owner expires.
+ Lock(ctx context.Context, in *LockRequest, opts ...grpc.CallOption) (*LockResponse, error)
+ // Unlock takes a key returned by Lock and releases the hold on the lock. The
+ // next Lock caller waiting for the lock will then be woken up and given
+ // ownership of the lock.
+ Unlock(ctx context.Context, in *UnlockRequest, opts ...grpc.CallOption) (*UnlockResponse, error)
+}
+
+type lockClient struct {
+ cc *grpc.ClientConn
+}
+
+func NewLockClient(cc *grpc.ClientConn) LockClient {
+ return &lockClient{cc}
+}
+
+func (c *lockClient) Lock(ctx context.Context, in *LockRequest, opts ...grpc.CallOption) (*LockResponse, error) {
+ out := new(LockResponse)
+ err := c.cc.Invoke(ctx, "/v3lockpb.Lock/Lock", in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+func (c *lockClient) Unlock(ctx context.Context, in *UnlockRequest, opts ...grpc.CallOption) (*UnlockResponse, error) {
+ out := new(UnlockResponse)
+ err := c.cc.Invoke(ctx, "/v3lockpb.Lock/Unlock", in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
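
The generated client above is a thin wrapper over grpc Invoke calls. A minimal sketch of driving it directly, assuming an etcd member serving gRPC on 127.0.0.1:2379; the lease handling is an assumption (passing 0 asks the lock server to create its own session lease), and real callers normally go through the clientv3 concurrency helpers instead:

package main

import (
	"context"
	"log"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	"go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb"
)

func main() {
	conn, err := grpc.Dial("127.0.0.1:2379", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	lc := v3lockpb.NewLockClient(conn)
	var leaseID int64 // assumed: 0 lets the lock server create a session lease itself

	// Lock blocks until ownership is acquired; the returned key identifies the holder.
	lresp, err := lc.Lock(context.TODO(), &v3lockpb.LockRequest{Name: []byte("my-lock"), Lease: leaseID})
	if err != nil {
		log.Fatal(err)
	}

	// Unlock releases the hold using the ownership key returned by Lock.
	if _, err := lc.Unlock(context.TODO(), &v3lockpb.UnlockRequest{Key: lresp.Key}); err != nil {
		log.Fatal(err)
	}
}
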
+
+// LockServer is the server API for Lock service.
+type LockServer interface {
+ // Lock acquires a distributed shared lock on a given named lock.
+ // On success, it will return a unique key that exists so long as the
+ // lock is held by the caller. This key can be used in conjunction with
+ // transactions to safely ensure updates to etcd only occur while holding
+ // lock ownership. The lock is held until Unlock is called on the key or the
+ // lease associated with the owner expires.
+ Lock(context.Context, *LockRequest) (*LockResponse, error)
+ // Unlock takes a key returned by Lock and releases the hold on the lock. The
+ // next Lock caller waiting for the lock will then be woken up and given
+ // ownership of the lock.
+ Unlock(context.Context, *UnlockRequest) (*UnlockResponse, error)
+}
+
+// UnimplementedLockServer can be embedded to have forward compatible implementations.
+type UnimplementedLockServer struct {
+}
+
+func (*UnimplementedLockServer) Lock(ctx context.Context, req *LockRequest) (*LockResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Lock not implemented")
+}
+func (*UnimplementedLockServer) Unlock(ctx context.Context, req *UnlockRequest) (*UnlockResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Unlock not implemented")
+}
+
+func RegisterLockServer(s *grpc.Server, srv LockServer) {
+ s.RegisterService(&_Lock_serviceDesc, srv)
+}
+
+func _Lock_Lock_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(LockRequest)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(LockServer).Lock(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: "/v3lockpb.Lock/Lock",
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(LockServer).Lock(ctx, req.(*LockRequest))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+func _Lock_Unlock_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(UnlockRequest)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(LockServer).Unlock(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: "/v3lockpb.Lock/Unlock",
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(LockServer).Unlock(ctx, req.(*UnlockRequest))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+var _Lock_serviceDesc = grpc.ServiceDesc{
+ ServiceName: "v3lockpb.Lock",
+ HandlerType: (*LockServer)(nil),
+ Methods: []grpc.MethodDesc{
+ {
+ MethodName: "Lock",
+ Handler: _Lock_Lock_Handler,
+ },
+ {
+ MethodName: "Unlock",
+ Handler: _Lock_Unlock_Handler,
+ },
+ },
+ Streams: []grpc.StreamDesc{},
+ Metadata: "v3lock.proto",
+}
+
+func (m *LockRequest) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *LockRequest) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *LockRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Lease != 0 {
+ i = encodeVarintV3Lock(dAtA, i, uint64(m.Lease))
+ i--
+ dAtA[i] = 0x10
+ }
+ if len(m.Name) > 0 {
+ i -= len(m.Name)
+ copy(dAtA[i:], m.Name)
+ i = encodeVarintV3Lock(dAtA, i, uint64(len(m.Name)))
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *LockResponse) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *LockResponse) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *LockResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if len(m.Key) > 0 {
+ i -= len(m.Key)
+ copy(dAtA[i:], m.Key)
+ i = encodeVarintV3Lock(dAtA, i, uint64(len(m.Key)))
+ i--
+ dAtA[i] = 0x12
+ }
+ if m.Header != nil {
+ {
+ size, err := m.Header.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Lock(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *UnlockRequest) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *UnlockRequest) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *UnlockRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if len(m.Key) > 0 {
+ i -= len(m.Key)
+ copy(dAtA[i:], m.Key)
+ i = encodeVarintV3Lock(dAtA, i, uint64(len(m.Key)))
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func (m *UnlockResponse) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalToSizedBuffer(dAtA[:size])
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *UnlockResponse) MarshalTo(dAtA []byte) (int, error) {
+ size := m.Size()
+ return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *UnlockResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+ i := len(dAtA)
+ _ = i
+ var l int
+ _ = l
+ if m.XXX_unrecognized != nil {
+ i -= len(m.XXX_unrecognized)
+ copy(dAtA[i:], m.XXX_unrecognized)
+ }
+ if m.Header != nil {
+ {
+ size, err := m.Header.MarshalToSizedBuffer(dAtA[:i])
+ if err != nil {
+ return 0, err
+ }
+ i -= size
+ i = encodeVarintV3Lock(dAtA, i, uint64(size))
+ }
+ i--
+ dAtA[i] = 0xa
+ }
+ return len(dAtA) - i, nil
+}
+
+func encodeVarintV3Lock(dAtA []byte, offset int, v uint64) int {
+ offset -= sovV3Lock(v)
+ base := offset
+ for v >= 1<<7 {
+ dAtA[offset] = uint8(v&0x7f | 0x80)
+ v >>= 7
+ offset++
+ }
+ dAtA[offset] = uint8(v)
+ return base
+}
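
To make the backward-writing layout concrete: encodeVarintV3Lock first subtracts the varint's size (via sovV3Lock, defined below), then writes the value forward in little-endian base-128 groups with a continuation bit on every byte but the last. A small sketch, placed hypothetically as a test in the same package:

package v3lockpb

import "testing"

func TestEncodeVarint300(t *testing.T) {
	buf := make([]byte, sovV3Lock(300)) // sovV3Lock(300) == 2
	base := encodeVarintV3Lock(buf, len(buf), 300)
	// 300 = 0b10_0101100: low 7 bits 0x2C with the continuation bit set (0xAC), then 0x02.
	if base != 0 || buf[0] != 0xAC || buf[1] != 0x02 {
		t.Fatalf("got base=%d buf=%x", base, buf)
	}
}
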
+func (m *LockRequest) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ l = len(m.Name)
+ if l > 0 {
+ n += 1 + l + sovV3Lock(uint64(l))
+ }
+ if m.Lease != 0 {
+ n += 1 + sovV3Lock(uint64(m.Lease))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *LockResponse) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ if m.Header != nil {
+ l = m.Header.Size()
+ n += 1 + l + sovV3Lock(uint64(l))
+ }
+ l = len(m.Key)
+ if l > 0 {
+ n += 1 + l + sovV3Lock(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *UnlockRequest) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ l = len(m.Key)
+ if l > 0 {
+ n += 1 + l + sovV3Lock(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func (m *UnlockResponse) Size() (n int) {
+ if m == nil {
+ return 0
+ }
+ var l int
+ _ = l
+ if m.Header != nil {
+ l = m.Header.Size()
+ n += 1 + l + sovV3Lock(uint64(l))
+ }
+ if m.XXX_unrecognized != nil {
+ n += len(m.XXX_unrecognized)
+ }
+ return n
+}
+
+func sovV3Lock(x uint64) (n int) {
+ return (math_bits.Len64(x|1) + 6) / 7
+}
+func sozV3Lock(x uint64) (n int) {
+ return sovV3Lock(uint64((x << 1) ^ uint64((int64(x) >> 63))))
+}
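
sovV3Lock sizes an unsigned varint at one byte per 7 significant bits, while sozV3Lock first applies the zigzag mapping so small negative values stay small: -1 maps to 1, 1 to 2, -2 to 3, and so on. A quick sketch under the same hypothetical in-package-test assumption:

package v3lockpb

import "testing"

func TestVarintSizes(t *testing.T) {
	if got := sovV3Lock(uint64(1) << 56); got != 9 { // 57 significant bits need 9 varint bytes
		t.Fatalf("sov(1<<56) = %d, want 9", got)
	}
	if got := sozV3Lock(uint64(int64(-1))); got != 1 { // zigzag(-1) == 1, a single byte
		t.Fatalf("soz(-1) = %d, want 1", got)
	}
}
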
+func (m *LockRequest) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: LockRequest: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: LockRequest: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Name = append(m.Name[:0], dAtA[iNdEx:postIndex]...)
+ if m.Name == nil {
+ m.Name = []byte{}
+ }
+ iNdEx = postIndex
+ case 2:
+ if wireType != 0 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Lease", wireType)
+ }
+ m.Lease = 0
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ m.Lease |= int64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Lock(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *LockResponse) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: LockResponse: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: LockResponse: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Header", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Header == nil {
+ m.Header = &etcdserverpb.ResponseHeader{}
+ }
+ if err := m.Header.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ case 2:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Key", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Key = append(m.Key[:0], dAtA[iNdEx:postIndex]...)
+ if m.Key == nil {
+ m.Key = []byte{}
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Lock(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *UnlockRequest) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: UnlockRequest: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: UnlockRequest: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Key", wireType)
+ }
+ var byteLen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ byteLen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if byteLen < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ postIndex := iNdEx + byteLen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.Key = append(m.Key[:0], dAtA[iNdEx:postIndex]...)
+ if m.Key == nil {
+ m.Key = []byte{}
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Lock(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func (m *UnlockResponse) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= uint64(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: UnlockResponse: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: UnlockResponse: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field Header", wireType)
+ }
+ var msglen int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ msglen |= int(b&0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if msglen < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ postIndex := iNdEx + msglen
+ if postIndex < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ if m.Header == nil {
+ m.Header = &etcdserverpb.ResponseHeader{}
+ }
+ if err := m.Header.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+ return err
+ }
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipV3Lock(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if (skippy < 0) || (iNdEx+skippy) < 0 {
+ return ErrInvalidLengthV3Lock
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func skipV3Lock(dAtA []byte) (n int, err error) {
+ l := len(dAtA)
+ iNdEx := 0
+ depth := 0
+ for iNdEx < l {
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= (uint64(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ wireType := int(wire & 0x7)
+ switch wireType {
+ case 0:
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ iNdEx++
+ if dAtA[iNdEx-1] < 0x80 {
+ break
+ }
+ }
+ case 1:
+ iNdEx += 8
+ case 2:
+ var length int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowV3Lock
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ length |= (int(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ if length < 0 {
+ return 0, ErrInvalidLengthV3Lock
+ }
+ iNdEx += length
+ case 3:
+ depth++
+ case 4:
+ if depth == 0 {
+ return 0, ErrUnexpectedEndOfGroupV3Lock
+ }
+ depth--
+ case 5:
+ iNdEx += 4
+ default:
+ return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
+ }
+ if iNdEx < 0 {
+ return 0, ErrInvalidLengthV3Lock
+ }
+ if depth == 0 {
+ return iNdEx, nil
+ }
+ }
+ return 0, io.ErrUnexpectedEOF
+}
+
+var (
+ ErrInvalidLengthV3Lock = fmt.Errorf("proto: negative length found during unmarshaling")
+ ErrIntOverflowV3Lock = fmt.Errorf("proto: integer overflow")
+ ErrUnexpectedEndOfGroupV3Lock = fmt.Errorf("proto: unexpected end of group")
+)
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/v3lock.proto b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/v3lock.proto
new file mode 100644
index 0000000..88a1c82
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb/v3lock.proto
@@ -0,0 +1,67 @@
+syntax = "proto3";
+package v3lockpb;
+
+import "gogoproto/gogo.proto";
+import "etcd/api/etcdserverpb/rpc.proto";
+
+// for grpc-gateway
+import "google/api/annotations.proto";
+
+option go_package = "go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb";
+
+option (gogoproto.marshaler_all) = true;
+option (gogoproto.unmarshaler_all) = true;
+
+// The lock service exposes client-side locking facilities as a gRPC interface.
+service Lock {
+ // Lock acquires a distributed shared lock on a given named lock.
+ // On success, it will return a unique key that exists so long as the
+ // lock is held by the caller. This key can be used in conjunction with
+ // transactions to safely ensure updates to etcd only occur while holding
+ // lock ownership. The lock is held until Unlock is called on the key or the
+ // lease associated with the owner expires.
+ rpc Lock(LockRequest) returns (LockResponse) {
+ option (google.api.http) = {
+ post: "/v3/lock/lock"
+ body: "*"
+ };
+ }
+
+ // Unlock takes a key returned by Lock and releases the hold on the lock. The
+ // next Lock caller waiting for the lock will then be woken up and given
+ // ownership of the lock.
+ rpc Unlock(UnlockRequest) returns (UnlockResponse) {
+ option (google.api.http) = {
+ post: "/v3/lock/unlock"
+ body: "*"
+ };
+ }
+}
+
+message LockRequest {
+ // name is the identifier for the distributed shared lock to be acquired.
+ bytes name = 1;
+ // lease is the ID of the lease that will be attached to ownership of the
+ // lock. If the lease expires or is revoked while it holds the lock,
+ // the lock is automatically released. Calls to Lock with the same lease will
+ // be treated as a single acquisition; locking twice with the same lease is a
+ // no-op.
+ int64 lease = 2;
+}
+
+message LockResponse {
+ etcdserverpb.ResponseHeader header = 1;
+ // key is a key that will exist on etcd for the duration that the Lock caller
+ // owns the lock. Users should not modify this key or the lock may exhibit
+ // undefined behavior.
+ bytes key = 2;
+}
+
+message UnlockRequest {
+ // key is the lock ownership key granted by Lock.
+ bytes key = 1;
+}
+
+message UnlockResponse {
+ etcdserverpb.ResponseHeader header = 1;
+}
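
The google.api.http options above are what the grpc-gateway stubs earlier in this patch serve. A hedged sketch of exercising that HTTP mapping, assuming a member listening on 127.0.0.1:2379; the gateway's JSON form base64-encodes bytes fields, so the lock name "my-lock" travels as "bXktbG9jaw==":

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	body := bytes.NewBufferString(`{"name":"bXktbG9jaw=="}`)
	resp, err := http.Post("http://127.0.0.1:2379/v3/lock/lock", "application/json", body)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // the response's base64 "key" is what /v3/lock/unlock expects
}
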
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/auth.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/auth.go
new file mode 100644
index 0000000..6c5db76
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/auth.go
@@ -0,0 +1,187 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "context"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+)
+
+type AuthServer struct {
+ authenticator etcdserver.Authenticator
+}
+
+func NewAuthServer(s *etcdserver.EtcdServer) *AuthServer {
+ return &AuthServer{authenticator: s}
+}
+
+func (as *AuthServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
+ resp, err := as.authenticator.AuthEnable(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
+ resp, err := as.authenticator.AuthDisable(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) AuthStatus(ctx context.Context, r *pb.AuthStatusRequest) (*pb.AuthStatusResponse, error) {
+ resp, err := as.authenticator.AuthStatus(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
+ resp, err := as.authenticator.Authenticate(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
+ resp, err := as.authenticator.RoleAdd(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
+ resp, err := as.authenticator.RoleDelete(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
+ resp, err := as.authenticator.RoleGet(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
+ resp, err := as.authenticator.RoleList(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
+ resp, err := as.authenticator.RoleRevokePermission(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
+ resp, err := as.authenticator.RoleGrantPermission(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
+ resp, err := as.authenticator.UserAdd(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
+ resp, err := as.authenticator.UserDelete(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
+ resp, err := as.authenticator.UserGet(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
+ resp, err := as.authenticator.UserList(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
+ resp, err := as.authenticator.UserGrantRole(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
+ resp, err := as.authenticator.UserRevokeRole(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
+
+func (as *AuthServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
+ resp, err := as.authenticator.UserChangePassword(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return resp, nil
+}
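
Every wrapper above has the same shape: delegate to the authenticator, then map server-side errors onto gRPC status codes via togRPCError. As an illustration only (the vendored file keeps the expansion explicit rather than using a helper), the shared shape could be captured as:

package v3rpc // hypothetical placement; wrap is not part of this patch

import "context"

func wrap[Req, Resp any](ctx context.Context, r Req, call func(context.Context, Req) (Resp, error)) (Resp, error) {
	resp, err := call(ctx, r)
	if err != nil {
		var zero Resp
		return zero, togRPCError(err) // togRPCError is this package's error mapper
	}
	return resp, nil
}

// AuthEnable, for example, would then reduce to:
//   return wrap(ctx, r, as.authenticator.AuthEnable)
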
+
+type AuthGetter interface {
+ AuthInfoFromCtx(ctx context.Context) (*auth.AuthInfo, error)
+ AuthStore() auth.AuthStore
+}
+
+type AuthAdmin struct {
+ ag AuthGetter
+}
+
+// isPermitted verifies the user has admin privilege.
+// Only users with "root" role are permitted.
+func (aa *AuthAdmin) isPermitted(ctx context.Context) error {
+ authInfo, err := aa.ag.AuthInfoFromCtx(ctx)
+ if err != nil {
+ return err
+ }
+
+ return aa.ag.AuthStore().IsAdminPermitted(authInfo)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/codec.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/codec.go
new file mode 100644
index 0000000..1bbed83
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/codec.go
@@ -0,0 +1,34 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import "github.com/golang/protobuf/proto"
+
+type codec struct{}
+
+func (c *codec) Marshal(v any) ([]byte, error) {
+ b, err := proto.Marshal(v.(proto.Message))
+ sentBytes.Add(float64(len(b)))
+ return b, err
+}
+
+func (c *codec) Unmarshal(data []byte, v any) error {
+ receivedBytes.Add(float64(len(data)))
+ return proto.Unmarshal(data, v.(proto.Message))
+}
+
+func (c *codec) String() string {
+ return "proto"
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/grpc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/grpc.go
new file mode 100644
index 0000000..efa1514
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/grpc.go
@@ -0,0 +1,97 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "crypto/tls"
+ "math"
+
+ grpc_prometheus "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus"
+ "github.com/prometheus/client_golang/prometheus"
+ "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
+ "go.uber.org/zap"
+ "google.golang.org/grpc"
+ "google.golang.org/grpc/health"
+ healthpb "google.golang.org/grpc/health/grpc_health_v1"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/client/v3/credentials"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+)
+
+const (
+ maxSendBytes = math.MaxInt32
+)
+
+func Server(s *etcdserver.EtcdServer, tls *tls.Config, interceptor grpc.UnaryServerInterceptor, gopts ...grpc.ServerOption) *grpc.Server {
+ var opts []grpc.ServerOption
+ opts = append(opts, grpc.CustomCodec(&codec{}))
+ if tls != nil {
+ opts = append(opts, grpc.Creds(credentials.NewTransportCredential(tls)))
+ }
+
+ var mopts []grpc_prometheus.ServerMetricsOption
+ if s.Cfg.Metrics == "extensive" {
+ mopts = append(mopts, grpc_prometheus.WithServerHandlingTimeHistogram())
+ }
+ serverMetrics := grpc_prometheus.NewServerMetrics(mopts...)
+ err := prometheus.Register(serverMetrics)
+ if err != nil {
+ s.Cfg.Logger.Warn("etcdserver: failed to register grpc metrics", zap.Error(err))
+ }
+
+ chainUnaryInterceptors := []grpc.UnaryServerInterceptor{
+ newLogUnaryInterceptor(s),
+ newUnaryInterceptor(s),
+ serverMetrics.UnaryServerInterceptor(),
+ }
+ if interceptor != nil {
+ chainUnaryInterceptors = append(chainUnaryInterceptors, interceptor)
+ }
+
+ chainStreamInterceptors := []grpc.StreamServerInterceptor{
+ newStreamInterceptor(s),
+ serverMetrics.StreamServerInterceptor(),
+ }
+
+ if s.Cfg.EnableDistributedTracing {
+ opts = append(opts, grpc.StatsHandler(otelgrpc.NewServerHandler(s.Cfg.TracerOptions...)))
+ }
+
+ opts = append(opts, grpc.ChainUnaryInterceptor(chainUnaryInterceptors...))
+ opts = append(opts, grpc.ChainStreamInterceptor(chainStreamInterceptors...))
+
+ opts = append(opts, grpc.MaxRecvMsgSize(int(s.Cfg.MaxRequestBytesWithOverhead())))
+ opts = append(opts, grpc.MaxSendMsgSize(maxSendBytes))
+ opts = append(opts, grpc.MaxConcurrentStreams(s.Cfg.MaxConcurrentStreams))
+
+ grpcServer := grpc.NewServer(append(opts, gopts...)...)
+
+ pb.RegisterKVServer(grpcServer, NewQuotaKVServer(s))
+ pb.RegisterWatchServer(grpcServer, NewWatchServer(s))
+ pb.RegisterLeaseServer(grpcServer, NewQuotaLeaseServer(s))
+ pb.RegisterClusterServer(grpcServer, NewClusterServer(s))
+ pb.RegisterAuthServer(grpcServer, NewAuthServer(s))
+
+ hsrv := health.NewServer()
+ healthNotifier := newHealthNotifier(hsrv, s)
+ healthpb.RegisterHealthServer(grpcServer, hsrv)
+ pb.RegisterMaintenanceServer(grpcServer, NewMaintenanceServer(s, healthNotifier))
+
+ // set zero values for metrics registered for this grpc server
+ serverMetrics.InitializeMetrics(grpcServer)
+
+ return grpcServer
+}
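
grpc.ChainUnaryInterceptor runs interceptors in slice order, so the ordering above means the log interceptor wraps the capability/learner checks, which in turn wrap metrics and any caller-supplied interceptor. A standalone sketch of that semantics, with illustrative names only:

package main

import (
	"context"
	"fmt"

	"google.golang.org/grpc"
)

func tag(name string) grpc.UnaryServerInterceptor {
	return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, h grpc.UnaryHandler) (any, error) {
		fmt.Println("enter", name)
		defer fmt.Println("exit", name)
		return h(ctx, req)
	}
}

func main() {
	// A request served by this server prints: enter log, enter check,
	// enter metrics, then the exits in reverse order.
	_ = grpc.NewServer(grpc.ChainUnaryInterceptor(tag("log"), tag("check"), tag("metrics")))
}
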
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/header.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/header.go
new file mode 100644
index 0000000..8fe4e58
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/header.go
@@ -0,0 +1,50 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/etcdserver/apply"
+)
+
+type header struct {
+ clusterID int64
+ memberID int64
+ sg apply.RaftStatusGetter
+ rev func() int64
+}
+
+func newHeader(s *etcdserver.EtcdServer) header {
+ return header{
+ clusterID: int64(s.Cluster().ID()),
+ memberID: int64(s.MemberID()),
+ sg: s,
+ rev: func() int64 { return s.KV().Rev() },
+ }
+}
+
+// fill populates pb.ResponseHeader using etcdserver information
+func (h *header) fill(rh *pb.ResponseHeader) {
+ if rh == nil {
+ panic("unexpected nil resp.Header")
+ }
+ rh.ClusterId = uint64(h.clusterID)
+ rh.MemberId = uint64(h.memberID)
+ rh.RaftTerm = h.sg.Term()
+ if rh.Revision == 0 {
+ rh.Revision = h.rev()
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/health.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/health.go
new file mode 100644
index 0000000..2861e11
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/health.go
@@ -0,0 +1,79 @@
+// Copyright 2023 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "go.uber.org/zap"
+ "google.golang.org/grpc/health"
+ healthpb "google.golang.org/grpc/health/grpc_health_v1"
+
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/features"
+)
+
+const (
+ allGRPCServices = ""
+)
+
+type notifier interface {
+ defragStarted()
+ defragFinished()
+}
+
+func newHealthNotifier(hs *health.Server, s *etcdserver.EtcdServer) notifier {
+ if hs == nil {
+ panic("unexpected nil gRPC health server")
+ }
+ hc := &healthNotifier{hs: hs, lg: s.Logger(), stopGRPCServiceOnDefrag: s.FeatureEnabled(features.StopGRPCServiceOnDefrag)}
+ // mark the grpc health server as serving unconditionally, since
+ // the grpc server will serve iff s.ReadyNotify() is closed.
+ hc.startServe()
+ return hc
+}
+
+type healthNotifier struct {
+ hs *health.Server
+ lg *zap.Logger
+
+ stopGRPCServiceOnDefrag bool
+}
+
+func (hc *healthNotifier) defragStarted() {
+ if !hc.stopGRPCServiceOnDefrag {
+ return
+ }
+ hc.stopServe("defrag is active")
+}
+
+func (hc *healthNotifier) defragFinished() { hc.startServe() }
+
+func (hc *healthNotifier) startServe() {
+ hc.lg.Info(
+ "grpc service status changed",
+ zap.String("service", allGRPCServices),
+ zap.String("status", healthpb.HealthCheckResponse_SERVING.String()),
+ )
+ hc.hs.SetServingStatus(allGRPCServices, healthpb.HealthCheckResponse_SERVING)
+}
+
+func (hc *healthNotifier) stopServe(reason string) {
+ hc.lg.Warn(
+ "grpc service status changed",
+ zap.String("service", allGRPCServices),
+ zap.String("status", healthpb.HealthCheckResponse_NOT_SERVING.String()),
+ zap.String("reason", reason),
+ )
+ hc.hs.SetServingStatus(allGRPCServices, healthpb.HealthCheckResponse_NOT_SERVING)
+}
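
The status toggled by healthNotifier is observable through the standard gRPC health-checking protocol. A minimal sketch of a client probe, assuming a connection to the member on 127.0.0.1:2379:

package main

import (
	"context"
	"fmt"
	"log"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
)

func main() {
	conn, err := grpc.Dial("127.0.0.1:2379", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Service "" matches allGRPCServices above: the whole server's status.
	resp, err := healthpb.NewHealthClient(conn).Check(context.TODO(), &healthpb.HealthCheckRequest{Service: ""})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Status) // SERVING, or NOT_SERVING while a defrag is active
}
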
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/interceptor.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/interceptor.go
new file mode 100644
index 0000000..697d0b0
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/interceptor.go
@@ -0,0 +1,353 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "context"
+ "sync"
+ "time"
+ "unicode/utf8"
+
+ "go.uber.org/zap"
+ "google.golang.org/grpc"
+ "google.golang.org/grpc/metadata"
+ "google.golang.org/grpc/peer"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/etcdserver/api"
+ "go.etcd.io/raft/v3"
+)
+
+const (
+ maxNoLeaderCnt = 3
+ snapshotMethod = "/etcdserverpb.Maintenance/Snapshot"
+)
+
+type streamsMap struct {
+ mu sync.Mutex
+ streams map[grpc.ServerStream]struct{}
+}
+
+func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
+ return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) {
+ if !api.IsCapabilityEnabled(api.V3rpcCapability) {
+ return nil, rpctypes.ErrGRPCNotCapable
+ }
+
+ if s.IsMemberExist(s.MemberID()) && s.IsLearner() && !isRPCSupportedForLearner(req) {
+ return nil, rpctypes.ErrGRPCNotSupportedForLearner
+ }
+
+ md, ok := metadata.FromIncomingContext(ctx)
+ if ok {
+ ver, vs := "unknown", md.Get(rpctypes.MetadataClientAPIVersionKey)
+ if len(vs) > 0 {
+ ver = vs[0]
+ }
+ if !utf8.ValidString(ver) {
+ return nil, rpctypes.ErrGRPCInvalidClientAPIVersion
+ }
+ clientRequests.WithLabelValues("unary", ver).Inc()
+
+ if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
+ if s.Leader() == types.ID(raft.None) {
+ return nil, rpctypes.ErrGRPCNoLeader
+ }
+ }
+ }
+
+ return handler(ctx, req)
+ }
+}
+
+func newLogUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
+ return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) {
+ startTime := time.Now()
+ resp, err := handler(ctx, req)
+ lg := s.Logger()
+ if lg != nil { // gather stats when debug logging is enabled or the request proves expensive
+ defer logUnaryRequestStats(ctx, lg, s.Cfg.WarningUnaryRequestDuration, info, startTime, req, resp)
+ }
+ return resp, err
+ }
+}
+
+func logUnaryRequestStats(ctx context.Context, lg *zap.Logger, warnLatency time.Duration, info *grpc.UnaryServerInfo, startTime time.Time, req any, resp any) {
+ duration := time.Since(startTime)
+ var enabledDebugLevel, expensiveRequest bool
+ if lg.Core().Enabled(zap.DebugLevel) {
+ enabledDebugLevel = true
+ }
+ if duration > warnLatency {
+ expensiveRequest = true
+ }
+ if !enabledDebugLevel && !expensiveRequest {
+ return
+ }
+ remote := "No remote client info."
+ peerInfo, ok := peer.FromContext(ctx)
+ if ok {
+ remote = peerInfo.Addr.String()
+ }
+ responseType := info.FullMethod
+ var reqCount, respCount int64
+ var reqSize, respSize int
+ var reqContent string
+ switch _resp := resp.(type) {
+ case *pb.RangeResponse:
+ _req, ok := req.(*pb.RangeRequest)
+ if ok {
+ reqCount = 0
+ reqSize = _req.Size()
+ reqContent = _req.String()
+ }
+ if _resp != nil {
+ respCount = _resp.GetCount()
+ respSize = _resp.Size()
+ }
+ case *pb.PutResponse:
+ _req, ok := req.(*pb.PutRequest)
+ if ok {
+ reqCount = 1
+ reqSize = _req.Size()
+ reqContent = pb.NewLoggablePutRequest(_req).String()
+ // redact value field from request content, see PR #9821
+ }
+ if _resp != nil {
+ respCount = 0
+ respSize = _resp.Size()
+ }
+ case *pb.DeleteRangeResponse:
+ _req, ok := req.(*pb.DeleteRangeRequest)
+ if ok {
+ reqCount = 0
+ reqSize = _req.Size()
+ reqContent = _req.String()
+ }
+ if _resp != nil {
+ respCount = _resp.GetDeleted()
+ respSize = _resp.Size()
+ }
+ case *pb.TxnResponse:
+ _req, ok := req.(*pb.TxnRequest)
+ if ok && _resp != nil {
+ if _resp.GetSucceeded() { // determine the 'actual' count and size of request based on success or failure
+ reqCount = int64(len(_req.GetSuccess()))
+ reqSize = 0
+ for _, r := range _req.GetSuccess() {
+ reqSize += r.Size()
+ }
+ } else {
+ reqCount = int64(len(_req.GetFailure()))
+ reqSize = 0
+ for _, r := range _req.GetFailure() {
+ reqSize += r.Size()
+ }
+ }
+ reqContent = pb.NewLoggableTxnRequest(_req).String()
+ // redact value field from request content, see PR #9821
+ }
+ if _resp != nil {
+ respCount = 0
+ respSize = _resp.Size()
+ }
+ default:
+ reqCount = -1
+ reqSize = -1
+ respCount = -1
+ respSize = -1
+ }
+
+ if enabledDebugLevel {
+ logGenericRequestStats(lg, startTime, duration, remote, responseType, reqCount, reqSize, respCount, respSize, reqContent)
+ } else if expensiveRequest {
+ logExpensiveRequestStats(lg, startTime, duration, remote, responseType, reqCount, reqSize, respCount, respSize, reqContent)
+ }
+}
+
+func logGenericRequestStats(lg *zap.Logger, startTime time.Time, duration time.Duration, remote string, responseType string,
+ reqCount int64, reqSize int, respCount int64, respSize int, reqContent string,
+) {
+ lg.Debug("request stats",
+ zap.Time("start time", startTime),
+ zap.Duration("time spent", duration),
+ zap.String("remote", remote),
+ zap.String("response type", responseType),
+ zap.Int64("request count", reqCount),
+ zap.Int("request size", reqSize),
+ zap.Int64("response count", respCount),
+ zap.Int("response size", respSize),
+ zap.String("request content", reqContent),
+ )
+}
+
+func logExpensiveRequestStats(lg *zap.Logger, startTime time.Time, duration time.Duration, remote string, responseType string,
+ reqCount int64, reqSize int, respCount int64, respSize int, reqContent string,
+) {
+ lg.Warn("request stats",
+ zap.Time("start time", startTime),
+ zap.Duration("time spent", duration),
+ zap.String("remote", remote),
+ zap.String("response type", responseType),
+ zap.Int64("request count", reqCount),
+ zap.Int("request size", reqSize),
+ zap.Int64("response count", respCount),
+ zap.Int("response size", respSize),
+ zap.String("request content", reqContent),
+ )
+}
+
+func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor {
+ smap := monitorLeader(s)
+
+ return func(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
+ if !api.IsCapabilityEnabled(api.V3rpcCapability) {
+ return rpctypes.ErrGRPCNotCapable
+ }
+
+ if s.IsMemberExist(s.MemberID()) && s.IsLearner() && info.FullMethod != snapshotMethod { // learner does not support stream RPC except Snapshot
+ return rpctypes.ErrGRPCNotSupportedForLearner
+ }
+
+ md, ok := metadata.FromIncomingContext(ss.Context())
+ if ok {
+ ver, vs := "unknown", md.Get(rpctypes.MetadataClientAPIVersionKey)
+ if len(vs) > 0 {
+ ver = vs[0]
+ }
+ if !utf8.ValidString(ver) {
+ return rpctypes.ErrGRPCInvalidClientAPIVersion
+ }
+ clientRequests.WithLabelValues("stream", ver).Inc()
+
+ if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
+ if s.Leader() == types.ID(raft.None) {
+ return rpctypes.ErrGRPCNoLeader
+ }
+
+ ctx := newCancellableContext(ss.Context())
+ ss = serverStreamWithCtx{ctx: ctx, ServerStream: ss}
+
+ smap.mu.Lock()
+ smap.streams[ss] = struct{}{}
+ smap.mu.Unlock()
+
+ defer func() {
+ smap.mu.Lock()
+ delete(smap.streams, ss)
+ smap.mu.Unlock()
+ // TODO: investigate whether the reason for cancellation here is useful to know
+ ctx.Cancel(nil)
+ }()
+ }
+ }
+
+ return handler(srv, ss)
+ }
+}
+
+// cancellableContext wraps a context in a new cancellable context that allows a
+// specific cancellation error to be preserved and later retrieved using the
+// Context.Err() function. This is so downstream context users can disambiguate
+// the reason for the cancellation which could be from the client (for example)
+// or from this interceptor code.
+type cancellableContext struct {
+ context.Context
+
+ lock sync.RWMutex
+ cancel context.CancelFunc
+ cancelReason error
+}
+
+func newCancellableContext(parent context.Context) *cancellableContext {
+ ctx, cancel := context.WithCancel(parent)
+ return &cancellableContext{
+ Context: ctx,
+ cancel: cancel,
+ }
+}
+
+// Cancel stores the cancellation reason and then invokes the CancelFunc
+// obtained from context.WithCancel on the parent context.
+func (c *cancellableContext) Cancel(reason error) {
+ c.lock.Lock()
+ c.cancelReason = reason
+ c.lock.Unlock()
+ c.cancel()
+}
+
+// Err will return the preserved cancel reason error if present, and will
+// otherwise return the underlying error from the parent context.
+func (c *cancellableContext) Err() error {
+ c.lock.RLock()
+ defer c.lock.RUnlock()
+ if c.cancelReason != nil {
+ return c.cancelReason
+ }
+ return c.Context.Err()
+}
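
A self-contained sketch of the pattern, as a miniature of the type above rather than the vendored code itself: the stored reason survives cancellation, so Err() reports why a stream was torn down instead of a bare context.Canceled.

package main

import (
	"context"
	"errors"
	"fmt"
	"sync"
)

// reasonCtx mirrors cancellableContext in miniature.
type reasonCtx struct {
	context.Context
	mu     sync.RWMutex
	cancel context.CancelFunc
	reason error
}

func (c *reasonCtx) Cancel(reason error) {
	c.mu.Lock()
	c.reason = reason
	c.mu.Unlock()
	c.cancel()
}

func (c *reasonCtx) Err() error {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.reason != nil {
		return c.reason
	}
	return c.Context.Err()
}

func main() {
	parent, cancel := context.WithCancel(context.Background())
	c := &reasonCtx{Context: parent, cancel: cancel}
	c.Cancel(errors.New("no leader"))
	fmt.Println(c.Err()) // prints "no leader", not context.Canceled
}
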
+
+type serverStreamWithCtx struct {
+ grpc.ServerStream
+
+ // ctx is used so that we can preserve a reason for cancellation.
+ ctx *cancellableContext
+}
+
+func (ssc serverStreamWithCtx) Context() context.Context { return ssc.ctx }
+
+func monitorLeader(s *etcdserver.EtcdServer) *streamsMap {
+ smap := &streamsMap{
+ streams: make(map[grpc.ServerStream]struct{}),
+ }
+
+ s.GoAttach(func() {
+ election := time.Duration(s.Cfg.TickMs) * time.Duration(s.Cfg.ElectionTicks) * time.Millisecond
+ noLeaderCnt := 0
+
+ for {
+ select {
+ case <-s.StoppingNotify():
+ return
+ case <-time.After(election):
+ if s.Leader() == types.ID(raft.None) {
+ noLeaderCnt++
+ } else {
+ noLeaderCnt = 0
+ }
+
+ // We are more conservative on canceling existing streams. Reconnecting streams
+ // cost much more than just rejecting new requests. So we wait until the member
+ // cannot find a leader for maxNoLeaderCnt election timeouts to cancel existing streams.
+ if noLeaderCnt >= maxNoLeaderCnt {
+ smap.mu.Lock()
+ for ss := range smap.streams {
+ if ssWithCtx, ok := ss.(serverStreamWithCtx); ok {
+ ssWithCtx.ctx.Cancel(rpctypes.ErrGRPCNoLeader)
+ <-ss.Context().Done()
+ }
+ }
+ smap.streams = make(map[grpc.ServerStream]struct{})
+ smap.mu.Unlock()
+ }
+ }
+ }
+ })
+
+ return smap
+}
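
For intuition about the cadence: with etcd's usual defaults of TickMs=100 and ElectionTicks=10 (assumed here, not taken from this patch), the monitor wakes roughly every second and cancels streams only after three consecutive leaderless checks. A throwaway sketch of the arithmetic:

package main

import (
	"fmt"
	"time"
)

func main() {
	const tickMs, electionTicks = 100, 10 // assumed defaults
	election := time.Duration(tickMs) * time.Duration(electionTicks) * time.Millisecond
	fmt.Println(election)     // 1s between leader checks
	fmt.Println(3 * election) // ~3s without a leader before streams are cancelled (maxNoLeaderCnt = 3)
}
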
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/key.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/key.go
new file mode 100644
index 0000000..2c1de2a
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/key.go
@@ -0,0 +1,280 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package v3rpc implements etcd v3 RPC system based on gRPC.
+package v3rpc
+
+import (
+ "context"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/pkg/v3/adt"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+)
+
+type kvServer struct {
+ hdr header
+ kv etcdserver.RaftKV
+ // maxTxnOps is the max operations per txn.
+ // e.g. suppose maxTxnOps = 128.
+ // Txn.Success can have at most 128 operations,
+ // and Txn.Failure can have at most 128 operations.
+ maxTxnOps uint
+}
+
+func NewKVServer(s *etcdserver.EtcdServer) pb.KVServer {
+ return &kvServer{hdr: newHeader(s), kv: s, maxTxnOps: s.Cfg.MaxTxnOps}
+}
+
+func (s *kvServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
+ if err := checkRangeRequest(r); err != nil {
+ return nil, err
+ }
+
+ resp, err := s.kv.Range(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+
+ s.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (s *kvServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
+ if err := checkPutRequest(r); err != nil {
+ return nil, err
+ }
+
+ resp, err := s.kv.Put(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+
+ s.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (s *kvServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
+ if err := checkDeleteRequest(r); err != nil {
+ return nil, err
+ }
+
+ resp, err := s.kv.DeleteRange(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+
+ s.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (s *kvServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
+ if err := checkTxnRequest(r, int(s.maxTxnOps)); err != nil {
+ return nil, err
+ }
+ // check for forbidden put/del overlaps after checking request to avoid quadratic blowup
+ if _, _, err := checkIntervals(r.Success); err != nil {
+ return nil, err
+ }
+ if _, _, err := checkIntervals(r.Failure); err != nil {
+ return nil, err
+ }
+
+ resp, err := s.kv.Txn(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+
+ s.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (s *kvServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
+ resp, err := s.kv.Compact(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+
+ s.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func checkRangeRequest(r *pb.RangeRequest) error {
+ if len(r.Key) == 0 {
+ return rpctypes.ErrGRPCEmptyKey
+ }
+
+ if _, ok := pb.RangeRequest_SortOrder_name[int32(r.SortOrder)]; !ok {
+ return rpctypes.ErrGRPCInvalidSortOption
+ }
+
+ if _, ok := pb.RangeRequest_SortTarget_name[int32(r.SortTarget)]; !ok {
+ return rpctypes.ErrGRPCInvalidSortOption
+ }
+
+ return nil
+}
+
+func checkPutRequest(r *pb.PutRequest) error {
+ if len(r.Key) == 0 {
+ return rpctypes.ErrGRPCEmptyKey
+ }
+ if r.IgnoreValue && len(r.Value) != 0 {
+ return rpctypes.ErrGRPCValueProvided
+ }
+ if r.IgnoreLease && r.Lease != 0 {
+ return rpctypes.ErrGRPCLeaseProvided
+ }
+ return nil
+}
+
+func checkDeleteRequest(r *pb.DeleteRangeRequest) error {
+ if len(r.Key) == 0 {
+ return rpctypes.ErrGRPCEmptyKey
+ }
+ return nil
+}
+
+func checkTxnRequest(r *pb.TxnRequest, maxTxnOps int) error {
+ opc := len(r.Compare)
+ if opc < len(r.Success) {
+ opc = len(r.Success)
+ }
+ if opc < len(r.Failure) {
+ opc = len(r.Failure)
+ }
+ if opc > maxTxnOps {
+ return rpctypes.ErrGRPCTooManyOps
+ }
+
+ for _, c := range r.Compare {
+ if len(c.Key) == 0 {
+ return rpctypes.ErrGRPCEmptyKey
+ }
+ }
+ for _, u := range r.Success {
+ if err := checkRequestOp(u, maxTxnOps-opc); err != nil {
+ return err
+ }
+ }
+ for _, u := range r.Failure {
+ if err := checkRequestOp(u, maxTxnOps-opc); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// checkIntervals tests whether puts and deletes overlap for a list of ops. If
+// there is an overlap, it returns an error; if there is none, it returns the
+// put and delete sets for recursive evaluation.
+func checkIntervals(reqs []*pb.RequestOp) (map[string]struct{}, adt.IntervalTree, error) {
+ dels := adt.NewIntervalTree()
+
+ // collect deletes from this level; build first to check lower level overlapped puts
+ for _, req := range reqs {
+ tv, ok := req.Request.(*pb.RequestOp_RequestDeleteRange)
+ if !ok {
+ continue
+ }
+ dreq := tv.RequestDeleteRange
+ if dreq == nil {
+ continue
+ }
+ var iv adt.Interval
+ if len(dreq.RangeEnd) != 0 {
+ iv = adt.NewStringAffineInterval(string(dreq.Key), string(dreq.RangeEnd))
+ } else {
+ iv = adt.NewStringAffinePoint(string(dreq.Key))
+ }
+ dels.Insert(iv, struct{}{})
+ }
+
+ // collect children puts/deletes
+ puts := make(map[string]struct{})
+ for _, req := range reqs {
+ tv, ok := req.Request.(*pb.RequestOp_RequestTxn)
+ if !ok {
+ continue
+ }
+ putsThen, delsThen, err := checkIntervals(tv.RequestTxn.Success)
+ if err != nil {
+ return nil, dels, err
+ }
+ putsElse, delsElse, err := checkIntervals(tv.RequestTxn.Failure)
+ if err != nil {
+ return nil, dels, err
+ }
+ for k := range putsThen {
+ if _, ok := puts[k]; ok {
+ return nil, dels, rpctypes.ErrGRPCDuplicateKey
+ }
+ if dels.Intersects(adt.NewStringAffinePoint(k)) {
+ return nil, dels, rpctypes.ErrGRPCDuplicateKey
+ }
+ puts[k] = struct{}{}
+ }
+ for k := range putsElse {
+ if _, ok := puts[k]; ok {
+ // if the key is from putsThen, the overlap is OK since
+ // the then/else branches are mutually exclusive
+ if _, isSafe := putsThen[k]; !isSafe {
+ return nil, dels, rpctypes.ErrGRPCDuplicateKey
+ }
+ }
+ if dels.Intersects(adt.NewStringAffinePoint(k)) {
+ return nil, dels, rpctypes.ErrGRPCDuplicateKey
+ }
+ puts[k] = struct{}{}
+ }
+ dels.Union(delsThen, adt.NewStringAffineInterval("\x00", ""))
+ dels.Union(delsElse, adt.NewStringAffineInterval("\x00", ""))
+ }
+
+ // collect and check this level's puts
+ for _, req := range reqs {
+ tv, ok := req.Request.(*pb.RequestOp_RequestPut)
+ if !ok || tv.RequestPut == nil {
+ continue
+ }
+ k := string(tv.RequestPut.Key)
+ if _, ok := puts[k]; ok {
+ return nil, dels, rpctypes.ErrGRPCDuplicateKey
+ }
+ if dels.Intersects(adt.NewStringAffinePoint(k)) {
+ return nil, dels, rpctypes.ErrGRPCDuplicateKey
+ }
+ puts[k] = struct{}{}
+ }
+ return puts, dels, nil
+}
+
+func checkRequestOp(u *pb.RequestOp, maxTxnOps int) error {
+ // TODO: ensure only one of the field is set.
+ switch uv := u.Request.(type) {
+ case *pb.RequestOp_RequestRange:
+ return checkRangeRequest(uv.RequestRange)
+ case *pb.RequestOp_RequestPut:
+ return checkPutRequest(uv.RequestPut)
+ case *pb.RequestOp_RequestDeleteRange:
+ return checkDeleteRequest(uv.RequestDeleteRange)
+ case *pb.RequestOp_RequestTxn:
+ return checkTxnRequest(uv.RequestTxn, maxTxnOps)
+ default:
+ // empty op / nil entry
+ return rpctypes.ErrGRPCKeyNotFound
+ }
+}
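
checkIntervals relies on the adt interval tree to reject txns whose puts collide with delete ranges. The sketch below exercises just that overlap test, assuming the go.etcd.io/etcd/pkg/v3/adt package is importable as a module; it uses only the calls that appear above (NewIntervalTree, NewStringAffineInterval, NewStringAffinePoint, Insert, Intersects):

```go
package main

import (
	"fmt"

	"go.etcd.io/etcd/pkg/v3/adt"
)

func main() {
	// Build a tree of delete ranges, as checkIntervals does for one txn level.
	dels := adt.NewIntervalTree()
	dels.Insert(adt.NewStringAffineInterval("a", "c"), struct{}{}) // delete range [a, c)
	dels.Insert(adt.NewStringAffinePoint("x"), struct{}{})        // single-key delete "x"

	// A put conflicts with a delete if its key intersects any delete interval.
	for _, key := range []string{"b", "c", "x", "z"} {
		overlaps := dels.Intersects(adt.NewStringAffinePoint(key))
		fmt.Printf("put %q overlaps a delete: %v\n", key, overlaps)
	}
}
```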
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/lease.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/lease.go
new file mode 100644
index 0000000..f51334e
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/lease.go
@@ -0,0 +1,157 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "context"
+ "errors"
+ "io"
+
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/lease"
+)
+
+type LeaseServer struct {
+ lg *zap.Logger
+ hdr header
+ le etcdserver.Lessor
+}
+
+func NewLeaseServer(s *etcdserver.EtcdServer) pb.LeaseServer {
+ srv := &LeaseServer{lg: s.Cfg.Logger, le: s, hdr: newHeader(s)}
+ if srv.lg == nil {
+ srv.lg = zap.NewNop()
+ }
+ return srv
+}
+
+func (ls *LeaseServer) LeaseGrant(ctx context.Context, cr *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
+ resp, err := ls.le.LeaseGrant(ctx, cr)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ ls.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (ls *LeaseServer) LeaseRevoke(ctx context.Context, rr *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
+ resp, err := ls.le.LeaseRevoke(ctx, rr)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ ls.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (ls *LeaseServer) LeaseTimeToLive(ctx context.Context, rr *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
+ resp, err := ls.le.LeaseTimeToLive(ctx, rr)
+ if err != nil && !errors.Is(err, lease.ErrLeaseNotFound) {
+ return nil, togRPCError(err)
+ }
+ if errors.Is(err, lease.ErrLeaseNotFound) {
+ resp = &pb.LeaseTimeToLiveResponse{
+ Header: &pb.ResponseHeader{},
+ ID: rr.ID,
+ TTL: -1,
+ }
+ }
+ ls.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (ls *LeaseServer) LeaseLeases(ctx context.Context, rr *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error) {
+ resp, err := ls.le.LeaseLeases(ctx, rr)
+ if err != nil && !errors.Is(err, lease.ErrLeaseNotFound) {
+ return nil, togRPCError(err)
+ }
+ if errors.Is(err, lease.ErrLeaseNotFound) {
+ resp = &pb.LeaseLeasesResponse{
+ Header: &pb.ResponseHeader{},
+ Leases: []*pb.LeaseStatus{},
+ }
+ }
+ ls.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (ls *LeaseServer) LeaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) (err error) {
+ errc := make(chan error, 1)
+ go func() {
+ errc <- ls.leaseKeepAlive(stream)
+ }()
+ select {
+ case err = <-errc:
+ case <-stream.Context().Done():
+ // the only server-side cancellation is noleader for now.
+ err = stream.Context().Err()
+ if errors.Is(err, context.Canceled) {
+ err = rpctypes.ErrGRPCNoLeader
+ }
+ }
+ return err
+}
+
+func (ls *LeaseServer) leaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) error {
+ for {
+ req, err := stream.Recv()
+ if errors.Is(err, io.EOF) {
+ return nil
+ }
+ if err != nil {
+ if isClientCtxErr(stream.Context().Err(), err) {
+ ls.lg.Debug("failed to receive lease keepalive request from gRPC stream", zap.Error(err))
+ } else {
+ ls.lg.Warn("failed to receive lease keepalive request from gRPC stream", zap.Error(err))
+ streamFailures.WithLabelValues("receive", "lease-keepalive").Inc()
+ }
+ return err
+ }
+
+ // Create the header before we send out the renew request.
+ // This ensures that the revision is strictly smaller than or equal to the
+ // revision at which the keepalive happened at the local server (when the
+ // local server is the leader) or at the remote leader.
+ // Without this, a lease might be revoked at rev 3 while the client sees the
+ // keepalive succeed at rev 4.
+ resp := &pb.LeaseKeepAliveResponse{ID: req.ID, Header: &pb.ResponseHeader{}}
+ ls.hdr.fill(resp.Header)
+
+ ttl, err := ls.le.LeaseRenew(stream.Context(), lease.LeaseID(req.ID))
+ if errors.Is(err, lease.ErrLeaseNotFound) {
+ err = nil
+ ttl = 0
+ }
+
+ if err != nil {
+ return togRPCError(err)
+ }
+
+ resp.TTL = ttl
+ err = stream.Send(resp)
+ if err != nil {
+ if isClientCtxErr(stream.Context().Err(), err) {
+ ls.lg.Debug("failed to send lease keepalive response to gRPC stream", zap.Error(err))
+ } else {
+ ls.lg.Warn("failed to send lease keepalive response to gRPC stream", zap.Error(err))
+ streamFailures.WithLabelValues("send", "lease-keepalive").Inc()
+ }
+ return err
+ }
+ }
+}
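
LeaseKeepAlive runs the blocking receive loop in a goroutine and selects between its error channel and the stream context, so a server-side cancellation can be mapped to a domain error. Here is a minimal standalone sketch of that pattern; runStream and errServerCancelled are hypothetical names, not etcd APIs:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

var errServerCancelled = errors.New("cancelled by server")

// runStream runs a blocking handler in a goroutine so the caller can also
// observe ctx cancellation, mirroring LeaseKeepAlive above.
func runStream(ctx context.Context, handler func() error) error {
	errc := make(chan error, 1) // buffered, so the goroutine never leaks on the Done path
	go func() { errc <- handler() }()
	select {
	case err := <-errc:
		return err
	case <-ctx.Done():
		// Map the generic context error to a domain-specific one.
		if errors.Is(ctx.Err(), context.Canceled) {
			return errServerCancelled
		}
		return ctx.Err()
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	go func() { time.Sleep(10 * time.Millisecond); cancel() }()
	err := runStream(ctx, func() error {
		time.Sleep(time.Hour) // stands in for a handler blocked on Recv
		return nil
	})
	fmt.Println(err) // "cancelled by server"
}
```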
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/maintenance.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/maintenance.go
new file mode 100644
index 0000000..ec7de44
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/maintenance.go
@@ -0,0 +1,370 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "context"
+ "crypto/sha256"
+ errorspkg "errors"
+ "io"
+ "time"
+
+ "github.com/dustin/go-humanize"
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/server/v3/config"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/etcdserver/apply"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+ serverversion "go.etcd.io/etcd/server/v3/etcdserver/version"
+ "go.etcd.io/etcd/server/v3/storage/backend"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+ "go.etcd.io/etcd/server/v3/storage/schema"
+ "go.etcd.io/raft/v3"
+)
+
+type KVGetter interface {
+ KV() mvcc.WatchableKV
+}
+
+type BackendGetter interface {
+ Backend() backend.Backend
+}
+
+type Defrager interface {
+ Defragment() error
+}
+
+type Alarmer interface {
+ // Alarms is implemented in the Server interface located in etcdserver/server.go.
+ // It returns a list of the alarms present in the AlarmStore.
+ Alarms() []*pb.AlarmMember
+ Alarm(ctx context.Context, ar *pb.AlarmRequest) (*pb.AlarmResponse, error)
+}
+
+type Downgrader interface {
+ Downgrade(ctx context.Context, dr *pb.DowngradeRequest) (*pb.DowngradeResponse, error)
+}
+
+type LeaderTransferrer interface {
+ MoveLeader(ctx context.Context, lead, target uint64) error
+}
+
+type ClusterStatusGetter interface {
+ IsLearner() bool
+}
+
+type ConfigGetter interface {
+ Config() config.ServerConfig
+}
+
+type maintenanceServer struct {
+ lg *zap.Logger
+ rg apply.RaftStatusGetter
+ hasher mvcc.HashStorage
+ bg BackendGetter
+ defrag Defrager
+ a Alarmer
+ lt LeaderTransferrer
+ hdr header
+ cs ClusterStatusGetter
+ d Downgrader
+ vs serverversion.Server
+ cg ConfigGetter
+
+ healthNotifier notifier
+}
+
+func NewMaintenanceServer(s *etcdserver.EtcdServer, healthNotifier notifier) pb.MaintenanceServer {
+ srv := &maintenanceServer{
+ lg: s.Cfg.Logger,
+ rg: s,
+ hasher: s.KV().HashStorage(),
+ bg: s,
+ defrag: s,
+ a: s,
+ lt: s,
+ hdr: newHeader(s),
+ cs: s,
+ d: s,
+ vs: etcdserver.NewServerVersionAdapter(s),
+ healthNotifier: healthNotifier,
+ cg: s,
+ }
+ if srv.lg == nil {
+ srv.lg = zap.NewNop()
+ }
+ return &authMaintenanceServer{srv, &AuthAdmin{s}}
+}
+
+func (ms *maintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) {
+ ms.lg.Info("starting defragment")
+ ms.healthNotifier.defragStarted()
+ defer ms.healthNotifier.defragFinished()
+ err := ms.defrag.Defragment()
+ if err != nil {
+ ms.lg.Warn("failed to defragment", zap.Error(err))
+ return nil, togRPCError(err)
+ }
+ ms.lg.Info("finished defragment")
+ return &pb.DefragmentResponse{}, nil
+}
+
+// a buffer size big enough to hold more than one OS page
+const snapshotSendBufferSize = 32 * 1024
+
+func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error {
+ ver := schema.ReadStorageVersion(ms.bg.Backend().ReadTx())
+ storageVersion := ""
+ if ver != nil {
+ storageVersion = ver.String()
+ }
+ snap := ms.bg.Backend().Snapshot()
+ pr, pw := io.Pipe()
+
+ defer pr.Close()
+
+ go func() {
+ snap.WriteTo(pw)
+ if err := snap.Close(); err != nil {
+ ms.lg.Warn("failed to close snapshot", zap.Error(err))
+ }
+ pw.Close()
+ }()
+
+ // record SHA digest of snapshot data
+ // used for integrity checks during snapshot restore operation
+ h := sha256.New()
+
+ sent := int64(0)
+ total := snap.Size()
+ size := humanize.Bytes(uint64(total))
+
+ start := time.Now()
+ ms.lg.Info("sending database snapshot to client",
+ zap.Int64("total-bytes", total),
+ zap.String("size", size),
+ zap.String("storage-version", storageVersion),
+ )
+ for total-sent > 0 {
+ // The buffer just holds the bytes read from the stream; the response size
+ // is a multiple of the OS page size, as fetched from boltdb (e.g. 4*1024).
+ // NOTE: srv.Send does not wait until the message is received by the client,
+ // so the buffer cannot be safely reused between Send operations.
+ buf := make([]byte, snapshotSendBufferSize)
+
+ n, err := io.ReadFull(pr, buf)
+ if err != nil && !errorspkg.Is(err, io.EOF) && !errorspkg.Is(err, io.ErrUnexpectedEOF) {
+ return togRPCError(err)
+ }
+ sent += int64(n)
+
+ // If total is an exact multiple of snapshotSendBufferSize, it is possible
+ // that
+ // resp.RemainingBytes == 0
+ // resp.Blob == zero bytes but not nil
+ // Does this make the server response sent to the client nil in proto, so
+ // that the client stops receiving from the snapshot stream before the
+ // server sends the snapshot SHA? No: the client will still receive a
+ // non-nil response until the server closes the stream with EOF.
+ resp := &pb.SnapshotResponse{
+ RemainingBytes: uint64(total - sent),
+ Blob: buf[:n],
+ Version: storageVersion,
+ }
+ if err = srv.Send(resp); err != nil {
+ return togRPCError(err)
+ }
+ h.Write(buf[:n])
+ }
+
+ // send SHA digest for integrity checks
+ // during snapshot restore operation
+ sha := h.Sum(nil)
+
+ ms.lg.Info("sending database sha256 checksum to client",
+ zap.Int64("total-bytes", total),
+ zap.Int("checksum-size", len(sha)),
+ )
+ hresp := &pb.SnapshotResponse{RemainingBytes: 0, Blob: sha, Version: storageVersion}
+ if err := srv.Send(hresp); err != nil {
+ return togRPCError(err)
+ }
+
+ ms.lg.Info("successfully sent database snapshot to client",
+ zap.Int64("total-bytes", total),
+ zap.String("size", size),
+ zap.Duration("took", time.Since(start)),
+ zap.String("storage-version", storageVersion),
+ )
+ return nil
+}
+
+func (ms *maintenanceServer) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) {
+ h, rev, err := ms.hasher.Hash()
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ resp := &pb.HashResponse{Header: &pb.ResponseHeader{Revision: rev}, Hash: h}
+ ms.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (ms *maintenanceServer) HashKV(ctx context.Context, r *pb.HashKVRequest) (*pb.HashKVResponse, error) {
+ h, rev, err := ms.hasher.HashByRev(r.Revision)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+
+ resp := &pb.HashKVResponse{
+ Header: &pb.ResponseHeader{Revision: rev},
+ Hash: h.Hash,
+ CompactRevision: h.CompactRevision,
+ HashRevision: h.Revision,
+ }
+ ms.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (ms *maintenanceServer) Alarm(ctx context.Context, ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
+ resp, err := ms.a.Alarm(ctx, ar)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ if resp.Header == nil {
+ resp.Header = &pb.ResponseHeader{}
+ }
+ ms.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+func (ms *maintenanceServer) Status(ctx context.Context, ar *pb.StatusRequest) (*pb.StatusResponse, error) {
+ hdr := &pb.ResponseHeader{}
+ ms.hdr.fill(hdr)
+ resp := &pb.StatusResponse{
+ Header: hdr,
+ Version: version.Version,
+ Leader: uint64(ms.rg.Leader()),
+ RaftIndex: ms.rg.CommittedIndex(),
+ RaftAppliedIndex: ms.rg.AppliedIndex(),
+ RaftTerm: ms.rg.Term(),
+ DbSize: ms.bg.Backend().Size(),
+ DbSizeInUse: ms.bg.Backend().SizeInUse(),
+ IsLearner: ms.cs.IsLearner(),
+ DbSizeQuota: ms.cg.Config().QuotaBackendBytes,
+ DowngradeInfo: &pb.DowngradeInfo{Enabled: false},
+ }
+ if storageVersion := ms.vs.GetStorageVersion(); storageVersion != nil {
+ resp.StorageVersion = storageVersion.String()
+ }
+ if downgradeInfo := ms.vs.GetDowngradeInfo(); downgradeInfo != nil {
+ resp.DowngradeInfo = &pb.DowngradeInfo{
+ Enabled: downgradeInfo.Enabled,
+ TargetVersion: downgradeInfo.TargetVersion,
+ }
+ }
+ if resp.Leader == raft.None {
+ resp.Errors = append(resp.Errors, errors.ErrNoLeader.Error())
+ }
+ for _, a := range ms.a.Alarms() {
+ resp.Errors = append(resp.Errors, a.String())
+ }
+ return resp, nil
+}
+
+func (ms *maintenanceServer) MoveLeader(ctx context.Context, tr *pb.MoveLeaderRequest) (*pb.MoveLeaderResponse, error) {
+ if ms.rg.MemberID() != ms.rg.Leader() {
+ return nil, rpctypes.ErrGRPCNotLeader
+ }
+
+ if err := ms.lt.MoveLeader(ctx, uint64(ms.rg.Leader()), tr.TargetID); err != nil {
+ return nil, togRPCError(err)
+ }
+ return &pb.MoveLeaderResponse{}, nil
+}
+
+func (ms *maintenanceServer) Downgrade(ctx context.Context, r *pb.DowngradeRequest) (*pb.DowngradeResponse, error) {
+ resp, err := ms.d.Downgrade(ctx, r)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ resp.Header = &pb.ResponseHeader{}
+ ms.hdr.fill(resp.Header)
+ return resp, nil
+}
+
+type authMaintenanceServer struct {
+ *maintenanceServer
+ *AuthAdmin
+}
+
+func (ams *authMaintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) {
+ if err := ams.isPermitted(ctx); err != nil {
+ return nil, togRPCError(err)
+ }
+
+ return ams.maintenanceServer.Defragment(ctx, sr)
+}
+
+func (ams *authMaintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error {
+ if err := ams.isPermitted(srv.Context()); err != nil {
+ return togRPCError(err)
+ }
+
+ return ams.maintenanceServer.Snapshot(sr, srv)
+}
+
+func (ams *authMaintenanceServer) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) {
+ if err := ams.isPermitted(ctx); err != nil {
+ return nil, togRPCError(err)
+ }
+
+ return ams.maintenanceServer.Hash(ctx, r)
+}
+
+func (ams *authMaintenanceServer) HashKV(ctx context.Context, r *pb.HashKVRequest) (*pb.HashKVResponse, error) {
+ if err := ams.isPermitted(ctx); err != nil {
+ return nil, togRPCError(err)
+ }
+ return ams.maintenanceServer.HashKV(ctx, r)
+}
+
+func (ams *authMaintenanceServer) Status(ctx context.Context, ar *pb.StatusRequest) (*pb.StatusResponse, error) {
+ if err := ams.isPermitted(ctx); err != nil {
+ return nil, togRPCError(err)
+ }
+
+ return ams.maintenanceServer.Status(ctx, ar)
+}
+
+func (ams *authMaintenanceServer) MoveLeader(ctx context.Context, tr *pb.MoveLeaderRequest) (*pb.MoveLeaderResponse, error) {
+ if err := ams.isPermitted(ctx); err != nil {
+ return nil, togRPCError(err)
+ }
+
+ return ams.maintenanceServer.MoveLeader(ctx, tr)
+}
+
+func (ams *authMaintenanceServer) Downgrade(ctx context.Context, r *pb.DowngradeRequest) (*pb.DowngradeResponse, error) {
+ if err := ams.isPermitted(ctx); err != nil {
+ return nil, togRPCError(err)
+ }
+
+ return ams.maintenanceServer.Downgrade(ctx, r)
+}
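
The Snapshot RPC streams the backend in fixed-size chunks, allocating a fresh buffer per Send and hashing everything sent so the client can verify integrity on restore. Below is a self-contained sketch of that loop against a plain io.Reader; streamWithChecksum is an illustrative helper, not an etcd API:

```go
package main

import (
	"bytes"
	"crypto/sha256"
	"errors"
	"fmt"
	"io"
)

const chunkSize = 32 * 1024 // mirrors snapshotSendBufferSize

// streamWithChecksum sends r in fixed-size chunks via send and returns the
// sha256 digest of everything sent, mirroring the Snapshot loop above.
func streamWithChecksum(r io.Reader, total int64, send func(blob []byte, remaining uint64) error) ([]byte, error) {
	h := sha256.New()
	var sent int64
	for total-sent > 0 {
		buf := make([]byte, chunkSize) // fresh buffer each pass: send may retain it
		n, err := io.ReadFull(r, buf)
		if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
			return nil, err
		}
		sent += int64(n)
		if err := send(buf[:n], uint64(total-sent)); err != nil {
			return nil, err
		}
		h.Write(buf[:n])
	}
	return h.Sum(nil), nil
}

func main() {
	data := bytes.Repeat([]byte("etcd"), 20000) // ~80 KiB, spans several chunks
	sum, err := streamWithChecksum(bytes.NewReader(data), int64(len(data)),
		func(blob []byte, remaining uint64) error {
			fmt.Printf("sent %d bytes, %d remaining\n", len(blob), remaining)
			return nil
		})
	if err != nil {
		panic(err)
	}
	fmt.Printf("sha256: %x\n", sum)
}
```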
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/member.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/member.go
new file mode 100644
index 0000000..7fd68fe
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/member.go
@@ -0,0 +1,124 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "context"
+ "time"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/etcdserver/api"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+)
+
+type ClusterServer struct {
+ cluster api.Cluster
+ server *etcdserver.EtcdServer
+}
+
+func NewClusterServer(s *etcdserver.EtcdServer) *ClusterServer {
+ return &ClusterServer{
+ cluster: s.Cluster(),
+ server: s,
+ }
+}
+
+func (cs *ClusterServer) MemberAdd(ctx context.Context, r *pb.MemberAddRequest) (*pb.MemberAddResponse, error) {
+ urls, err := types.NewURLs(r.PeerURLs)
+ if err != nil {
+ return nil, rpctypes.ErrGRPCMemberBadURLs
+ }
+
+ now := time.Now()
+ var m *membership.Member
+ if r.IsLearner {
+ m = membership.NewMemberAsLearner("", urls, "", &now)
+ } else {
+ m = membership.NewMember("", urls, "", &now)
+ }
+ membs, merr := cs.server.AddMember(ctx, *m)
+ if merr != nil {
+ return nil, togRPCError(merr)
+ }
+
+ return &pb.MemberAddResponse{
+ Header: cs.header(),
+ Member: &pb.Member{
+ ID: uint64(m.ID),
+ PeerURLs: m.PeerURLs,
+ IsLearner: m.IsLearner,
+ },
+ Members: membersToProtoMembers(membs),
+ }, nil
+}
+
+func (cs *ClusterServer) MemberRemove(ctx context.Context, r *pb.MemberRemoveRequest) (*pb.MemberRemoveResponse, error) {
+ membs, err := cs.server.RemoveMember(ctx, r.ID)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return &pb.MemberRemoveResponse{Header: cs.header(), Members: membersToProtoMembers(membs)}, nil
+}
+
+func (cs *ClusterServer) MemberUpdate(ctx context.Context, r *pb.MemberUpdateRequest) (*pb.MemberUpdateResponse, error) {
+ m := membership.Member{
+ ID: types.ID(r.ID),
+ RaftAttributes: membership.RaftAttributes{PeerURLs: r.PeerURLs},
+ }
+ membs, err := cs.server.UpdateMember(ctx, m)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return &pb.MemberUpdateResponse{Header: cs.header(), Members: membersToProtoMembers(membs)}, nil
+}
+
+func (cs *ClusterServer) MemberList(ctx context.Context, r *pb.MemberListRequest) (*pb.MemberListResponse, error) {
+ if r.Linearizable {
+ if err := cs.server.LinearizableReadNotify(ctx); err != nil {
+ return nil, togRPCError(err)
+ }
+ }
+ membs := membersToProtoMembers(cs.cluster.Members())
+ return &pb.MemberListResponse{Header: cs.header(), Members: membs}, nil
+}
+
+func (cs *ClusterServer) MemberPromote(ctx context.Context, r *pb.MemberPromoteRequest) (*pb.MemberPromoteResponse, error) {
+ membs, err := cs.server.PromoteMember(ctx, r.ID)
+ if err != nil {
+ return nil, togRPCError(err)
+ }
+ return &pb.MemberPromoteResponse{Header: cs.header(), Members: membersToProtoMembers(membs)}, nil
+}
+
+func (cs *ClusterServer) header() *pb.ResponseHeader {
+ return &pb.ResponseHeader{ClusterId: uint64(cs.cluster.ID()), MemberId: uint64(cs.server.MemberID()), RaftTerm: cs.server.Term()}
+}
+
+func membersToProtoMembers(membs []*membership.Member) []*pb.Member {
+ protoMembs := make([]*pb.Member, len(membs))
+ for i := range membs {
+ protoMembs[i] = &pb.Member{
+ Name: membs[i].Name,
+ ID: uint64(membs[i].ID),
+ PeerURLs: membs[i].PeerURLs,
+ ClientURLs: membs[i].ClientURLs,
+ IsLearner: membs[i].IsLearner,
+ }
+ }
+ return protoMembs
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/metrics.go
new file mode 100644
index 0000000..d79506e
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/metrics.go
@@ -0,0 +1,62 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+var (
+ sentBytes = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "client_grpc_sent_bytes_total",
+ Help: "The total number of bytes sent to grpc clients.",
+ })
+
+ receivedBytes = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "client_grpc_received_bytes_total",
+ Help: "The total number of bytes received from grpc clients.",
+ })
+
+ streamFailures = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "network",
+ Name: "server_stream_failures_total",
+ Help: "The total number of stream failures from the local server.",
+ },
+ []string{"Type", "API"},
+ )
+
+ clientRequests = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "client_requests_total",
+ Help: "The total number of client requests per client version.",
+ },
+ []string{"type", "client_api_version"},
+ )
+)
+
+func init() {
+ prometheus.MustRegister(sentBytes)
+ prometheus.MustRegister(receivedBytes)
+ prometheus.MustRegister(streamFailures)
+ prometheus.MustRegister(clientRequests)
+}
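
For reference, a small sketch of how such a CounterVec behaves once registered: each label combination becomes its own time series, incremented via WithLabelValues as the stream handlers above do. The private registry is only there to keep the example runnable; the real code registers on the default registry in init():

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

var streamFailures = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: "etcd",
		Subsystem: "network",
		Name:      "server_stream_failures_total",
		Help:      "The total number of stream failures from the local server.",
	},
	[]string{"Type", "API"},
)

func main() {
	// Registration normally happens once, in init() as above.
	reg := prometheus.NewRegistry()
	reg.MustRegister(streamFailures)

	// Each distinct label combination gets its own counter series.
	streamFailures.WithLabelValues("send", "watch").Inc()
	streamFailures.WithLabelValues("receive", "lease-keepalive").Inc()

	mfs, _ := reg.Gather()
	for _, mf := range mfs {
		fmt.Println(mf.GetName(), "-", len(mf.GetMetric()), "series")
	}
}
```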
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/quota.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/quota.go
new file mode 100644
index 0000000..13bb83f
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/quota.go
@@ -0,0 +1,95 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "context"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/storage"
+)
+
+type quotaKVServer struct {
+ pb.KVServer
+ qa quotaAlarmer
+}
+
+type quotaAlarmer struct {
+ q storage.Quota
+ a Alarmer
+ id types.ID
+}
+
+// check whether the request satisfies the quota. If there is not enough
+// space, ignore the request and raise the NOSPACE alarm.
+func (qa *quotaAlarmer) check(ctx context.Context, r any) error {
+ if qa.q.Available(r) {
+ return nil
+ }
+ req := &pb.AlarmRequest{
+ MemberID: uint64(qa.id),
+ Action: pb.AlarmRequest_ACTIVATE,
+ Alarm: pb.AlarmType_NOSPACE,
+ }
+ qa.a.Alarm(ctx, req)
+ return rpctypes.ErrGRPCNoSpace
+}
+
+func NewQuotaKVServer(s *etcdserver.EtcdServer) pb.KVServer {
+ return "aKVServer{
+ NewKVServer(s),
+ quotaAlarmer{newBackendQuota(s, "kv"), s, s.MemberID()},
+ }
+}
+
+func (s *quotaKVServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
+ if err := s.qa.check(ctx, r); err != nil {
+ return nil, err
+ }
+ return s.KVServer.Put(ctx, r)
+}
+
+func (s *quotaKVServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
+ if err := s.qa.check(ctx, r); err != nil {
+ return nil, err
+ }
+ return s.KVServer.Txn(ctx, r)
+}
+
+type quotaLeaseServer struct {
+ pb.LeaseServer
+ qa quotaAlarmer
+}
+
+func (s *quotaLeaseServer) LeaseGrant(ctx context.Context, cr *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
+ if err := s.qa.check(ctx, cr); err != nil {
+ return nil, err
+ }
+ return s.LeaseServer.LeaseGrant(ctx, cr)
+}
+
+func NewQuotaLeaseServer(s *etcdserver.EtcdServer) pb.LeaseServer {
+ return "aLeaseServer{
+ NewLeaseServer(s),
+ quotaAlarmer{newBackendQuota(s, "lease"), s, s.MemberID()},
+ }
+}
+
+func newBackendQuota(s *etcdserver.EtcdServer, name string) storage.Quota {
+ return storage.NewBackendQuota(s.Logger(), s.Cfg.QuotaBackendBytes, s.Backend(), name)
+}
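
quotaKVServer and quotaLeaseServer are plain decorators: embed the wrapped server, intercept the space-consuming RPCs, and delegate only when the quota check passes. A minimal sketch of the same shape over a hypothetical Putter interface (errNoSpace, store, and quotaPutter are all illustrative):

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

var errNoSpace = errors.New("no space")

// Putter is the narrow interface the quota wrapper decorates.
type Putter interface {
	Put(ctx context.Context, key, val string) error
}

type store struct{ used, quota int }

func (s *store) Put(ctx context.Context, key, val string) error {
	s.used += len(key) + len(val)
	return nil
}

// quotaPutter checks capacity before delegating, like quotaKVServer above.
type quotaPutter struct {
	Putter
	s *store
}

func (q *quotaPutter) Put(ctx context.Context, key, val string) error {
	if q.s.used+len(key)+len(val) > q.s.quota {
		// The real server would also raise an alarm here.
		return errNoSpace
	}
	return q.Putter.Put(ctx, key, val)
}

func main() {
	s := &store{quota: 10}
	var kv Putter = &quotaPutter{Putter: s, s: s}
	fmt.Println(kv.Put(context.Background(), "k", "12345"))    // <nil>
	fmt.Println(kv.Put(context.Background(), "k", "too-long")) // no space
}
```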
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/util.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/util.go
new file mode 100644
index 0000000..2354b0c
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/util.go
@@ -0,0 +1,151 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "context"
+ errorspkg "errors"
+ "strings"
+
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+ "go.etcd.io/etcd/server/v3/etcdserver/version"
+ "go.etcd.io/etcd/server/v3/lease"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+)
+
+var toGRPCErrorMap = map[error]error{
+ membership.ErrIDRemoved: rpctypes.ErrGRPCMemberNotFound,
+ membership.ErrIDNotFound: rpctypes.ErrGRPCMemberNotFound,
+ membership.ErrIDExists: rpctypes.ErrGRPCMemberExist,
+ membership.ErrPeerURLexists: rpctypes.ErrGRPCPeerURLExist,
+ membership.ErrMemberNotLearner: rpctypes.ErrGRPCMemberNotLearner,
+ membership.ErrTooManyLearners: rpctypes.ErrGRPCTooManyLearners,
+ errors.ErrNotEnoughStartedMembers: rpctypes.ErrMemberNotEnoughStarted,
+ errors.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,
+
+ mvcc.ErrCompacted: rpctypes.ErrGRPCCompacted,
+ mvcc.ErrFutureRev: rpctypes.ErrGRPCFutureRev,
+ errors.ErrRequestTooLarge: rpctypes.ErrGRPCRequestTooLarge,
+ errors.ErrNoSpace: rpctypes.ErrGRPCNoSpace,
+ errors.ErrTooManyRequests: rpctypes.ErrTooManyRequests,
+
+ errors.ErrNoLeader: rpctypes.ErrGRPCNoLeader,
+ errors.ErrNotLeader: rpctypes.ErrGRPCNotLeader,
+ errors.ErrLeaderChanged: rpctypes.ErrGRPCLeaderChanged,
+ errors.ErrStopped: rpctypes.ErrGRPCStopped,
+ errors.ErrTimeout: rpctypes.ErrGRPCTimeout,
+ errors.ErrTimeoutDueToLeaderFail: rpctypes.ErrGRPCTimeoutDueToLeaderFail,
+ errors.ErrTimeoutDueToConnectionLost: rpctypes.ErrGRPCTimeoutDueToConnectionLost,
+ errors.ErrTimeoutWaitAppliedIndex: rpctypes.ErrGRPCTimeoutWaitAppliedIndex,
+ errors.ErrUnhealthy: rpctypes.ErrGRPCUnhealthy,
+ errors.ErrKeyNotFound: rpctypes.ErrGRPCKeyNotFound,
+ errors.ErrCorrupt: rpctypes.ErrGRPCCorrupt,
+ errors.ErrBadLeaderTransferee: rpctypes.ErrGRPCBadLeaderTransferee,
+
+ errors.ErrClusterVersionUnavailable: rpctypes.ErrGRPCClusterVersionUnavailable,
+ errors.ErrWrongDowngradeVersionFormat: rpctypes.ErrGRPCWrongDowngradeVersionFormat,
+ version.ErrInvalidDowngradeTargetVersion: rpctypes.ErrGRPCInvalidDowngradeTargetVersion,
+ version.ErrDowngradeInProcess: rpctypes.ErrGRPCDowngradeInProcess,
+ version.ErrNoInflightDowngrade: rpctypes.ErrGRPCNoInflightDowngrade,
+
+ lease.ErrLeaseNotFound: rpctypes.ErrGRPCLeaseNotFound,
+ lease.ErrLeaseExists: rpctypes.ErrGRPCLeaseExist,
+ lease.ErrLeaseTTLTooLarge: rpctypes.ErrGRPCLeaseTTLTooLarge,
+
+ auth.ErrRootUserNotExist: rpctypes.ErrGRPCRootUserNotExist,
+ auth.ErrRootRoleNotExist: rpctypes.ErrGRPCRootRoleNotExist,
+ auth.ErrUserAlreadyExist: rpctypes.ErrGRPCUserAlreadyExist,
+ auth.ErrUserEmpty: rpctypes.ErrGRPCUserEmpty,
+ auth.ErrUserNotFound: rpctypes.ErrGRPCUserNotFound,
+ auth.ErrRoleAlreadyExist: rpctypes.ErrGRPCRoleAlreadyExist,
+ auth.ErrRoleNotFound: rpctypes.ErrGRPCRoleNotFound,
+ auth.ErrRoleEmpty: rpctypes.ErrGRPCRoleEmpty,
+ auth.ErrAuthFailed: rpctypes.ErrGRPCAuthFailed,
+ auth.ErrPermissionNotGiven: rpctypes.ErrGRPCPermissionNotGiven,
+ auth.ErrPermissionDenied: rpctypes.ErrGRPCPermissionDenied,
+ auth.ErrRoleNotGranted: rpctypes.ErrGRPCRoleNotGranted,
+ auth.ErrPermissionNotGranted: rpctypes.ErrGRPCPermissionNotGranted,
+ auth.ErrAuthNotEnabled: rpctypes.ErrGRPCAuthNotEnabled,
+ auth.ErrInvalidAuthToken: rpctypes.ErrGRPCInvalidAuthToken,
+ auth.ErrInvalidAuthMgmt: rpctypes.ErrGRPCInvalidAuthMgmt,
+ auth.ErrAuthOldRevision: rpctypes.ErrGRPCAuthOldRevision,
+
+ // In sync with status.FromContextError
+ context.Canceled: rpctypes.ErrGRPCCanceled,
+ context.DeadlineExceeded: rpctypes.ErrGRPCDeadlineExceeded,
+}
+
+func togRPCError(err error) error {
+ // let gRPC server convert to codes.Canceled, codes.DeadlineExceeded
+ if errorspkg.Is(err, context.Canceled) || errorspkg.Is(err, context.DeadlineExceeded) {
+ return err
+ }
+ grpcErr, ok := toGRPCErrorMap[err]
+ if !ok {
+ return status.Error(codes.Unknown, err.Error())
+ }
+ return grpcErr
+}
+
+func isClientCtxErr(ctxErr error, err error) bool {
+ if ctxErr != nil {
+ return true
+ }
+
+ ev, ok := status.FromError(err)
+ if !ok {
+ return false
+ }
+
+ switch ev.Code() {
+ case codes.Canceled, codes.DeadlineExceeded:
+ // client-side context cancel or deadline exceeded
+ // "rpc error: code = Canceled desc = context canceled"
+ // "rpc error: code = DeadlineExceeded desc = context deadline exceeded"
+ return true
+ case codes.Unavailable:
+ msg := ev.Message()
+ // client-side context cancel or deadline exceeded with TLS ("http2.errClientDisconnected")
+ // "rpc error: code = Unavailable desc = client disconnected"
+ if msg == "client disconnected" {
+ return true
+ }
+ // "grpc/transport.ClientTransport.CloseStream" on canceled streams
+ // "rpc error: code = Unavailable desc = stream error: stream ID 21; CANCEL")
+ if strings.HasPrefix(msg, "stream error: ") && strings.HasSuffix(msg, "; CANCEL") {
+ return true
+ }
+ }
+ return false
+}
+
+// in v3.4, a learner is allowed to serve serializable reads and endpoint status
+func isRPCSupportedForLearner(req any) bool {
+ switch r := req.(type) {
+ case *pb.StatusRequest:
+ return true
+ case *pb.RangeRequest:
+ return r.Serializable
+ default:
+ return false
+ }
+}
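
togRPCError keeps a flat map from internal sentinel errors to their client-facing gRPC equivalents and falls back to a generic error for anything unmapped. A stripped-down, std-lib-only sketch of that shape; the sentinels here are illustrative stand-ins:

```go
package main

import (
	"errors"
	"fmt"
)

// Illustrative sentinels standing in for the internal/client-facing pairs above.
var (
	errCompacted     = errors.New("mvcc: required revision has been compacted")
	errGRPCCompacted = errors.New("etcdserver: mvcc: required revision has been compacted")
)

// The map lookup relies on sentinel identity (==), exactly like toGRPCErrorMap;
// wrapped errors would need an errors.Is-based scan instead.
var toClientErrorMap = map[error]error{
	errCompacted: errGRPCCompacted,
}

func toClientError(err error) error {
	if err == nil {
		return nil
	}
	if mapped, ok := toClientErrorMap[err]; ok {
		return mapped
	}
	return fmt.Errorf("unknown error: %w", err)
}

func main() {
	fmt.Println(toClientError(errCompacted))           // mapped sentinel
	fmt.Println(toClientError(errors.New("boom")))     // fallback wrapper
}
```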
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/watch.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/watch.go
new file mode 100644
index 0000000..d4a5bc3
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc/watch.go
@@ -0,0 +1,616 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v3rpc
+
+import (
+ "context"
+ "errors"
+ "io"
+ "math/rand"
+ "sync"
+ "time"
+
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/mvccpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/client/pkg/v3/verify"
+ clientv3 "go.etcd.io/etcd/client/v3"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/etcdserver"
+ "go.etcd.io/etcd/server/v3/etcdserver/apply"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+)
+
+const minWatchProgressInterval = 100 * time.Millisecond
+
+type watchServer struct {
+ lg *zap.Logger
+
+ clusterID int64
+ memberID int64
+
+ maxRequestBytes uint
+
+ sg apply.RaftStatusGetter
+ watchable mvcc.WatchableKV
+ ag AuthGetter
+}
+
+// NewWatchServer returns a new watch server.
+func NewWatchServer(s *etcdserver.EtcdServer) pb.WatchServer {
+ srv := &watchServer{
+ lg: s.Cfg.Logger,
+
+ clusterID: int64(s.Cluster().ID()),
+ memberID: int64(s.MemberID()),
+
+ maxRequestBytes: s.Cfg.MaxRequestBytesWithOverhead(),
+
+ sg: s,
+ watchable: s.Watchable(),
+ ag: s,
+ }
+ if srv.lg == nil {
+ srv.lg = zap.NewNop()
+ }
+ if s.Cfg.WatchProgressNotifyInterval > 0 {
+ if s.Cfg.WatchProgressNotifyInterval < minWatchProgressInterval {
+ srv.lg.Warn(
+ "adjusting watch progress notify interval to minimum period",
+ zap.Duration("min-watch-progress-notify-interval", minWatchProgressInterval),
+ )
+ s.Cfg.WatchProgressNotifyInterval = minWatchProgressInterval
+ }
+ SetProgressReportInterval(s.Cfg.WatchProgressNotifyInterval)
+ }
+ return srv
+}
+
+var (
+ // External test can read this with GetProgressReportInterval()
+ // and change this to a small value to finish fast with
+ // SetProgressReportInterval().
+ progressReportInterval = 10 * time.Minute
+ progressReportIntervalMu sync.RWMutex
+)
+
+// GetProgressReportInterval returns the current progress report interval (for testing).
+func GetProgressReportInterval() time.Duration {
+ progressReportIntervalMu.RLock()
+ interval := progressReportInterval
+ progressReportIntervalMu.RUnlock()
+
+ // add rand(1/10*progressReportInterval) as jitter so that etcdserver will not
+ // send progress notifications to watchers around the same time even when watchers
+ // are created around the same time (which is common when a client restarts itself).
+ jitter := time.Duration(rand.Int63n(int64(interval) / 10))
+
+ return interval + jitter
+}
+
+// SetProgressReportInterval updates the current progress report interval (for testing).
+func SetProgressReportInterval(newTimeout time.Duration) {
+ progressReportIntervalMu.Lock()
+ progressReportInterval = newTimeout
+ progressReportIntervalMu.Unlock()
+}
+
+// We send ctrl responses inside the read loop. We do not want the
+// send to block the read, but we still want the ctrl responses we send
+// to be serialized. Thus we use a buffered chan to solve the problem.
+// A small buffer should be OK for most cases, since we expect
+// ctrl requests to be infrequent.
+const ctrlStreamBufLen = 16
+
+// serverWatchStream is an etcd server side stream. It receives requests
+// from the client side gRPC stream and watch events from mvcc.WatchStream,
+// and creates responses that are forwarded to the gRPC stream.
+// It also forwards control messages like watch created and watch canceled.
+type serverWatchStream struct {
+ lg *zap.Logger
+
+ clusterID int64
+ memberID int64
+
+ maxRequestBytes uint
+
+ sg apply.RaftStatusGetter
+ watchable mvcc.WatchableKV
+ ag AuthGetter
+
+ gRPCStream pb.Watch_WatchServer
+ watchStream mvcc.WatchStream
+ ctrlStream chan *pb.WatchResponse
+
+ // mu protects progress, prevKV, fragment
+ mu sync.RWMutex
+ // tracks the watchID that stream might need to send progress to
+ // TODO: combine progress and prevKV into a single struct?
+ progress map[mvcc.WatchID]bool
+ // record watch IDs that need return previous key-value pair
+ prevKV map[mvcc.WatchID]bool
+ // records fragmented watch IDs
+ fragment map[mvcc.WatchID]bool
+
+ // closec indicates the stream is closed.
+ closec chan struct{}
+
+ // wg waits for the send loop to complete
+ wg sync.WaitGroup
+}
+
+func (ws *watchServer) Watch(stream pb.Watch_WatchServer) (err error) {
+ sws := serverWatchStream{
+ lg: ws.lg,
+
+ clusterID: ws.clusterID,
+ memberID: ws.memberID,
+
+ maxRequestBytes: ws.maxRequestBytes,
+
+ sg: ws.sg,
+ watchable: ws.watchable,
+ ag: ws.ag,
+
+ gRPCStream: stream,
+ watchStream: ws.watchable.NewWatchStream(),
+ // chan for sending control response like watcher created and canceled.
+ ctrlStream: make(chan *pb.WatchResponse, ctrlStreamBufLen),
+
+ progress: make(map[mvcc.WatchID]bool),
+ prevKV: make(map[mvcc.WatchID]bool),
+ fragment: make(map[mvcc.WatchID]bool),
+
+ closec: make(chan struct{}),
+ }
+
+ sws.wg.Add(1)
+ go func() {
+ sws.sendLoop()
+ sws.wg.Done()
+ }()
+
+ errc := make(chan error, 1)
+ // Ideally recvLoop would also use sws.wg to signal its completion,
+ // but when stream.Context().Done() is closed, the stream's recv
+ // may continue to block since it uses a different context, leading to
+ // deadlock when calling sws.close().
+ go func() {
+ if rerr := sws.recvLoop(); rerr != nil {
+ if isClientCtxErr(stream.Context().Err(), rerr) {
+ sws.lg.Debug("failed to receive watch request from gRPC stream", zap.Error(rerr))
+ } else {
+ sws.lg.Warn("failed to receive watch request from gRPC stream", zap.Error(rerr))
+ streamFailures.WithLabelValues("receive", "watch").Inc()
+ }
+ errc <- rerr
+ }
+ }()
+
+ // TODO: There's a race here. When a stream is closed (e.g. due to a cancellation),
+ // the underlying error (e.g. a gRPC stream error) may be returned and handled
+ // through errc if the recv goroutine finishes before the send goroutine.
+ // When the recv goroutine wins, the stream error is retained. When recv loses
+ // the race, the underlying error is lost, unless the root error is propagated
+ // through Context.Err(), which is not always the case, as callers have to
+ // decide to implement a custom context to do so. The stdlib context package
+ // builtins may be insufficient to carry semantically useful errors around and
+ // should be revisited.
+ select {
+ case err = <-errc:
+ if errors.Is(err, context.Canceled) {
+ err = rpctypes.ErrGRPCWatchCanceled
+ }
+ close(sws.ctrlStream)
+ case <-stream.Context().Done():
+ err = stream.Context().Err()
+ if errors.Is(err, context.Canceled) {
+ err = rpctypes.ErrGRPCWatchCanceled
+ }
+ }
+
+ sws.close()
+ return err
+}
+
+func (sws *serverWatchStream) isWatchPermitted(wcr *pb.WatchCreateRequest) error {
+ authInfo, err := sws.ag.AuthInfoFromCtx(sws.gRPCStream.Context())
+ if err != nil {
+ return err
+ }
+ if authInfo == nil {
+ // if auth is enabled, IsRangePermitted() can cause an error
+ authInfo = &auth.AuthInfo{}
+ }
+ return sws.ag.AuthStore().IsRangePermitted(authInfo, wcr.Key, wcr.RangeEnd)
+}
+
+func (sws *serverWatchStream) recvLoop() error {
+ for {
+ req, err := sws.gRPCStream.Recv()
+ if errors.Is(err, io.EOF) {
+ return nil
+ }
+ if err != nil {
+ return err
+ }
+
+ switch uv := req.RequestUnion.(type) {
+ case *pb.WatchRequest_CreateRequest:
+ if uv.CreateRequest == nil {
+ break
+ }
+
+ creq := uv.CreateRequest
+ if len(creq.Key) == 0 {
+ // \x00 is the smallest key
+ creq.Key = []byte{0}
+ }
+ if len(creq.RangeEnd) == 0 {
+ // force nil since watchstream.Watch distinguishes
+ // between nil and []byte{} for single key / >=
+ creq.RangeEnd = nil
+ }
+ if len(creq.RangeEnd) == 1 && creq.RangeEnd[0] == 0 {
+ // support >= key queries
+ creq.RangeEnd = []byte{}
+ }
+
+ err := sws.isWatchPermitted(creq)
+ if err != nil {
+ var cancelReason string
+ switch {
+ case errors.Is(err, auth.ErrInvalidAuthToken):
+ cancelReason = rpctypes.ErrGRPCInvalidAuthToken.Error()
+ case errors.Is(err, auth.ErrAuthOldRevision):
+ cancelReason = rpctypes.ErrGRPCAuthOldRevision.Error()
+ case errors.Is(err, auth.ErrUserEmpty):
+ cancelReason = rpctypes.ErrGRPCUserEmpty.Error()
+ default:
+ if !errors.Is(err, auth.ErrPermissionDenied) {
+ sws.lg.Error("unexpected error code", zap.Error(err))
+ }
+ cancelReason = rpctypes.ErrGRPCPermissionDenied.Error()
+ }
+
+ wr := &pb.WatchResponse{
+ Header: sws.newResponseHeader(sws.watchStream.Rev()),
+ WatchId: clientv3.InvalidWatchID,
+ Canceled: true,
+ Created: true,
+ CancelReason: cancelReason,
+ }
+
+ select {
+ case sws.ctrlStream <- wr:
+ continue
+ case <-sws.closec:
+ return nil
+ }
+ }
+
+ filters := FiltersFromRequest(creq)
+
+ id, err := sws.watchStream.Watch(mvcc.WatchID(creq.WatchId), creq.Key, creq.RangeEnd, creq.StartRevision, filters...)
+ if err == nil {
+ sws.mu.Lock()
+ if creq.ProgressNotify {
+ sws.progress[id] = true
+ }
+ if creq.PrevKv {
+ sws.prevKV[id] = true
+ }
+ if creq.Fragment {
+ sws.fragment[id] = true
+ }
+ sws.mu.Unlock()
+ } else {
+ id = clientv3.InvalidWatchID
+ }
+
+ wr := &pb.WatchResponse{
+ Header: sws.newResponseHeader(sws.watchStream.Rev()),
+ WatchId: int64(id),
+ Created: true,
+ Canceled: err != nil,
+ }
+ if err != nil {
+ wr.CancelReason = err.Error()
+ }
+ select {
+ case sws.ctrlStream <- wr:
+ case <-sws.closec:
+ return nil
+ }
+
+ case *pb.WatchRequest_CancelRequest:
+ if uv.CancelRequest != nil {
+ id := uv.CancelRequest.WatchId
+ err := sws.watchStream.Cancel(mvcc.WatchID(id))
+ if err == nil {
+ wr := &pb.WatchResponse{
+ Header: sws.newResponseHeader(sws.watchStream.Rev()),
+ WatchId: id,
+ Canceled: true,
+ }
+ select {
+ case sws.ctrlStream <- wr:
+ case <-sws.closec:
+ return nil
+ }
+
+ sws.mu.Lock()
+ delete(sws.progress, mvcc.WatchID(id))
+ delete(sws.prevKV, mvcc.WatchID(id))
+ delete(sws.fragment, mvcc.WatchID(id))
+ sws.mu.Unlock()
+ }
+ }
+ case *pb.WatchRequest_ProgressRequest:
+ if uv.ProgressRequest != nil {
+ sws.mu.Lock()
+ sws.watchStream.RequestProgressAll()
+ sws.mu.Unlock()
+ }
+ default:
+ // We probably should not shut down the entire stream when we
+ // receive an invalid command, so just do nothing instead.
+ sws.lg.Sugar().Infof("invalid watch request type %T received in gRPC stream", uv)
+ continue
+ }
+ }
+}
+
+func (sws *serverWatchStream) sendLoop() {
+ // watch ids that are currently active
+ ids := make(map[mvcc.WatchID]struct{})
+ // watch responses pending on a watch id creation message
+ pending := make(map[mvcc.WatchID][]*pb.WatchResponse)
+
+ interval := GetProgressReportInterval()
+ progressTicker := time.NewTicker(interval)
+
+ defer func() {
+ progressTicker.Stop()
+ // drain the chan to clean up pending events
+ for ws := range sws.watchStream.Chan() {
+ mvcc.ReportEventReceived(len(ws.Events))
+ }
+ for _, wrs := range pending {
+ for _, ws := range wrs {
+ mvcc.ReportEventReceived(len(ws.Events))
+ }
+ }
+ }()
+
+ for {
+ select {
+ case wresp, ok := <-sws.watchStream.Chan():
+ if !ok {
+ return
+ }
+
+ // TODO: evs is []mvccpb.Event type
+ // either return []*mvccpb.Event from the mvcc package
+ // or define protocol buffer with []mvccpb.Event.
+ evs := wresp.Events
+ events := make([]*mvccpb.Event, len(evs))
+ sws.mu.RLock()
+ needPrevKV := sws.prevKV[wresp.WatchID]
+ sws.mu.RUnlock()
+ for i := range evs {
+ events[i] = &evs[i]
+ if needPrevKV && !IsCreateEvent(evs[i]) {
+ opt := mvcc.RangeOptions{Rev: evs[i].Kv.ModRevision - 1}
+ r, err := sws.watchable.Range(context.TODO(), evs[i].Kv.Key, nil, opt)
+ if err == nil && len(r.KVs) != 0 {
+ events[i].PrevKv = &(r.KVs[0])
+ }
+ }
+ }
+
+ canceled := wresp.CompactRevision != 0
+ wr := &pb.WatchResponse{
+ Header: sws.newResponseHeader(wresp.Revision),
+ WatchId: int64(wresp.WatchID),
+ Events: events,
+ CompactRevision: wresp.CompactRevision,
+ Canceled: canceled,
+ }
+
+ // Progress notifications can have WatchID -1
+ // if they announce on behalf of multiple watchers
+ if wresp.WatchID != clientv3.InvalidWatchID {
+ if _, okID := ids[wresp.WatchID]; !okID {
+ // buffer if id not yet announced
+ wrs := append(pending[wresp.WatchID], wr)
+ pending[wresp.WatchID] = wrs
+ continue
+ }
+ }
+
+ mvcc.ReportEventReceived(len(evs))
+
+ sws.mu.RLock()
+ fragmented, ok := sws.fragment[wresp.WatchID]
+ sws.mu.RUnlock()
+
+ var serr error
+ // gofail: var beforeSendWatchResponse struct{}
+ if !fragmented && !ok {
+ serr = sws.gRPCStream.Send(wr)
+ } else {
+ serr = sendFragments(wr, sws.maxRequestBytes, sws.gRPCStream.Send)
+ }
+
+ if serr != nil {
+ if isClientCtxErr(sws.gRPCStream.Context().Err(), serr) {
+ sws.lg.Debug("failed to send watch response to gRPC stream", zap.Error(serr))
+ } else {
+ sws.lg.Warn("failed to send watch response to gRPC stream", zap.Error(serr))
+ streamFailures.WithLabelValues("send", "watch").Inc()
+ }
+ return
+ }
+
+ sws.mu.Lock()
+ if len(evs) > 0 && sws.progress[wresp.WatchID] {
+ // elide next progress update if sent a key update
+ sws.progress[wresp.WatchID] = false
+ }
+ sws.mu.Unlock()
+
+ case c, ok := <-sws.ctrlStream:
+ if !ok {
+ return
+ }
+
+ if err := sws.gRPCStream.Send(c); err != nil {
+ if isClientCtxErr(sws.gRPCStream.Context().Err(), err) {
+ sws.lg.Debug("failed to send watch control response to gRPC stream", zap.Error(err))
+ } else {
+ sws.lg.Warn("failed to send watch control response to gRPC stream", zap.Error(err))
+ streamFailures.WithLabelValues("send", "watch").Inc()
+ }
+ return
+ }
+
+ // track id creation
+ wid := mvcc.WatchID(c.WatchId)
+
+ verify.Assert(!(c.Canceled && c.Created) || wid == clientv3.InvalidWatchID, "unexpected watchId: %d, wanted: %d, since both 'Canceled' and 'Created' are true", wid, clientv3.InvalidWatchID)
+
+ if c.Canceled && wid != clientv3.InvalidWatchID {
+ delete(ids, wid)
+ continue
+ }
+ if c.Created {
+ // flush buffered events
+ ids[wid] = struct{}{}
+ for _, v := range pending[wid] {
+ mvcc.ReportEventReceived(len(v.Events))
+ if err := sws.gRPCStream.Send(v); err != nil {
+ if isClientCtxErr(sws.gRPCStream.Context().Err(), err) {
+ sws.lg.Debug("failed to send pending watch response to gRPC stream", zap.Error(err))
+ } else {
+ sws.lg.Warn("failed to send pending watch response to gRPC stream", zap.Error(err))
+ streamFailures.WithLabelValues("send", "watch").Inc()
+ }
+ return
+ }
+ }
+ delete(pending, wid)
+ }
+
+ case <-progressTicker.C:
+ sws.mu.Lock()
+ for id, ok := range sws.progress {
+ if ok {
+ sws.watchStream.RequestProgress(id)
+ }
+ sws.progress[id] = true
+ }
+ sws.mu.Unlock()
+
+ case <-sws.closec:
+ return
+ }
+ }
+}
+
+func IsCreateEvent(e mvccpb.Event) bool {
+ return e.Type == mvccpb.PUT && e.Kv.CreateRevision == e.Kv.ModRevision
+}
+
+func sendFragments(
+ wr *pb.WatchResponse,
+ maxRequestBytes uint,
+ sendFunc func(*pb.WatchResponse) error,
+) error {
+ // no need to fragment if total request size is smaller
+ // than max request limit or response contains only one event
+ if uint(wr.Size()) < maxRequestBytes || len(wr.Events) < 2 {
+ return sendFunc(wr)
+ }
+
+ ow := *wr
+ ow.Events = make([]*mvccpb.Event, 0)
+ ow.Fragment = true
+
+ var idx int
+ for {
+ cur := ow
+ for _, ev := range wr.Events[idx:] {
+ cur.Events = append(cur.Events, ev)
+ if len(cur.Events) > 1 && uint(cur.Size()) >= maxRequestBytes {
+ cur.Events = cur.Events[:len(cur.Events)-1]
+ break
+ }
+ idx++
+ }
+ if idx == len(wr.Events) {
+ // last response has no more fragment
+ cur.Fragment = false
+ }
+ if err := sendFunc(&cur); err != nil {
+ return err
+ }
+ if !cur.Fragment {
+ break
+ }
+ }
+ return nil
+}
+
+func (sws *serverWatchStream) close() {
+ sws.watchStream.Close()
+ close(sws.closec)
+ sws.wg.Wait()
+}
+
+func (sws *serverWatchStream) newResponseHeader(rev int64) *pb.ResponseHeader {
+ return &pb.ResponseHeader{
+ ClusterId: uint64(sws.clusterID),
+ MemberId: uint64(sws.memberID),
+ Revision: rev,
+ RaftTerm: sws.sg.Term(),
+ }
+}
+
+func filterNoDelete(e mvccpb.Event) bool {
+ return e.Type == mvccpb.DELETE
+}
+
+func filterNoPut(e mvccpb.Event) bool {
+ return e.Type == mvccpb.PUT
+}
+
+// FiltersFromRequest returns "mvcc.FilterFunc" from a given watch create request.
+func FiltersFromRequest(creq *pb.WatchCreateRequest) []mvcc.FilterFunc {
+ filters := make([]mvcc.FilterFunc, 0, len(creq.Filters))
+ for _, ft := range creq.Filters {
+ switch ft {
+ case pb.WatchCreateRequest_NOPUT:
+ filters = append(filters, filterNoPut)
+ case pb.WatchCreateRequest_NODELETE:
+ filters = append(filters, filterNoDelete)
+ default:
+ }
+ }
+ return filters
+}
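
sendFragments greedily packs events into a response until the encoded size would reach the limit, then backs off by one event and sends, marking only the final batch as non-fragment. A simplified standalone sketch of the same packing loop, with encoded sizes reduced to string lengths; sendInBatches is a hypothetical helper:

```go
package main

import "fmt"

// sendInBatches splits events into batches whose (simplified) size stays under
// maxBytes, mirroring the sendFragments loop above.
func sendInBatches(events []string, maxBytes int, send func(batch []string, last bool) error) error {
	// No need to fragment for fewer than two events.
	if len(events) < 2 {
		return send(events, true)
	}
	idx := 0
	for {
		var batch []string
		size := 0
		for _, ev := range events[idx:] {
			batch = append(batch, ev)
			size += len(ev)
			if len(batch) > 1 && size >= maxBytes {
				batch = batch[:len(batch)-1] // drop the event that overflowed
				break
			}
			idx++
		}
		last := idx == len(events) // the final batch carries Fragment == false
		if err := send(batch, last); err != nil {
			return err
		}
		if last {
			return nil
		}
	}
}

func main() {
	events := []string{"aaaa", "bbbb", "cccc", "dddd", "eeee"}
	sendInBatches(events, 10, func(batch []string, last bool) error {
		fmt.Println(batch, "last:", last)
		return nil
	})
}
```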
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/apply.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/apply.go
new file mode 100644
index 0000000..e45d53e
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/apply.go
@@ -0,0 +1,493 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package apply
+
+import (
+ "context"
+
+ "github.com/coreos/go-semver/semver"
+ "github.com/gogo/protobuf/proto"
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/membershippb"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/pkg/v3/traceutil"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/etcdserver/api"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3alarm"
+ "go.etcd.io/etcd/server/v3/etcdserver/cindex"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+ mvcctxn "go.etcd.io/etcd/server/v3/etcdserver/txn"
+ "go.etcd.io/etcd/server/v3/etcdserver/version"
+ "go.etcd.io/etcd/server/v3/lease"
+ serverstorage "go.etcd.io/etcd/server/v3/storage"
+ "go.etcd.io/etcd/server/v3/storage/backend"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+)
+
+const (
+ v3Version = "v3"
+)
+
+// RaftStatusGetter represents etcd server and Raft progress.
+type RaftStatusGetter interface {
+ MemberID() types.ID
+ Leader() types.ID
+ CommittedIndex() uint64
+ AppliedIndex() uint64
+ Term() uint64
+}
+
+type Result struct {
+ Resp proto.Message
+ Err error
+ // Physc signals that the physical effect of the request has completed,
+ // in addition to it being logically reflected by the node. It is
+ // currently only used for Compaction requests.
+ Physc <-chan struct{}
+ Trace *traceutil.Trace
+}
+
+type applyFunc func(r *pb.InternalRaftRequest) *Result
+
+// applierV3 is the interface for processing V3 raft messages
+type applierV3 interface {
+ // Apply executes the generic portion of application logic for the current applier, but
+ // delegates the actual execution to the applyFunc method.
+ Apply(r *pb.InternalRaftRequest, applyFunc applyFunc) *Result
+
+ Put(p *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error)
+ Range(r *pb.RangeRequest) (*pb.RangeResponse, *traceutil.Trace, error)
+ DeleteRange(dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, *traceutil.Trace, error)
+ Txn(rt *pb.TxnRequest) (*pb.TxnResponse, *traceutil.Trace, error)
+ Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, *traceutil.Trace, error)
+
+ LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
+ LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
+
+ LeaseCheckpoint(lc *pb.LeaseCheckpointRequest) (*pb.LeaseCheckpointResponse, error)
+
+ Alarm(*pb.AlarmRequest) (*pb.AlarmResponse, error)
+
+ Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error)
+
+ AuthEnable() (*pb.AuthEnableResponse, error)
+ AuthDisable() (*pb.AuthDisableResponse, error)
+ AuthStatus() (*pb.AuthStatusResponse, error)
+
+ UserAdd(ua *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
+ UserDelete(ua *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
+ UserChangePassword(ua *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
+ UserGrantRole(ua *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
+ UserGet(ua *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
+ UserRevokeRole(ua *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
+ RoleAdd(ua *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
+ RoleGrantPermission(ua *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
+ RoleGet(ua *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
+ RoleRevokePermission(ua *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
+ RoleDelete(ua *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
+ UserList(ua *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
+ RoleList(ua *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
+}
+
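+// Concrete appliers are layered as decorators around applierV3backend
+// (auth, quota, capped, corrupt); see NewUberApplier in uber_applier.go for
+// how the chain is assembled from the active alarms.
+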
+type SnapshotServer interface {
+ ForceSnapshot()
+}
+
+type applierV3backend struct {
+ lg *zap.Logger
+ kv mvcc.KV
+ alarmStore *v3alarm.AlarmStore
+ authStore auth.AuthStore
+ lessor lease.Lessor
+ cluster *membership.RaftCluster
+ raftStatus RaftStatusGetter
+ snapshotServer SnapshotServer
+ consistentIndex cindex.ConsistentIndexer
+
+ txnModeWriteWithSharedBuffer bool
+}
+
+func newApplierV3Backend(
+ lg *zap.Logger,
+ kv mvcc.KV,
+ alarmStore *v3alarm.AlarmStore,
+ authStore auth.AuthStore,
+ lessor lease.Lessor,
+ cluster *membership.RaftCluster,
+ raftStatus RaftStatusGetter,
+ snapshotServer SnapshotServer,
+ consistentIndex cindex.ConsistentIndexer,
+ txnModeWriteWithSharedBuffer bool,
+) applierV3 {
+ return &applierV3backend{
+ lg: lg,
+ kv: kv,
+ alarmStore: alarmStore,
+ authStore: authStore,
+ lessor: lessor,
+ cluster: cluster,
+ raftStatus: raftStatus,
+ snapshotServer: snapshotServer,
+ consistentIndex: consistentIndex,
+ txnModeWriteWithSharedBuffer: txnModeWriteWithSharedBuffer,
+ }
+}
+
+func (a *applierV3backend) Apply(r *pb.InternalRaftRequest, applyFunc applyFunc) *Result {
+ return applyFunc(r)
+}
+
+func (a *applierV3backend) Put(p *pb.PutRequest) (resp *pb.PutResponse, trace *traceutil.Trace, err error) {
+ return mvcctxn.Put(context.TODO(), a.lg, a.lessor, a.kv, p)
+}
+
+func (a *applierV3backend) DeleteRange(dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, *traceutil.Trace, error) {
+ return mvcctxn.DeleteRange(context.TODO(), a.lg, a.kv, dr)
+}
+
+func (a *applierV3backend) Range(r *pb.RangeRequest) (*pb.RangeResponse, *traceutil.Trace, error) {
+ return mvcctxn.Range(context.TODO(), a.lg, a.kv, r)
+}
+
+func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, *traceutil.Trace, error) {
+ return mvcctxn.Txn(context.TODO(), a.lg, rt, a.txnModeWriteWithSharedBuffer, a.kv, a.lessor)
+}
+
+func (a *applierV3backend) Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, *traceutil.Trace, error) {
+ resp := &pb.CompactionResponse{}
+ resp.Header = &pb.ResponseHeader{}
+ trace := traceutil.New("compact",
+ a.lg,
+ traceutil.Field{Key: "revision", Value: compaction.Revision},
+ )
+
+ ch, err := a.kv.Compact(trace, compaction.Revision)
+ if err != nil {
+ return nil, ch, nil, err
+ }
+ // get the current revision; which key to get is not important
+ rr, _ := a.kv.Range(context.TODO(), []byte("compaction"), nil, mvcc.RangeOptions{})
+ resp.Header.Revision = rr.Rev
+ return resp, ch, trace, err
+}
+
+func (a *applierV3backend) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
+ l, err := a.lessor.Grant(lease.LeaseID(lc.ID), lc.TTL)
+ resp := &pb.LeaseGrantResponse{}
+ if err == nil {
+ resp.ID = int64(l.ID)
+ resp.TTL = l.TTL()
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
+ err := a.lessor.Revoke(lease.LeaseID(lc.ID))
+ return &pb.LeaseRevokeResponse{Header: a.newHeader()}, err
+}
+
+func (a *applierV3backend) LeaseCheckpoint(lc *pb.LeaseCheckpointRequest) (*pb.LeaseCheckpointResponse, error) {
+ for _, c := range lc.Checkpoints {
+ err := a.lessor.Checkpoint(lease.LeaseID(c.ID), c.Remaining_TTL)
+ if err != nil {
+ return &pb.LeaseCheckpointResponse{Header: a.newHeader()}, err
+ }
+ }
+ return &pb.LeaseCheckpointResponse{Header: a.newHeader()}, nil
+}
+
+func (a *applierV3backend) Alarm(ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
+ resp := &pb.AlarmResponse{}
+
+ switch ar.Action {
+ case pb.AlarmRequest_GET:
+ resp.Alarms = a.alarmStore.Get(ar.Alarm)
+ case pb.AlarmRequest_ACTIVATE:
+ if ar.Alarm == pb.AlarmType_NONE {
+ break
+ }
+ m := a.alarmStore.Activate(types.ID(ar.MemberID), ar.Alarm)
+ if m == nil {
+ break
+ }
+ resp.Alarms = append(resp.Alarms, m)
+ alarms.WithLabelValues(types.ID(ar.MemberID).String(), m.Alarm.String()).Inc()
+ case pb.AlarmRequest_DEACTIVATE:
+ m := a.alarmStore.Deactivate(types.ID(ar.MemberID), ar.Alarm)
+ if m == nil {
+ break
+ }
+ resp.Alarms = append(resp.Alarms, m)
+ alarms.WithLabelValues(types.ID(ar.MemberID).String(), m.Alarm.String()).Dec()
+ default:
+ return nil, nil
+ }
+ return resp, nil
+}
+
+type applierV3Capped struct {
+ applierV3
+ q serverstorage.BackendQuota
+}
+
+// newApplierV3Capped creates an applierV3 that rejects Puts and transactions
+// containing Puts so that the number of keys in the store is capped.
+func newApplierV3Capped(base applierV3) applierV3 { return &applierV3Capped{applierV3: base} }
+
+func (a *applierV3Capped) Put(_ *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
+ return nil, nil, errors.ErrNoSpace
+}
+
+func (a *applierV3Capped) Txn(r *pb.TxnRequest) (*pb.TxnResponse, *traceutil.Trace, error) {
+ if a.q.Cost(r) > 0 {
+ return nil, nil, errors.ErrNoSpace
+ }
+ return a.applierV3.Txn(r)
+}
+
+func (a *applierV3Capped) LeaseGrant(_ *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
+ return nil, errors.ErrNoSpace
+}
+
+func (a *applierV3backend) AuthEnable() (*pb.AuthEnableResponse, error) {
+ err := a.authStore.AuthEnable()
+ if err != nil {
+ return nil, err
+ }
+ return &pb.AuthEnableResponse{Header: a.newHeader()}, nil
+}
+
+func (a *applierV3backend) AuthDisable() (*pb.AuthDisableResponse, error) {
+ a.authStore.AuthDisable()
+ return &pb.AuthDisableResponse{Header: a.newHeader()}, nil
+}
+
+func (a *applierV3backend) AuthStatus() (*pb.AuthStatusResponse, error) {
+ enabled := a.authStore.IsAuthEnabled()
+ authRevision := a.authStore.Revision()
+ return &pb.AuthStatusResponse{Header: a.newHeader(), Enabled: enabled, AuthRevision: authRevision}, nil
+}
+
+func (a *applierV3backend) Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error) {
+ ctx := context.WithValue(context.WithValue(context.Background(), auth.AuthenticateParamIndex{}, a.consistentIndex.ConsistentIndex()), auth.AuthenticateParamSimpleTokenPrefix{}, r.SimpleToken)
+ resp, err := a.authStore.Authenticate(ctx, r.Name, r.Password)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
+ resp, err := a.authStore.UserAdd(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) UserDelete(r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
+ resp, err := a.authStore.UserDelete(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
+ resp, err := a.authStore.UserChangePassword(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
+ resp, err := a.authStore.UserGrantRole(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
+ resp, err := a.authStore.UserGet(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
+ resp, err := a.authStore.UserRevokeRole(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
+ resp, err := a.authStore.RoleAdd(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
+ resp, err := a.authStore.RoleGrantPermission(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
+ resp, err := a.authStore.RoleGet(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
+ resp, err := a.authStore.RoleRevokePermission(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
+ resp, err := a.authStore.RoleDelete(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) UserList(r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
+ resp, err := a.authStore.UserList(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
+ resp, err := a.authStore.RoleList(r)
+ if resp != nil {
+ resp.Header = a.newHeader()
+ }
+ return resp, err
+}
+
+type applierMembership struct {
+ lg *zap.Logger
+ cluster *membership.RaftCluster
+ snapshotServer SnapshotServer
+}
+
+func NewApplierMembership(lg *zap.Logger, cluster *membership.RaftCluster, snapshotServer SnapshotServer) *applierMembership {
+ return &applierMembership{
+ lg: lg,
+ cluster: cluster,
+ snapshotServer: snapshotServer,
+ }
+}
+
+func (a *applierMembership) ClusterVersionSet(r *membershippb.ClusterVersionSetRequest, shouldApplyV3 membership.ShouldApplyV3) {
+ prevVersion := a.cluster.Version()
+ newVersion := semver.Must(semver.NewVersion(r.Ver))
+ a.cluster.SetVersion(newVersion, api.UpdateCapability, shouldApplyV3)
+ // Force snapshot after cluster version downgrade.
+ if prevVersion != nil && newVersion.LessThan(*prevVersion) {
+ lg := a.lg
+ if lg != nil {
+ lg.Info("Cluster version downgrade detected, forcing snapshot",
+ zap.String("prev-cluster-version", prevVersion.String()),
+ zap.String("new-cluster-version", newVersion.String()),
+ )
+ }
+ a.snapshotServer.ForceSnapshot()
+ }
+}
+
+func (a *applierMembership) ClusterMemberAttrSet(r *membershippb.ClusterMemberAttrSetRequest, shouldApplyV3 membership.ShouldApplyV3) {
+ a.cluster.UpdateAttributes(
+ types.ID(r.Member_ID),
+ membership.Attributes{
+ Name: r.MemberAttributes.Name,
+ ClientURLs: r.MemberAttributes.ClientUrls,
+ },
+ shouldApplyV3,
+ )
+}
+
+func (a *applierMembership) DowngradeInfoSet(r *membershippb.DowngradeInfoSetRequest, shouldApplyV3 membership.ShouldApplyV3) {
+ d := version.DowngradeInfo{Enabled: false}
+ if r.Enabled {
+ d = version.DowngradeInfo{Enabled: true, TargetVersion: r.Ver}
+ }
+ a.cluster.SetDowngradeInfo(&d, shouldApplyV3)
+}
+
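+// quotaApplierV3 wraps an applierV3 and reports ErrNoSpace for mutating
+// requests (Put, Txn, LeaseGrant) that would exceed the backend quota. The
+// availability check does not short-circuit the apply: the entry is still
+// applied by the wrapped applier so the state machine stays deterministic,
+// and ErrNoSpace is surfaced to the caller instead of a success.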
+type quotaApplierV3 struct {
+ applierV3
+ q serverstorage.Quota
+}
+
+func newQuotaApplierV3(lg *zap.Logger, quotaBackendBytesCfg int64, be backend.Backend, app applierV3) applierV3 {
+ return "aApplierV3{app, serverstorage.NewBackendQuota(lg, quotaBackendBytesCfg, be, "v3-applier")}
+}
+
+func (a *quotaApplierV3) Put(p *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
+ ok := a.q.Available(p)
+ resp, trace, err := a.applierV3.Put(p)
+ if err == nil && !ok {
+ err = errors.ErrNoSpace
+ }
+ return resp, trace, err
+}
+
+func (a *quotaApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, *traceutil.Trace, error) {
+ ok := a.q.Available(rt)
+ resp, trace, err := a.applierV3.Txn(rt)
+ if err == nil && !ok {
+ err = errors.ErrNoSpace
+ }
+ return resp, trace, err
+}
+
+func (a *quotaApplierV3) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
+ ok := a.q.Available(lc)
+ resp, err := a.applierV3.LeaseGrant(lc)
+ if err == nil && !ok {
+ err = errors.ErrNoSpace
+ }
+ return resp, err
+}
+
+func (a *applierV3backend) newHeader() *pb.ResponseHeader {
+ return &pb.ResponseHeader{
+ ClusterId: uint64(a.cluster.ID()),
+ MemberId: uint64(a.raftStatus.MemberID()),
+ Revision: a.kv.Rev(),
+ RaftTerm: a.raftStatus.Term(),
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/apply_auth.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/apply_auth.go
new file mode 100644
index 0000000..3922dee
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/apply_auth.go
@@ -0,0 +1,201 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package apply
+
+import (
+ "sync"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/pkg/v3/traceutil"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/etcdserver/txn"
+ "go.etcd.io/etcd/server/v3/lease"
+)
+
+type authApplierV3 struct {
+ applierV3
+ as auth.AuthStore
+ lessor lease.Lessor
+
+ // mu serializes Apply so that the cached authInfo isn't corrupted by
+ // concurrent requests and so that serialized requests don't leak data
+ // from TOCTOU errors
+ mu sync.Mutex
+
+ authInfo auth.AuthInfo
+}
+
+func newAuthApplierV3(as auth.AuthStore, base applierV3, lessor lease.Lessor) *authApplierV3 {
+ return &authApplierV3{applierV3: base, as: as, lessor: lessor}
+}
+
+func (aa *authApplierV3) Apply(r *pb.InternalRaftRequest, applyFunc applyFunc) *Result {
+ aa.mu.Lock()
+ defer aa.mu.Unlock()
+ if r.Header != nil {
+ // backward-compatible with pre-3.0 releases, in which internalRaftRequest
+ // did not have the header field
+ aa.authInfo.Username = r.Header.Username
+ aa.authInfo.Revision = r.Header.AuthRevision
+ }
+ if needAdminPermission(r) {
+ if err := aa.as.IsAdminPermitted(&aa.authInfo); err != nil {
+ aa.authInfo.Username = ""
+ aa.authInfo.Revision = 0
+ return &Result{Err: err}
+ }
+ }
+ ret := aa.applierV3.Apply(r, applyFunc)
+ aa.authInfo.Username = ""
+ aa.authInfo.Revision = 0
+ return ret
+}
+
+func (aa *authApplierV3) Put(r *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
+ if err := aa.as.IsPutPermitted(&aa.authInfo, r.Key); err != nil {
+ return nil, nil, err
+ }
+
+ if err := aa.checkLeasePuts(lease.LeaseID(r.Lease)); err != nil {
+ // The specified lease is already attached with a key that cannot
+ // be written by this user. It means the user cannot revoke the
+ // lease so attaching the lease to the newly written key should
+ // be forbidden.
+ return nil, nil, err
+ }
+
+ if r.PrevKv {
+ err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, nil)
+ if err != nil {
+ return nil, nil, err
+ }
+ }
+ return aa.applierV3.Put(r)
+}
+
+func (aa *authApplierV3) Range(r *pb.RangeRequest) (*pb.RangeResponse, *traceutil.Trace, error) {
+ if err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, r.RangeEnd); err != nil {
+ return nil, nil, err
+ }
+ return aa.applierV3.Range(r)
+}
+
+func (aa *authApplierV3) DeleteRange(r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, *traceutil.Trace, error) {
+ if err := aa.as.IsDeleteRangePermitted(&aa.authInfo, r.Key, r.RangeEnd); err != nil {
+ return nil, nil, err
+ }
+ if r.PrevKv {
+ err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, r.RangeEnd)
+ if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ return aa.applierV3.DeleteRange(r)
+}
+
+func (aa *authApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, *traceutil.Trace, error) {
+ if err := txn.CheckTxnAuth(aa.as, &aa.authInfo, rt); err != nil {
+ return nil, nil, err
+ }
+ return aa.applierV3.Txn(rt)
+}
+
+func (aa *authApplierV3) LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
+ if err := aa.checkLeasePuts(lease.LeaseID(lc.ID)); err != nil {
+ return nil, err
+ }
+ return aa.applierV3.LeaseRevoke(lc)
+}
+
+func (aa *authApplierV3) checkLeasePuts(leaseID lease.LeaseID) error {
+ l := aa.lessor.Lookup(leaseID)
+ if l != nil {
+ return aa.checkLeasePutsKeys(l)
+ }
+
+ return nil
+}
+
+func (aa *authApplierV3) checkLeasePutsKeys(l *lease.Lease) error {
+ // Early return for the most common scenarios: auth is disabled or the
+ // user is an admin. IsAdminPermitted also checks whether auth is enabled.
+ if err := aa.as.IsAdminPermitted(&aa.authInfo); err == nil {
+ return nil
+ }
+
+ for _, key := range l.Keys() {
+ if err := aa.as.IsPutPermitted(&aa.authInfo, []byte(key)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (aa *authApplierV3) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
+ err := aa.as.IsAdminPermitted(&aa.authInfo)
+ if err != nil && r.Name != aa.authInfo.Username {
+ aa.authInfo.Username = ""
+ aa.authInfo.Revision = 0
+ return &pb.AuthUserGetResponse{}, err
+ }
+
+ return aa.applierV3.UserGet(r)
+}
+
+func (aa *authApplierV3) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
+ err := aa.as.IsAdminPermitted(&aa.authInfo)
+ if err != nil && !aa.as.HasRole(aa.authInfo.Username, r.Role) {
+ aa.authInfo.Username = ""
+ aa.authInfo.Revision = 0
+ return &pb.AuthRoleGetResponse{}, err
+ }
+
+ return aa.applierV3.RoleGet(r)
+}
+
+func needAdminPermission(r *pb.InternalRaftRequest) bool {
+ switch {
+ case r.AuthEnable != nil:
+ return true
+ case r.AuthDisable != nil:
+ return true
+ case r.AuthStatus != nil:
+ return true
+ case r.AuthUserAdd != nil:
+ return true
+ case r.AuthUserDelete != nil:
+ return true
+ case r.AuthUserChangePassword != nil:
+ return true
+ case r.AuthUserGrantRole != nil:
+ return true
+ case r.AuthUserRevokeRole != nil:
+ return true
+ case r.AuthRoleAdd != nil:
+ return true
+ case r.AuthRoleGrantPermission != nil:
+ return true
+ case r.AuthRoleRevokePermission != nil:
+ return true
+ case r.AuthRoleDelete != nil:
+ return true
+ case r.AuthUserList != nil:
+ return true
+ case r.AuthRoleList != nil:
+ return true
+ default:
+ return false
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/corrupt.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/corrupt.go
new file mode 100644
index 0000000..c198119
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/corrupt.go
@@ -0,0 +1,55 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package apply
+
+import (
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/pkg/v3/traceutil"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+)
+
+type applierV3Corrupt struct {
+ applierV3
+}
+
+func newApplierV3Corrupt(a applierV3) *applierV3Corrupt { return &applierV3Corrupt{a} }
+
+func (a *applierV3Corrupt) Put(_ *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
+ return nil, nil, errors.ErrCorrupt
+}
+
+func (a *applierV3Corrupt) Range(_ *pb.RangeRequest) (*pb.RangeResponse, *traceutil.Trace, error) {
+ return nil, nil, errors.ErrCorrupt
+}
+
+func (a *applierV3Corrupt) DeleteRange(_ *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, *traceutil.Trace, error) {
+ return nil, nil, errors.ErrCorrupt
+}
+
+func (a *applierV3Corrupt) Txn(_ *pb.TxnRequest) (*pb.TxnResponse, *traceutil.Trace, error) {
+ return nil, nil, errors.ErrCorrupt
+}
+
+func (a *applierV3Corrupt) Compaction(_ *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, *traceutil.Trace, error) {
+ return nil, nil, nil, errors.ErrCorrupt
+}
+
+func (a *applierV3Corrupt) LeaseGrant(_ *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
+ return nil, errors.ErrCorrupt
+}
+
+func (a *applierV3Corrupt) LeaseRevoke(_ *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
+ return nil, errors.ErrCorrupt
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/metrics.go
new file mode 100644
index 0000000..cafbdc7
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/metrics.go
@@ -0,0 +1,31 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package apply
+
+import "github.com/prometheus/client_golang/prometheus"
+
+var alarms = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "server",
+ Name: "alarms",
+ Help: "Alarms for every member in cluster. 1 for 'server_id' label with current ID. 2 for 'alarm_type' label with type of this alarm",
+ },
+ []string{"server_id", "alarm_type"},
+)
+
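+// init registers the alarms gauge; Prometheus exports it as
+// "etcd_debugging_server_alarms" with "server_id" and "alarm_type" labels.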
+func init() {
+ prometheus.MustRegister(alarms)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/uber_applier.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/uber_applier.go
new file mode 100644
index 0000000..ec7e2aa
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply/uber_applier.go
@@ -0,0 +1,228 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package apply
+
+import (
+ "errors"
+ "time"
+
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3alarm"
+ "go.etcd.io/etcd/server/v3/etcdserver/cindex"
+ "go.etcd.io/etcd/server/v3/etcdserver/txn"
+ "go.etcd.io/etcd/server/v3/lease"
+ "go.etcd.io/etcd/server/v3/storage/backend"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+)
+
+type UberApplier interface {
+ Apply(r *pb.InternalRaftRequest) *Result
+}
+
+type uberApplier struct {
+ lg *zap.Logger
+
+ alarmStore *v3alarm.AlarmStore
+ warningApplyDuration time.Duration
+
+ // This is the applier that takes the currently active alarms into consideration
+ applyV3 applierV3
+
+ // This is the base applier that gets re-wrapped whenever alarms change
+ applyV3base applierV3
+}
+
+func NewUberApplier(
+ lg *zap.Logger,
+ be backend.Backend,
+ kv mvcc.KV,
+ alarmStore *v3alarm.AlarmStore,
+ authStore auth.AuthStore,
+ lessor lease.Lessor,
+ cluster *membership.RaftCluster,
+ raftStatus RaftStatusGetter,
+ snapshotServer SnapshotServer,
+ consistentIndex cindex.ConsistentIndexer,
+ warningApplyDuration time.Duration,
+ txnModeWriteWithSharedBuffer bool,
+ quotaBackendBytesCfg int64,
+) UberApplier {
+ applyV3base := newApplierV3(lg, be, kv, alarmStore, authStore, lessor, cluster, raftStatus, snapshotServer, consistentIndex, txnModeWriteWithSharedBuffer, quotaBackendBytesCfg)
+
+ ua := &uberApplier{
+ lg: lg,
+ alarmStore: alarmStore,
+ warningApplyDuration: warningApplyDuration,
+ applyV3: applyV3base,
+ applyV3base: applyV3base,
+ }
+ ua.restoreAlarms()
+ return ua
+}
+
+func newApplierV3(
+ lg *zap.Logger,
+ be backend.Backend,
+ kv mvcc.KV,
+ alarmStore *v3alarm.AlarmStore,
+ authStore auth.AuthStore,
+ lessor lease.Lessor,
+ cluster *membership.RaftCluster,
+ raftStatus RaftStatusGetter,
+ snapshotServer SnapshotServer,
+ consistentIndex cindex.ConsistentIndexer,
+ txnModeWriteWithSharedBuffer bool,
+ quotaBackendBytesCfg int64,
+) applierV3 {
+ applierBackend := newApplierV3Backend(lg, kv, alarmStore, authStore, lessor, cluster, raftStatus, snapshotServer, consistentIndex, txnModeWriteWithSharedBuffer)
+ return newAuthApplierV3(
+ authStore,
+ newQuotaApplierV3(lg, quotaBackendBytesCfg, be, applierBackend),
+ lessor,
+ )
+}
+
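+// restoreAlarms rebuilds the applier chain from the base applier according to
+// the currently active alarms: a NOSPACE alarm adds the capped applier (which
+// rejects Puts, key-adding Txns, and LeaseGrants) and a CORRUPT alarm adds the
+// corrupt applier (which rejects all KV operations).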
+func (a *uberApplier) restoreAlarms() {
+ noSpaceAlarms := len(a.alarmStore.Get(pb.AlarmType_NOSPACE)) > 0
+ corruptAlarms := len(a.alarmStore.Get(pb.AlarmType_CORRUPT)) > 0
+ a.applyV3 = a.applyV3base
+ if noSpaceAlarms {
+ a.applyV3 = newApplierV3Capped(a.applyV3)
+ }
+ if corruptAlarms {
+ a.applyV3 = newApplierV3Corrupt(a.applyV3)
+ }
+}
+
+func (a *uberApplier) Apply(r *pb.InternalRaftRequest) *Result {
+ // We first execute chain of Apply() calls down the hierarchy:
+ // (i.e. CorruptApplier -> CappedApplier -> Auth -> Quota -> Backend),
+ // then dispatch() unpacks the request to a specific method (like Put),
+ // that gets executed down the hierarchy again:
+ // i.e. CorruptApplier.Put(CappedApplier.Put(...(BackendApplier.Put(...)))).
+ return a.applyV3.Apply(r, a.dispatch)
+}
+
+// dispatch translates the request (r) into the appropriate call (like Put) on
+// the underlying applyV3 object.
+func (a *uberApplier) dispatch(r *pb.InternalRaftRequest) *Result {
+ op := "unknown"
+ ar := &Result{}
+ defer func(start time.Time) {
+ success := ar.Err == nil || errors.Is(ar.Err, mvcc.ErrCompacted)
+ txn.ApplySecObserve(v3Version, op, success, time.Since(start))
+ txn.WarnOfExpensiveRequest(a.lg, a.warningApplyDuration, start, &pb.InternalRaftStringer{Request: r}, ar.Resp, ar.Err)
+ if !success {
+ txn.WarnOfFailedRequest(a.lg, start, &pb.InternalRaftStringer{Request: r}, ar.Resp, ar.Err)
+ }
+ }(time.Now())
+
+ switch {
+ case r.Range != nil:
+ op = "Range"
+ ar.Resp, ar.Trace, ar.Err = a.applyV3.Range(r.Range)
+ case r.Put != nil:
+ op = "Put"
+ ar.Resp, ar.Trace, ar.Err = a.applyV3.Put(r.Put)
+ case r.DeleteRange != nil:
+ op = "DeleteRange"
+ ar.Resp, ar.Trace, ar.Err = a.applyV3.DeleteRange(r.DeleteRange)
+ case r.Txn != nil:
+ op = "Txn"
+ ar.Resp, ar.Trace, ar.Err = a.applyV3.Txn(r.Txn)
+ case r.Compaction != nil:
+ op = "Compaction"
+ ar.Resp, ar.Physc, ar.Trace, ar.Err = a.applyV3.Compaction(r.Compaction)
+ case r.LeaseGrant != nil:
+ op = "LeaseGrant"
+ ar.Resp, ar.Err = a.applyV3.LeaseGrant(r.LeaseGrant)
+ case r.LeaseRevoke != nil:
+ op = "LeaseRevoke"
+ ar.Resp, ar.Err = a.applyV3.LeaseRevoke(r.LeaseRevoke)
+ case r.LeaseCheckpoint != nil:
+ op = "LeaseCheckpoint"
+ ar.Resp, ar.Err = a.applyV3.LeaseCheckpoint(r.LeaseCheckpoint)
+ case r.Alarm != nil:
+ op = "Alarm"
+ ar.Resp, ar.Err = a.Alarm(r.Alarm)
+ case r.Authenticate != nil:
+ op = "Authenticate"
+ ar.Resp, ar.Err = a.applyV3.Authenticate(r.Authenticate)
+ case r.AuthEnable != nil:
+ op = "AuthEnable"
+ ar.Resp, ar.Err = a.applyV3.AuthEnable()
+ case r.AuthDisable != nil:
+ op = "AuthDisable"
+ ar.Resp, ar.Err = a.applyV3.AuthDisable()
+ case r.AuthStatus != nil:
+ op = "AuthStatus"
+ ar.Resp, ar.Err = a.applyV3.AuthStatus()
+ case r.AuthUserAdd != nil:
+ op = "AuthUserAdd"
+ ar.Resp, ar.Err = a.applyV3.UserAdd(r.AuthUserAdd)
+ case r.AuthUserDelete != nil:
+ op = "AuthUserDelete"
+ ar.Resp, ar.Err = a.applyV3.UserDelete(r.AuthUserDelete)
+ case r.AuthUserChangePassword != nil:
+ op = "AuthUserChangePassword"
+ ar.Resp, ar.Err = a.applyV3.UserChangePassword(r.AuthUserChangePassword)
+ case r.AuthUserGrantRole != nil:
+ op = "AuthUserGrantRole"
+ ar.Resp, ar.Err = a.applyV3.UserGrantRole(r.AuthUserGrantRole)
+ case r.AuthUserGet != nil:
+ op = "AuthUserGet"
+ ar.Resp, ar.Err = a.applyV3.UserGet(r.AuthUserGet)
+ case r.AuthUserRevokeRole != nil:
+ op = "AuthUserRevokeRole"
+ ar.Resp, ar.Err = a.applyV3.UserRevokeRole(r.AuthUserRevokeRole)
+ case r.AuthRoleAdd != nil:
+ op = "AuthRoleAdd"
+ ar.Resp, ar.Err = a.applyV3.RoleAdd(r.AuthRoleAdd)
+ case r.AuthRoleGrantPermission != nil:
+ op = "AuthRoleGrantPermission"
+ ar.Resp, ar.Err = a.applyV3.RoleGrantPermission(r.AuthRoleGrantPermission)
+ case r.AuthRoleGet != nil:
+ op = "AuthRoleGet"
+ ar.Resp, ar.Err = a.applyV3.RoleGet(r.AuthRoleGet)
+ case r.AuthRoleRevokePermission != nil:
+ op = "AuthRoleRevokePermission"
+ ar.Resp, ar.Err = a.applyV3.RoleRevokePermission(r.AuthRoleRevokePermission)
+ case r.AuthRoleDelete != nil:
+ op = "AuthRoleDelete"
+ ar.Resp, ar.Err = a.applyV3.RoleDelete(r.AuthRoleDelete)
+ case r.AuthUserList != nil:
+ op = "AuthUserList"
+ ar.Resp, ar.Err = a.applyV3.UserList(r.AuthUserList)
+ case r.AuthRoleList != nil:
+ op = "AuthRoleList"
+ ar.Resp, ar.Err = a.applyV3.RoleList(r.AuthRoleList)
+ default:
+ a.lg.Panic("not implemented apply", zap.Stringer("raft-request", r))
+ }
+ return ar
+}
+
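+// Alarm applies an alarm request and, when the request activates or
+// deactivates an alarm, rebuilds the applier chain so that subsequent
+// requests observe the new alarm state.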
+func (a *uberApplier) Alarm(ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
+ resp, err := a.applyV3.Alarm(ar)
+
+ if ar.Action == pb.AlarmRequest_ACTIVATE ||
+ ar.Action == pb.AlarmRequest_DEACTIVATE {
+ a.restoreAlarms()
+ }
+ return resp, err
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply_v2.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply_v2.go
new file mode 100644
index 0000000..60442fc
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/apply_v2.go
@@ -0,0 +1,64 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "encoding/json"
+ "net/http"
+ "path"
+
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/membershippb"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+)
+
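+// v2ToV3Request translates the only two v2 writes still allowed in the WAL at
+// the --v2-deprecation=write-only stage (member attribute updates and cluster
+// version updates) into their v3 InternalRaftRequest equivalents; any other
+// v2 request causes a panic.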
+func v2ToV3Request(lg *zap.Logger, r *RequestV2) pb.InternalRaftRequest {
+ if r.Method != http.MethodPut || (!storeMemberAttributeRegexp.MatchString(r.Path) && r.Path != membership.StoreClusterVersionKey()) {
+ lg.Panic("detected disallowed v2 WAL for stage --v2-deprecation=write-only", zap.String("method", r.Method))
+ }
+ if storeMemberAttributeRegexp.MatchString(r.Path) {
+ id := membership.MustParseMemberIDFromKey(lg, path.Dir(r.Path))
+ var attr membership.Attributes
+ if err := json.Unmarshal([]byte(r.Val), &attr); err != nil {
+ lg.Panic("failed to unmarshal", zap.String("value", r.Val), zap.Error(err))
+ }
+ return pb.InternalRaftRequest{
+ Header: &pb.RequestHeader{
+ ID: r.ID,
+ },
+ ClusterMemberAttrSet: &membershippb.ClusterMemberAttrSetRequest{
+ Member_ID: uint64(id),
+ MemberAttributes: &membershippb.Attributes{
+ Name: attr.Name,
+ ClientUrls: attr.ClientURLs,
+ },
+ },
+ }
+ }
+ if r.Path == membership.StoreClusterVersionKey() {
+ return pb.InternalRaftRequest{
+ Header: &pb.RequestHeader{
+ ID: r.ID,
+ },
+ ClusterVersionSet: &membershippb.ClusterVersionSetRequest{
+ Ver: r.Val,
+ },
+ }
+ }
+ lg.Panic("detected disallowed v2 WAL for stage --v2-deprecation=write-only", zap.String("method", r.Method))
+ return pb.InternalRaftRequest{}
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/bootstrap.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/bootstrap.go
new file mode 100644
index 0000000..b76d979
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/bootstrap.go
@@ -0,0 +1,733 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/coreos/go-semver/semver"
+ "github.com/dustin/go-humanize"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/client/pkg/v3/fileutil"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/pkg/v3/pbutil"
+ "go.etcd.io/etcd/server/v3/config"
+ "go.etcd.io/etcd/server/v3/etcdserver/api"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/snap"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2discovery"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3discovery"
+ "go.etcd.io/etcd/server/v3/etcdserver/cindex"
+ servererrors "go.etcd.io/etcd/server/v3/etcdserver/errors"
+ serverstorage "go.etcd.io/etcd/server/v3/storage"
+ "go.etcd.io/etcd/server/v3/storage/backend"
+ "go.etcd.io/etcd/server/v3/storage/schema"
+ "go.etcd.io/etcd/server/v3/storage/wal"
+ "go.etcd.io/etcd/server/v3/storage/wal/walpb"
+ "go.etcd.io/raft/v3"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) {
+ if cfg.MaxRequestBytes > recommendedMaxRequestBytes {
+ cfg.Logger.Warn(
+ "exceeded recommended request limit",
+ zap.Uint("max-request-bytes", cfg.MaxRequestBytes),
+ zap.String("max-request-size", humanize.Bytes(uint64(cfg.MaxRequestBytes))),
+ zap.Int("recommended-request-bytes", recommendedMaxRequestBytes),
+ zap.String("recommended-request-size", recommendedMaxRequestBytesString),
+ )
+ }
+
+ if terr := fileutil.TouchDirAll(cfg.Logger, cfg.DataDir); terr != nil {
+ return nil, fmt.Errorf("cannot access data directory: %w", terr)
+ }
+
+ if terr := fileutil.TouchDirAll(cfg.Logger, cfg.MemberDir()); terr != nil {
+ return nil, fmt.Errorf("cannot access member directory: %w", terr)
+ }
+ ss := bootstrapSnapshot(cfg)
+ prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.PeerDialTimeout())
+ if err != nil {
+ return nil, err
+ }
+
+ haveWAL := wal.Exist(cfg.WALDir())
+ st := v2store.New(StoreClusterPrefix, StoreKeysPrefix)
+ backend, err := bootstrapBackend(cfg, haveWAL, st, ss)
+ if err != nil {
+ return nil, err
+ }
+ var bwal *bootstrappedWAL
+
+ if haveWAL {
+ if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
+ return nil, fmt.Errorf("cannot write to WAL directory: %w", err)
+ }
+ cfg.Logger.Info("Bootstrapping WAL from snapshot")
+ bwal = bootstrapWALFromSnapshot(cfg, backend.snapshot, backend.ci)
+ }
+
+ cfg.Logger.Info("bootstrapping cluster")
+ cluster, err := bootstrapCluster(cfg, bwal, prt)
+ if err != nil {
+ backend.Close()
+ return nil, err
+ }
+
+ cfg.Logger.Info("bootstrapping storage")
+ s := bootstrapStorage(cfg, st, backend, bwal, cluster)
+
+ if err = cluster.Finalize(cfg, s); err != nil {
+ backend.Close()
+ return nil, err
+ }
+
+ cfg.Logger.Info("bootstrapping raft")
+ raft := bootstrapRaft(cfg, cluster, s.wal)
+ return &bootstrappedServer{
+ prt: prt,
+ ss: ss,
+ storage: s,
+ cluster: cluster,
+ raft: raft,
+ }, nil
+}
+
+type bootstrappedServer struct {
+ storage *bootstrappedStorage
+ cluster *bootstrappedCluster
+ raft *bootstrappedRaft
+ prt http.RoundTripper
+ ss *snap.Snapshotter
+}
+
+func (s *bootstrappedServer) Close() {
+ s.storage.Close()
+}
+
+type bootstrappedStorage struct {
+ backend *bootstrappedBackend
+ wal *bootstrappedWAL
+ st v2store.Store
+}
+
+func (s *bootstrappedStorage) Close() {
+ s.backend.Close()
+}
+
+type bootstrappedBackend struct {
+ beHooks *serverstorage.BackendHooks
+ be backend.Backend
+ ci cindex.ConsistentIndexer
+ beExist bool
+ snapshot *raftpb.Snapshot
+}
+
+func (s *bootstrappedBackend) Close() {
+ s.be.Close()
+}
+
+type bootstrappedCluster struct {
+ remotes []*membership.Member
+ cl *membership.RaftCluster
+ nodeID types.ID
+}
+
+type bootstrappedRaft struct {
+ lg *zap.Logger
+ heartbeat time.Duration
+
+ peers []raft.Peer
+ config *raft.Config
+ storage *raft.MemoryStorage
+}
+
+func bootstrapStorage(cfg config.ServerConfig, st v2store.Store, be *bootstrappedBackend, wal *bootstrappedWAL, cl *bootstrappedCluster) *bootstrappedStorage {
+ if wal == nil {
+ wal = bootstrapNewWAL(cfg, cl)
+ }
+
+ return &bootstrappedStorage{
+ backend: be,
+ st: st,
+ wal: wal,
+ }
+}
+
+func bootstrapSnapshot(cfg config.ServerConfig) *snap.Snapshotter {
+ if err := fileutil.TouchDirAll(cfg.Logger, cfg.SnapDir()); err != nil {
+ cfg.Logger.Fatal(
+ "failed to create snapshot directory",
+ zap.String("path", cfg.SnapDir()),
+ zap.Error(err),
+ )
+ }
+
+ if err := fileutil.RemoveMatchFile(cfg.Logger, cfg.SnapDir(), func(fileName string) bool {
+ return strings.HasPrefix(fileName, "tmp")
+ }); err != nil {
+ cfg.Logger.Error(
+ "failed to remove temp file(s) in snapshot directory",
+ zap.String("path", cfg.SnapDir()),
+ zap.Error(err),
+ )
+ }
+ return snap.New(cfg.Logger, cfg.SnapDir())
+}
+
+func bootstrapBackend(cfg config.ServerConfig, haveWAL bool, st v2store.Store, ss *snap.Snapshotter) (backend *bootstrappedBackend, err error) {
+ beExist := fileutil.Exist(cfg.BackendPath())
+ ci := cindex.NewConsistentIndex(nil)
+ beHooks := serverstorage.NewBackendHooks(cfg.Logger, ci)
+ be := serverstorage.OpenBackend(cfg, beHooks)
+ defer func() {
+ if err != nil && be != nil {
+ be.Close()
+ }
+ }()
+ ci.SetBackend(be)
+ schema.CreateMetaBucket(be.BatchTx())
+ if cfg.BootstrapDefragThresholdMegabytes != 0 {
+ err = maybeDefragBackend(cfg, be)
+ if err != nil {
+ return nil, err
+ }
+ }
+ cfg.Logger.Info("restore consistentIndex", zap.Uint64("index", ci.ConsistentIndex()))
+
+ // TODO(serathius): Implement schema setup in fresh storage
+ var snapshot *raftpb.Snapshot
+ if haveWAL {
+ snapshot, be, err = recoverSnapshot(cfg, st, be, beExist, beHooks, ci, ss)
+ if err != nil {
+ return nil, err
+ }
+ }
+ if beExist {
+ s1, s2 := be.Size(), be.SizeInUse()
+ cfg.Logger.Info(
+ "recovered v3 backend",
+ zap.Int64("backend-size-bytes", s1),
+ zap.String("backend-size", humanize.Bytes(uint64(s1))),
+ zap.Int64("backend-size-in-use-bytes", s2),
+ zap.String("backend-size-in-use", humanize.Bytes(uint64(s2))),
+ )
+ if err = schema.Validate(cfg.Logger, be.ReadTx()); err != nil {
+ cfg.Logger.Error("Failed to validate schema", zap.Error(err))
+ return nil, err
+ }
+ }
+
+ return &bootstrappedBackend{
+ beHooks: beHooks,
+ be: be,
+ ci: ci,
+ beExist: beExist,
+ snapshot: snapshot,
+ }, nil
+}
+
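+// maybeDefragBackend defragments the backend at bootstrap only when the
+// reclaimable space (total size minus size in use) meets the configured
+// threshold; otherwise defragmentation is skipped.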
+func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error {
+ size := be.Size()
+ sizeInUse := be.SizeInUse()
+ freeableMemory := uint(size - sizeInUse)
+ thresholdBytes := cfg.BootstrapDefragThresholdMegabytes * 1024 * 1024
+ if freeableMemory < thresholdBytes {
+ cfg.Logger.Info("Skipping defragmentation",
+ zap.Int64("current-db-size-bytes", size),
+ zap.String("current-db-size", humanize.Bytes(uint64(size))),
+ zap.Int64("current-db-size-in-use-bytes", sizeInUse),
+ zap.String("current-db-size-in-use", humanize.Bytes(uint64(sizeInUse))),
+ zap.Uint("experimental-bootstrap-defrag-threshold-bytes", thresholdBytes),
+ zap.String("experimental-bootstrap-defrag-threshold", humanize.Bytes(uint64(thresholdBytes))),
+ )
+ return nil
+ }
+ return be.Defrag()
+}
+
+func bootstrapCluster(cfg config.ServerConfig, bwal *bootstrappedWAL, prt http.RoundTripper) (c *bootstrappedCluster, err error) {
+ switch {
+ case bwal == nil && !cfg.NewCluster:
+ c, err = bootstrapExistingClusterNoWAL(cfg, prt)
+ case bwal == nil && cfg.NewCluster:
+ c, err = bootstrapNewClusterNoWAL(cfg, prt)
+ case bwal != nil && bwal.haveWAL:
+ c, err = bootstrapClusterWithWAL(cfg, bwal.meta)
+ default:
+ return nil, fmt.Errorf("unsupported bootstrap config")
+ }
+ if err != nil {
+ return nil, err
+ }
+ return c, nil
+}
+
+func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper) (*bootstrappedCluster, error) {
+ if err := cfg.VerifyJoinExisting(); err != nil {
+ return nil, err
+ }
+ cl, err := membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, cfg.InitialPeerURLsMap, membership.WithMaxLearners(cfg.MaxLearners))
+ if err != nil {
+ return nil, err
+ }
+ existingCluster, gerr := GetClusterFromRemotePeers(cfg.Logger, getRemotePeerURLs(cl, cfg.Name), prt)
+ if gerr != nil {
+ return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %w", gerr)
+ }
+ if err := membership.ValidateClusterAndAssignIDs(cfg.Logger, cl, existingCluster); err != nil {
+ return nil, fmt.Errorf("error validating peerURLs %s: %w", existingCluster, err)
+ }
+ if !isCompatibleWithCluster(cfg.Logger, cl, cl.MemberByName(cfg.Name).ID, prt, cfg.ReqTimeout()) {
+ return nil, fmt.Errorf("incompatible with current running cluster")
+ }
+ scaleUpLearners := false
+ if err := membership.ValidateMaxLearnerConfig(cfg.MaxLearners, existingCluster.Members(), scaleUpLearners); err != nil {
+ return nil, err
+ }
+ remotes := existingCluster.Members()
+ cl.SetID(types.ID(0), existingCluster.ID())
+ member := cl.MemberByName(cfg.Name)
+ return &bootstrappedCluster{
+ remotes: remotes,
+ cl: cl,
+ nodeID: member.ID,
+ }, nil
+}
+
+func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper) (*bootstrappedCluster, error) {
+ if err := cfg.VerifyBootstrap(); err != nil {
+ return nil, err
+ }
+ cl, err := membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, cfg.InitialPeerURLsMap, membership.WithMaxLearners(cfg.MaxLearners))
+ if err != nil {
+ return nil, err
+ }
+ m := cl.MemberByName(cfg.Name)
+ if isMemberBootstrapped(cfg.Logger, cl, cfg.Name, prt, cfg.BootstrapTimeoutEffective()) {
+ return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
+ }
+ if cfg.ShouldDiscover() {
+ var str string
+ if cfg.DiscoveryURL != "" {
+ cfg.Logger.Warn("V2 discovery is deprecated!")
+ str, err = v2discovery.JoinCluster(cfg.Logger, cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
+ } else {
+ cfg.Logger.Info("Bootstrapping cluster using v3 discovery.")
+ str, err = v3discovery.JoinCluster(cfg.Logger, &cfg.DiscoveryCfg, m.ID, cfg.InitialPeerURLsMap.String())
+ }
+ if err != nil {
+ return nil, &servererrors.DiscoveryError{Op: "join", Err: err}
+ }
+ var urlsmap types.URLsMap
+ urlsmap, err = types.NewURLsMap(str)
+ if err != nil {
+ return nil, err
+ }
+ if config.CheckDuplicateURL(urlsmap) {
+ return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
+ }
+ if cl, err = membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, urlsmap, membership.WithMaxLearners(cfg.MaxLearners)); err != nil {
+ return nil, err
+ }
+ }
+ return &bootstrappedCluster{
+ remotes: nil,
+ cl: cl,
+ nodeID: m.ID,
+ }, nil
+}
+
+func bootstrapClusterWithWAL(cfg config.ServerConfig, meta *snapshotMetadata) (*bootstrappedCluster, error) {
+ if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
+ return nil, fmt.Errorf("cannot write to member directory: %w", err)
+ }
+
+ if cfg.ShouldDiscover() {
+ cfg.Logger.Warn(
+ "discovery token is ignored since cluster already initialized; valid logs are found",
+ zap.String("wal-dir", cfg.WALDir()),
+ )
+ }
+ cl := membership.NewCluster(cfg.Logger, membership.WithMaxLearners(cfg.MaxLearners))
+
+ scaleUpLearners := false
+ if err := membership.ValidateMaxLearnerConfig(cfg.MaxLearners, cl.Members(), scaleUpLearners); err != nil {
+ return nil, err
+ }
+
+ cl.SetID(meta.nodeID, meta.clusterID)
+ return &bootstrappedCluster{
+ cl: cl,
+ nodeID: meta.nodeID,
+ }, nil
+}
+
+func recoverSnapshot(cfg config.ServerConfig, st v2store.Store, be backend.Backend, beExist bool, beHooks *serverstorage.BackendHooks, ci cindex.ConsistentIndexer, ss *snap.Snapshotter) (*raftpb.Snapshot, backend.Backend, error) {
+ // Find a snapshot to start/restart a raft node
+ walSnaps, err := wal.ValidSnapshotEntries(cfg.Logger, cfg.WALDir())
+ if err != nil {
+ return nil, be, err
+ }
+ // Snapshot files can be orphaned if etcd crashes after writing them but
+ // before writing the corresponding WAL log entries.
+ snapshot, err := ss.LoadNewestAvailable(walSnaps)
+ if err != nil && !errors.Is(err, snap.ErrNoSnapshot) {
+ return nil, be, err
+ }
+
+ if snapshot != nil {
+ if err = st.Recovery(snapshot.Data); err != nil {
+ cfg.Logger.Panic("failed to recover from snapshot", zap.Error(err))
+ }
+
+ if err = serverstorage.AssertNoV2StoreContent(cfg.Logger, st, cfg.V2Deprecation); err != nil {
+ cfg.Logger.Error("illegal v2store content", zap.Error(err))
+ return nil, be, err
+ }
+
+ cfg.Logger.Info(
+ "recovered v2 store from snapshot",
+ zap.Uint64("snapshot-index", snapshot.Metadata.Index),
+ zap.String("snapshot-size", humanize.Bytes(uint64(snapshot.Size()))),
+ )
+
+ if be, err = serverstorage.RecoverSnapshotBackend(cfg, be, *snapshot, beExist, beHooks); err != nil {
+ cfg.Logger.Panic("failed to recover v3 backend from snapshot", zap.Error(err))
+ }
+ // A snapshot db may have already been recovered, and the old db should have
+ // already been closed in this case, so we should set the backend again.
+ ci.SetBackend(be)
+
+ if beExist {
+ // TODO: remove kvindex != 0 checking when we do not expect users to upgrade
+ // etcd from pre-3.0 release.
+ kvindex := ci.ConsistentIndex()
+ if kvindex < snapshot.Metadata.Index {
+ if kvindex != 0 {
+ return nil, be, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d)", cfg.BackendPath(), kvindex, snapshot.Metadata.Index)
+ }
+ cfg.Logger.Warn(
+ "consistent index was never saved",
+ zap.Uint64("snapshot-index", snapshot.Metadata.Index),
+ )
+ }
+ }
+ } else {
+ cfg.Logger.Info("No snapshot found. Recovering WAL from scratch!")
+ }
+ return snapshot, be, nil
+}
+
+func (c *bootstrappedCluster) Finalize(cfg config.ServerConfig, s *bootstrappedStorage) error {
+ if !s.wal.haveWAL {
+ c.cl.SetID(c.nodeID, c.cl.ID())
+ }
+ c.cl.SetStore(s.st)
+ c.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, s.backend.be))
+
+ // Work around clusters that have already been affected by
+ // https://github.com/etcd-io/etcd/issues/19557.
+ c.cl.SyncLearnerPromotionIfNeeded()
+
+ if s.wal.haveWAL {
+ c.cl.Recover(api.UpdateCapability)
+ if c.databaseFileMissing(s) {
+ bepath := cfg.BackendPath()
+ os.RemoveAll(bepath)
+ return fmt.Errorf("database file (%v) of the backend is missing", bepath)
+ }
+ }
+ scaleUpLearners := false
+ return membership.ValidateMaxLearnerConfig(cfg.MaxLearners, c.cl.Members(), scaleUpLearners)
+}
+
+func (c *bootstrappedCluster) databaseFileMissing(s *bootstrappedStorage) bool {
+ v3Cluster := c.cl.Version() != nil && !c.cl.Version().LessThan(semver.Version{Major: 3})
+ return v3Cluster && !s.backend.beExist
+}
+
+func bootstrapRaft(cfg config.ServerConfig, cluster *bootstrappedCluster, bwal *bootstrappedWAL) *bootstrappedRaft {
+ switch {
+ case !bwal.haveWAL && !cfg.NewCluster:
+ return bootstrapRaftFromCluster(cfg, cluster.cl, nil, bwal)
+ case !bwal.haveWAL && cfg.NewCluster:
+ return bootstrapRaftFromCluster(cfg, cluster.cl, cluster.cl.MemberIDs(), bwal)
+ case bwal.haveWAL:
+ return bootstrapRaftFromWAL(cfg, bwal)
+ default:
+ cfg.Logger.Panic("unsupported bootstrap config")
+ return nil
+ }
+}
+
+func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluster, ids []types.ID, bwal *bootstrappedWAL) *bootstrappedRaft {
+ member := cl.MemberByName(cfg.Name)
+ peers := make([]raft.Peer, len(ids))
+ for i, id := range ids {
+ var ctx []byte
+ ctx, err := json.Marshal((*cl).Member(id))
+ if err != nil {
+ cfg.Logger.Panic("failed to marshal member", zap.Error(err))
+ }
+ peers[i] = raft.Peer{ID: uint64(id), Context: ctx}
+ }
+ cfg.Logger.Info(
+ "starting local member",
+ zap.String("local-member-id", member.ID.String()),
+ zap.String("cluster-id", cl.ID().String()),
+ )
+ s := bwal.MemoryStorage()
+ return &bootstrappedRaft{
+ lg: cfg.Logger,
+ heartbeat: time.Duration(cfg.TickMs) * time.Millisecond,
+ config: raftConfig(cfg, uint64(member.ID), s),
+ peers: peers,
+ storage: s,
+ }
+}
+
+func bootstrapRaftFromWAL(cfg config.ServerConfig, bwal *bootstrappedWAL) *bootstrappedRaft {
+ s := bwal.MemoryStorage()
+ return &bootstrappedRaft{
+ lg: cfg.Logger,
+ heartbeat: time.Duration(cfg.TickMs) * time.Millisecond,
+ config: raftConfig(cfg, uint64(bwal.meta.nodeID), s),
+ storage: s,
+ }
+}
+
+func raftConfig(cfg config.ServerConfig, id uint64, s *raft.MemoryStorage) *raft.Config {
+ return &raft.Config{
+ ID: id,
+ ElectionTick: cfg.ElectionTicks,
+ HeartbeatTick: 1,
+ Storage: s,
+ MaxSizePerMsg: maxSizePerMsg,
+ MaxInflightMsgs: maxInflightMsgs,
+ CheckQuorum: true,
+ PreVote: cfg.PreVote,
+ Logger: NewRaftLoggerZap(cfg.Logger.Named("raft")),
+ }
+}
+
+func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL, cl *membership.RaftCluster) *raftNode {
+ var n raft.Node
+ if len(b.peers) == 0 {
+ n = raft.RestartNode(b.config)
+ } else {
+ n = raft.StartNode(b.config, b.peers)
+ }
+ raftStatusMu.Lock()
+ raftStatus = n.Status
+ raftStatusMu.Unlock()
+ return newRaftNode(
+ raftNodeConfig{
+ lg: b.lg,
+ isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
+ Node: n,
+ heartbeat: b.heartbeat,
+ raftStorage: b.storage,
+ storage: serverstorage.NewStorage(b.lg, wal, ss),
+ },
+ )
+}
+
+func bootstrapWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot, ci cindex.ConsistentIndexer) *bootstrappedWAL {
+ wal, st, ents, snap, meta := openWALFromSnapshot(cfg, snapshot)
+ bwal := &bootstrappedWAL{
+ lg: cfg.Logger,
+ w: wal,
+ st: st,
+ ents: ents,
+ snapshot: snap,
+ meta: meta,
+ haveWAL: true,
+ }
+
+ if cfg.ForceNewCluster {
+ consistentIndex := ci.ConsistentIndex()
+ oldCommitIndex := bwal.st.Commit
+ // If only `HardState.Commit` increases, HardState won't be persisted
+ // to disk, even though the committed entries might have already been
+ // applied. This can result in consistent_index > CommitIndex.
+ //
+ // When restarting etcd with `--force-new-cluster`, all uncommitted
+ // entries are dropped. To avoid losing entries that were actually
+ // committed, we reset Commit to max(HardState.Commit, consistent_index).
+ //
+ // See: https://github.com/etcd-io/raft/pull/300 for more details.
+ bwal.st.Commit = max(oldCommitIndex, consistentIndex)
+
+ // discard the previously uncommitted entries
+ bwal.ents = bwal.CommitedEntries()
+ entries := bwal.NewConfigChangeEntries()
+ // force commit config change entries
+ bwal.AppendAndCommitEntries(entries)
+ cfg.Logger.Info(
+ "forcing restart member",
+ zap.String("cluster-id", meta.clusterID.String()),
+ zap.String("local-member-id", meta.nodeID.String()),
+ zap.Uint64("wal-commit-index", oldCommitIndex),
+ zap.Uint64("commit-index", bwal.st.Commit),
+ )
+ } else {
+ cfg.Logger.Info(
+ "restarting local member",
+ zap.String("cluster-id", meta.clusterID.String()),
+ zap.String("local-member-id", meta.nodeID.String()),
+ zap.Uint64("commit-index", bwal.st.Commit),
+ )
+ }
+ return bwal
+}
+
+// openWALFromSnapshot reads the WAL at the given snapshot position and returns
+// the WAL, its latest HardState, the cluster ID, and all entries that appear
+// after the position of the given snapshot in the WAL.
+// The snapshot must have been previously saved to the WAL, or this call panics.
+func openWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot) (*wal.WAL, *raftpb.HardState, []raftpb.Entry, *raftpb.Snapshot, *snapshotMetadata) {
+ var walsnap walpb.Snapshot
+ if snapshot != nil {
+ walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
+ }
+ repaired := false
+ for {
+ w, err := wal.Open(cfg.Logger, cfg.WALDir(), walsnap)
+ if err != nil {
+ cfg.Logger.Fatal("failed to open WAL", zap.Error(err))
+ }
+ if cfg.UnsafeNoFsync {
+ w.SetUnsafeNoFsync()
+ }
+ wmetadata, st, ents, err := w.ReadAll()
+ if err != nil {
+ w.Close()
+ // we can only repair ErrUnexpectedEOF and we never repair twice.
+ if repaired || !errors.Is(err, io.ErrUnexpectedEOF) {
+ cfg.Logger.Fatal("failed to read WAL, cannot be repaired", zap.Error(err))
+ }
+ if !wal.Repair(cfg.Logger, cfg.WALDir()) {
+ cfg.Logger.Fatal("failed to repair WAL", zap.Error(err))
+ } else {
+ cfg.Logger.Info("repaired WAL", zap.Error(err))
+ repaired = true
+ }
+ continue
+ }
+ var metadata etcdserverpb.Metadata
+ pbutil.MustUnmarshal(&metadata, wmetadata)
+ id := types.ID(metadata.NodeID)
+ cid := types.ID(metadata.ClusterID)
+ meta := &snapshotMetadata{clusterID: cid, nodeID: id}
+ return w, &st, ents, snapshot, meta
+ }
+}
+
+type snapshotMetadata struct {
+ nodeID, clusterID types.ID
+}
+
+func bootstrapNewWAL(cfg config.ServerConfig, cl *bootstrappedCluster) *bootstrappedWAL {
+ metadata := pbutil.MustMarshal(
+ &etcdserverpb.Metadata{
+ NodeID: uint64(cl.nodeID),
+ ClusterID: uint64(cl.cl.ID()),
+ },
+ )
+ w, err := wal.Create(cfg.Logger, cfg.WALDir(), metadata)
+ if err != nil {
+ cfg.Logger.Panic("failed to create WAL", zap.Error(err))
+ }
+ if cfg.UnsafeNoFsync {
+ w.SetUnsafeNoFsync()
+ }
+ return &bootstrappedWAL{
+ lg: cfg.Logger,
+ w: w,
+ }
+}
+
+type bootstrappedWAL struct {
+ lg *zap.Logger
+
+ haveWAL bool
+ w *wal.WAL
+ st *raftpb.HardState
+ ents []raftpb.Entry
+ snapshot *raftpb.Snapshot
+ meta *snapshotMetadata
+}
+
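+// MemoryStorage builds an in-memory raft storage from the bootstrapped WAL
+// state by applying the snapshot, hard state, and entries, in that order.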
+func (wal *bootstrappedWAL) MemoryStorage() *raft.MemoryStorage {
+ s := raft.NewMemoryStorage()
+ if wal.snapshot != nil {
+ s.ApplySnapshot(*wal.snapshot)
+ }
+ if wal.st != nil {
+ s.SetHardState(*wal.st)
+ }
+ if len(wal.ents) != 0 {
+ s.Append(wal.ents)
+ }
+ return s
+}
+
+func (wal *bootstrappedWAL) CommitedEntries() []raftpb.Entry {
+ for i, ent := range wal.ents {
+ if ent.Index > wal.st.Commit {
+ wal.lg.Info(
+ "discarding uncommitted WAL entries",
+ zap.Uint64("entry-index", ent.Index),
+ zap.Uint64("commit-index-from-wal", wal.st.Commit),
+ zap.Int("number-of-discarded-entries", len(wal.ents)-i),
+ )
+ return wal.ents[:i]
+ }
+ }
+ return wal.ents
+}
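+
+// For example (illustrative): with wal.ents holding entries at indices 5, 6
+// and 7 and wal.st.Commit == 6, CommitedEntries returns the entries at
+// indices 5 and 6 and logs that one uncommitted entry (index 7) was dropped.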
+
+func (wal *bootstrappedWAL) NewConfigChangeEntries() []raftpb.Entry {
+ return serverstorage.CreateConfigChangeEnts(
+ wal.lg,
+ serverstorage.GetEffectiveNodeIDsFromWALEntries(wal.lg, wal.snapshot, wal.ents),
+ uint64(wal.meta.nodeID),
+ wal.st.Term,
+ wal.st.Commit,
+ )
+}
+
+func (wal *bootstrappedWAL) AppendAndCommitEntries(ents []raftpb.Entry) {
+ wal.ents = append(wal.ents, ents...)
+ err := wal.w.Save(raftpb.HardState{}, ents)
+ if err != nil {
+ wal.lg.Fatal("failed to save hard state and entries", zap.Error(err))
+ }
+ if len(wal.ents) != 0 {
+ wal.st.Commit = wal.ents[len(wal.ents)-1].Index
+ }
+}
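+
+// A minimal sketch of how the force-new-cluster path composes the helpers
+// above (illustrative only; bwal stands for a *bootstrappedWAL opened from an
+// existing WAL):
+//
+//	bwal.ents = bwal.CommitedEntries()    // drop entries past HardState.Commit
+//	ents := bwal.NewConfigChangeEntries() // synthesize config-change entries
+//	bwal.AppendAndCommitEntries(ents)     // persist them and advance st.Commit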
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/cindex/cindex.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/cindex/cindex.go
new file mode 100644
index 0000000..b865742
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/cindex/cindex.go
@@ -0,0 +1,178 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cindex
+
+import (
+ "sync"
+ "sync/atomic"
+
+ "go.etcd.io/etcd/server/v3/storage/backend"
+ "go.etcd.io/etcd/server/v3/storage/schema"
+)
+
+type Backend interface {
+ ReadTx() backend.ReadTx
+}
+
+// ConsistentIndexer is an interface that wraps the Get/Set/Save methods for consistentIndex.
+type ConsistentIndexer interface {
+ // ConsistentIndex returns the consistent index of the currently executing entry.
+ ConsistentIndex() uint64
+
+ // ConsistentApplyingIndex returns the consistent applying index and term of the currently executing entry.
+ ConsistentApplyingIndex() (uint64, uint64)
+
+ // UnsafeConsistentIndex is similar to ConsistentIndex, but it doesn't lock the transaction.
+ UnsafeConsistentIndex() uint64
+
+ // SetConsistentIndex sets the consistent index of the currently executing entry.
+ SetConsistentIndex(v uint64, term uint64)
+
+ // SetConsistentApplyingIndex sets the consistent applying index of the currently executing entry.
+ SetConsistentApplyingIndex(v uint64, term uint64)
+
+ // UnsafeSave must be called while holding the lock on the tx.
+ // It saves consistentIndex to the underlying stable storage.
+ UnsafeSave(tx backend.UnsafeReadWriter)
+
+ // SetBackend sets the available backend.BatchTx for the ConsistentIndexer.
+ SetBackend(be Backend)
+}
+
+// consistentIndex implements the ConsistentIndexer interface.
+type consistentIndex struct {
+ // consistentIndex represents the offset of an entry in a consistent replica log.
+ // It caches the "consistent_index" key's value.
+ // Accessed through atomics so must be 64-bit aligned.
+ consistentIndex uint64
+ // term represents the RAFT term of the committed entry in a consistent replica log.
+ // Accessed through atomics so must be 64-bit aligned.
+ // The value has been persisted in the backend since v3.5.
+ term uint64
+
+ // applyingIndex and applyingTerm are just temporary cache of the raftpb.Entry.Index
+ // and raftpb.Entry.Term, and they are not ready to be persisted yet. They will be
+ // saved to consistentIndex and term above in the txPostLockInsideApplyHook.
+ //
+ // TODO(ahrtr): try to remove the OnPreCommitUnsafe, and compare the
+ // performance difference. Afterwards we can make a decision on whether
+ // or not we should remove OnPreCommitUnsafe. If it is true, then we
+ // can remove applyingIndex and applyingTerm, and save the e.Index and
+ // e.Term to consistentIndex and term directly in applyEntries, and
+ // persist them into db in the txPostLockInsideApplyHook.
+ applyingIndex uint64
+ applyingTerm uint64
+
+ // be is used for the initial read of consistentIndex.
+ be Backend
+ // mutex protects be.
+ mutex sync.Mutex
+}
+
+// NewConsistentIndex creates a new consistent index.
+// If `be` is nil, it must be set via SetBackend before the first access through ConsistentIndex().
+func NewConsistentIndex(be Backend) ConsistentIndexer {
+ return &consistentIndex{be: be}
+}
+
+func (ci *consistentIndex) ConsistentIndex() uint64 {
+ if index := atomic.LoadUint64(&ci.consistentIndex); index > 0 {
+ return index
+ }
+ ci.mutex.Lock()
+ defer ci.mutex.Unlock()
+
+ v, term := schema.ReadConsistentIndex(ci.be.ReadTx())
+ ci.SetConsistentIndex(v, term)
+ return v
+}
+
+func (ci *consistentIndex) UnsafeConsistentIndex() uint64 {
+ if index := atomic.LoadUint64(&ci.consistentIndex); index > 0 {
+ return index
+ }
+
+ v, term := schema.UnsafeReadConsistentIndex(ci.be.ReadTx())
+ ci.SetConsistentIndex(v, term)
+ return v
+}
+
+func (ci *consistentIndex) SetConsistentIndex(v uint64, term uint64) {
+ atomic.StoreUint64(&ci.consistentIndex, v)
+ atomic.StoreUint64(&ci.term, term)
+}
+
+func (ci *consistentIndex) UnsafeSave(tx backend.UnsafeReadWriter) {
+ index := atomic.LoadUint64(&ci.consistentIndex)
+ term := atomic.LoadUint64(&ci.term)
+ schema.UnsafeUpdateConsistentIndex(tx, index, term)
+}
+
+func (ci *consistentIndex) SetBackend(be Backend) {
+ ci.mutex.Lock()
+ defer ci.mutex.Unlock()
+ ci.be = be
+ // After the backend is changed, the first access should re-read it.
+ ci.SetConsistentIndex(0, 0)
+}
+
+func (ci *consistentIndex) ConsistentApplyingIndex() (uint64, uint64) {
+ return atomic.LoadUint64(&ci.applyingIndex), atomic.LoadUint64(&ci.applyingTerm)
+}
+
+func (ci *consistentIndex) SetConsistentApplyingIndex(v uint64, term uint64) {
+ atomic.StoreUint64(&ci.applyingIndex, v)
+ atomic.StoreUint64(&ci.applyingTerm, term)
+}
+
+func NewFakeConsistentIndex(index uint64) ConsistentIndexer {
+ return &fakeConsistentIndex{index: index}
+}
+
+type fakeConsistentIndex struct {
+ index uint64
+ term uint64
+}
+
+func (f *fakeConsistentIndex) ConsistentIndex() uint64 {
+ return atomic.LoadUint64(&f.index)
+}
+
+func (f *fakeConsistentIndex) ConsistentApplyingIndex() (uint64, uint64) {
+ return atomic.LoadUint64(&f.index), atomic.LoadUint64(&f.term)
+}
+
+func (f *fakeConsistentIndex) UnsafeConsistentIndex() uint64 {
+ return atomic.LoadUint64(&f.index)
+}
+
+func (f *fakeConsistentIndex) SetConsistentIndex(index uint64, term uint64) {
+ atomic.StoreUint64(&f.index, index)
+ atomic.StoreUint64(&f.term, term)
+}
+
+func (f *fakeConsistentIndex) SetConsistentApplyingIndex(index uint64, term uint64) {
+ atomic.StoreUint64(&f.index, index)
+ atomic.StoreUint64(&f.term, term)
+}
+
+func (f *fakeConsistentIndex) UnsafeSave(_ backend.UnsafeReadWriter) {}
+func (f *fakeConsistentIndex) SetBackend(_ Backend) {}
+
+func UpdateConsistentIndexForce(tx backend.BatchTx, index uint64, term uint64) {
+ tx.LockOutsideApply()
+ defer tx.Unlock()
+ schema.UnsafeUpdateConsistentIndexForce(tx, index, term)
+}
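+
+// A minimal usage sketch (illustrative only; be and tx are assumed to satisfy
+// Backend and backend.BatchTx respectively, mirroring the locking pattern of
+// UpdateConsistentIndexForce above):
+//
+//	ci := NewConsistentIndex(be)
+//	ci.SetConsistentIndex(42, 3) // cache index 42 at raft term 3
+//	_ = ci.ConsistentIndex()     // 42, served from the atomic cache
+//	tx.LockOutsideApply()
+//	ci.UnsafeSave(tx) // persist the cached index/term while holding the lock
+//	tx.Unlock()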
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/cindex/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/cindex/doc.go
new file mode 100644
index 0000000..7d3e4b7
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/cindex/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package cindex provides an interface and implementation for getting/saving consistentIndex.
+package cindex
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/cluster_util.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/cluster_util.go
new file mode 100644
index 0000000..425ed97
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/cluster_util.go
@@ -0,0 +1,442 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+)
+
+// isMemberBootstrapped tries to check if the given member has been bootstrapped
+// in the given cluster.
+func isMemberBootstrapped(lg *zap.Logger, cl *membership.RaftCluster, member string, rt http.RoundTripper, timeout time.Duration) bool {
+ rcl, err := getClusterFromRemotePeers(lg, getRemotePeerURLs(cl, member), timeout, false, rt)
+ if err != nil {
+ return false
+ }
+ id := cl.MemberByName(member).ID
+ m := rcl.Member(id)
+ if m == nil {
+ return false
+ }
+ return len(m.ClientURLs) > 0
+}
+
+// GetClusterFromRemotePeers takes a set of URLs representing etcd peers, and
+// attempts to construct a Cluster by accessing the members endpoint on one of
+// these URLs. The first URL to provide a response is used. If no URLs provide
+// a response, or a Cluster cannot be successfully created from a received
+// response, an error is returned.
+// Each request has a 10-second timeout. Because the upper limit of TTL is 5s,
+// 10 seconds is enough for establishing a connection and finishing the request.
+func GetClusterFromRemotePeers(lg *zap.Logger, urls []string, rt http.RoundTripper) (*membership.RaftCluster, error) {
+ return getClusterFromRemotePeers(lg, urls, 10*time.Second, true, rt)
+}
+
+// If logerr is true, it prints out more error messages.
+func getClusterFromRemotePeers(lg *zap.Logger, urls []string, timeout time.Duration, logerr bool, rt http.RoundTripper) (*membership.RaftCluster, error) {
+ if lg == nil {
+ lg = zap.NewNop()
+ }
+ cc := &http.Client{
+ Transport: rt,
+ Timeout: timeout,
+ CheckRedirect: func(req *http.Request, via []*http.Request) error {
+ return http.ErrUseLastResponse
+ },
+ }
+ for _, u := range urls {
+ addr := u + "/members"
+ resp, err := cc.Get(addr)
+ if err != nil {
+ if logerr {
+ lg.Warn("failed to get cluster response", zap.String("address", addr), zap.Error(err))
+ }
+ continue
+ }
+ b, err := io.ReadAll(resp.Body)
+ resp.Body.Close()
+ if err != nil {
+ if logerr {
+ lg.Warn("failed to read body of cluster response", zap.String("address", addr), zap.Error(err))
+ }
+ continue
+ }
+ var membs []*membership.Member
+ if err = json.Unmarshal(b, &membs); err != nil {
+ if logerr {
+ lg.Warn("failed to unmarshal cluster response", zap.String("address", addr), zap.Error(err))
+ }
+ continue
+ }
+ id, err := types.IDFromString(resp.Header.Get("X-Etcd-Cluster-ID"))
+ if err != nil {
+ if logerr {
+ lg.Warn(
+ "failed to parse cluster ID",
+ zap.String("address", addr),
+ zap.String("header", resp.Header.Get("X-Etcd-Cluster-ID")),
+ zap.Error(err),
+ )
+ }
+ continue
+ }
+
+ // Check the number of membership members.
+ // If members are present, build and return the raft cluster from them.
+ // Otherwise the resulting raft cluster would be an invalid empty cluster,
+ // so return a "failed to get raft cluster member(s)" error instead.
+ if len(membs) > 0 {
+ return membership.NewClusterFromMembers(lg, id, membs), nil
+ }
+ return nil, fmt.Errorf("failed to get raft cluster member(s) from the given URLs")
+ }
+ return nil, fmt.Errorf("could not retrieve cluster information from the given URLs")
+}
+
+// getRemotePeerURLs returns the peer URLs of remote members in the cluster. The
+// returned list is sorted in ascending lexicographical order.
+func getRemotePeerURLs(cl *membership.RaftCluster, local string) []string {
+ us := make([]string, 0)
+ for _, m := range cl.Members() {
+ if m.Name == local {
+ continue
+ }
+ us = append(us, m.PeerURLs...)
+ }
+ sort.Strings(us)
+ return us
+}
+
+// getMembersVersions returns the versions of the members in the given cluster.
+// The key of the returned map is the member's ID. The value of the returned map
+// is the member's version information, including server and cluster versions.
+// If it fails to get the version of a member, the value will be nil.
+func getMembersVersions(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper, timeout time.Duration) map[string]*version.Versions {
+ members := cl.Members()
+ vers := make(map[string]*version.Versions)
+ for _, m := range members {
+ if m.ID == local {
+ cv := "not_decided"
+ if cl.Version() != nil {
+ cv = cl.Version().String()
+ }
+ vers[m.ID.String()] = &version.Versions{Server: version.Version, Cluster: cv}
+ continue
+ }
+ ver, err := getVersion(lg, m, rt, timeout)
+ if err != nil {
+ lg.Warn("failed to get version", zap.String("remote-member-id", m.ID.String()), zap.Error(err))
+ vers[m.ID.String()] = nil
+ } else {
+ vers[m.ID.String()] = ver
+ }
+ }
+ return vers
+}
+
+// allowedVersionRange decides the available version range of the cluster that the local server can join;
+// if the downgrade enabled status is true, the version window is [oneMinorHigher, oneMinorHigher]
+// if the downgrade is not enabled, the version window is [MinClusterVersion, localVersion]
+func allowedVersionRange(downgradeEnabled bool) (minV *semver.Version, maxV *semver.Version) {
+ minV = semver.Must(semver.NewVersion(version.MinClusterVersion))
+ maxV = semver.Must(semver.NewVersion(version.Version))
+ maxV = &semver.Version{Major: maxV.Major, Minor: maxV.Minor}
+
+ if downgradeEnabled {
+ // TODO: handle the case of downgrading from a higher major version (e.g. from v4.0 to v3.x)
+ maxV.Minor = maxV.Minor + 1
+ minV = &semver.Version{Major: maxV.Major, Minor: maxV.Minor}
+ }
+ return minV, maxV
+}
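+
+// For example (illustrative): with a local server version of 3.5.x, downgrade
+// disabled yields the window [MinClusterVersion, 3.5], while downgrade enabled
+// yields [3.6, 3.6], i.e. only a cluster one minor version higher can be joined.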
+
+// isCompatibleWithCluster returns true if the local member has a version compatible with
+// the currently running cluster.
+// The version is considered compatible when at least one of the other members in the cluster has a
+// cluster version in the range of [MinV, MaxV] and no known member has a cluster version
+// out of that range.
+// We set this rule because another member might be offline when the local member joins.
+func isCompatibleWithCluster(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper, timeout time.Duration) bool {
+ vers := getMembersVersions(lg, cl, local, rt, timeout)
+ minV, maxV := allowedVersionRange(getDowngradeEnabledFromRemotePeers(lg, cl, local, rt, timeout))
+ return isCompatibleWithVers(lg, vers, local, minV, maxV)
+}
+
+func isCompatibleWithVers(lg *zap.Logger, vers map[string]*version.Versions, local types.ID, minV, maxV *semver.Version) bool {
+ var ok bool
+ for id, v := range vers {
+ // ignore comparison with local version
+ if id == local.String() {
+ continue
+ }
+ if v == nil {
+ continue
+ }
+ clusterv, err := semver.NewVersion(v.Cluster)
+ if err != nil {
+ lg.Warn(
+ "failed to parse cluster version of remote member",
+ zap.String("remote-member-id", id),
+ zap.String("remote-member-cluster-version", v.Cluster),
+ zap.Error(err),
+ )
+ continue
+ }
+ if clusterv.LessThan(*minV) {
+ lg.Warn(
+ "cluster version of remote member is not compatible; too low",
+ zap.String("remote-member-id", id),
+ zap.String("remote-member-cluster-version", clusterv.String()),
+ zap.String("minimum-cluster-version-supported", minV.String()),
+ )
+ return false
+ }
+ if maxV.LessThan(*clusterv) {
+ lg.Warn(
+ "cluster version of remote member is not compatible; too high",
+ zap.String("remote-member-id", id),
+ zap.String("remote-member-cluster-version", clusterv.String()),
+ zap.String("maximum-cluster-version-supported", maxV.String()),
+ )
+ return false
+ }
+ ok = true
+ }
+ return ok
+}
+
+// getVersion returns the Versions of the given member via its
+// peerURLs. Returns the last error if it fails to get the version.
+func getVersion(lg *zap.Logger, m *membership.Member, rt http.RoundTripper, timeout time.Duration) (*version.Versions, error) {
+ cc := &http.Client{
+ Transport: rt,
+ Timeout: timeout,
+ CheckRedirect: func(req *http.Request, via []*http.Request) error {
+ return http.ErrUseLastResponse
+ },
+ }
+ var (
+ err error
+ resp *http.Response
+ )
+
+ for _, u := range m.PeerURLs {
+ addr := u + "/version"
+ resp, err = cc.Get(addr)
+ if err != nil {
+ lg.Warn(
+ "failed to reach the peer URL",
+ zap.String("address", addr),
+ zap.String("remote-member-id", m.ID.String()),
+ zap.Error(err),
+ )
+ continue
+ }
+ var b []byte
+ b, err = io.ReadAll(resp.Body)
+ resp.Body.Close()
+ if err != nil {
+ lg.Warn(
+ "failed to read body of response",
+ zap.String("address", addr),
+ zap.String("remote-member-id", m.ID.String()),
+ zap.Error(err),
+ )
+ continue
+ }
+ var vers version.Versions
+ if err = json.Unmarshal(b, &vers); err != nil {
+ lg.Warn(
+ "failed to unmarshal response",
+ zap.String("address", addr),
+ zap.String("remote-member-id", m.ID.String()),
+ zap.Error(err),
+ )
+ continue
+ }
+ return &vers, nil
+ }
+ return nil, err
+}
+
+func promoteMemberHTTP(ctx context.Context, url string, id uint64, peerRt http.RoundTripper) ([]*membership.Member, error) {
+ cc := &http.Client{
+ Transport: peerRt,
+ CheckRedirect: func(req *http.Request, via []*http.Request) error {
+ return http.ErrUseLastResponse
+ },
+ }
+ // TODO: refactor member http handler code
+ // cannot import etcdhttp, so manually construct url
+ requestURL := url + "/members/promote/" + fmt.Sprintf("%d", id)
+ req, err := http.NewRequest(http.MethodPost, requestURL, nil)
+ if err != nil {
+ return nil, err
+ }
+ req = req.WithContext(ctx)
+ resp, err := cc.Do(req)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+ b, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return nil, err
+ }
+
+ if resp.StatusCode == http.StatusRequestTimeout {
+ return nil, errors.ErrTimeout
+ }
+ if resp.StatusCode == http.StatusPreconditionFailed {
+ // both ErrMemberNotLearner and ErrLearnerNotReady have the same HTTP status code
+ if strings.Contains(string(b), errors.ErrLearnerNotReady.Error()) {
+ return nil, errors.ErrLearnerNotReady
+ }
+ if strings.Contains(string(b), membership.ErrMemberNotLearner.Error()) {
+ return nil, membership.ErrMemberNotLearner
+ }
+ return nil, fmt.Errorf("member promote: unknown error(%s)", b)
+ }
+ if resp.StatusCode == http.StatusNotFound {
+ return nil, membership.ErrIDNotFound
+ }
+
+ if resp.StatusCode != http.StatusOK { // all other types of errors
+ return nil, fmt.Errorf("member promote: unknown error(%s)", b)
+ }
+
+ var membs []*membership.Member
+ if err := json.Unmarshal(b, &membs); err != nil {
+ return nil, err
+ }
+ return membs, nil
+}
+
+// getDowngradeEnabledFromRemotePeers will get the downgrade enabled status of the cluster.
+func getDowngradeEnabledFromRemotePeers(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper, timeout time.Duration) bool {
+ members := cl.Members()
+
+ for _, m := range members {
+ if m.ID == local {
+ continue
+ }
+ enable, err := getDowngradeEnabled(lg, m, rt, timeout)
+ if err == nil {
+ // Since the "/downgrade/enabled" endpoint serves linearizable data,
+ // this function can return as soon as it gets a non-error response from an endpoint.
+ return enable
+ }
+ lg.Warn("failed to get downgrade enabled status", zap.String("remote-member-id", m.ID.String()), zap.Error(err))
+ }
+ return false
+}
+
+// getDowngradeEnabled returns the downgrade enabled status of the given member
+// via its peerURLs. Returns the last error if it fails to get it.
+func getDowngradeEnabled(lg *zap.Logger, m *membership.Member, rt http.RoundTripper, timeout time.Duration) (bool, error) {
+ cc := &http.Client{
+ Transport: rt,
+ Timeout: timeout,
+ CheckRedirect: func(req *http.Request, via []*http.Request) error {
+ return http.ErrUseLastResponse
+ },
+ }
+ var (
+ err error
+ resp *http.Response
+ )
+
+ for _, u := range m.PeerURLs {
+ addr := u + DowngradeEnabledPath
+ resp, err = cc.Get(addr)
+ if err != nil {
+ lg.Warn(
+ "failed to reach the peer URL",
+ zap.String("address", addr),
+ zap.String("remote-member-id", m.ID.String()),
+ zap.Error(err),
+ )
+ continue
+ }
+ var b []byte
+ b, err = io.ReadAll(resp.Body)
+ resp.Body.Close()
+ if err != nil {
+ lg.Warn(
+ "failed to read body of response",
+ zap.String("address", addr),
+ zap.String("remote-member-id", m.ID.String()),
+ zap.Error(err),
+ )
+ continue
+ }
+ var enable bool
+ if enable, err = strconv.ParseBool(string(b)); err != nil {
+ lg.Warn(
+ "failed to convert response",
+ zap.String("address", addr),
+ zap.String("remote-member-id", m.ID.String()),
+ zap.Error(err),
+ )
+ continue
+ }
+ return enable, nil
+ }
+ return false, err
+}
+
+func convertToClusterVersion(v string) (*semver.Version, error) {
+ ver, err := semver.NewVersion(v)
+ if err != nil {
+ // allow input version format Major.Minor
+ ver, err = semver.NewVersion(v + ".0")
+ if err != nil {
+ return nil, errors.ErrWrongDowngradeVersionFormat
+ }
+ }
+ // cluster version only keeps major.minor, remove patch version
+ ver = &semver.Version{Major: ver.Major, Minor: ver.Minor}
+ return ver, nil
+}
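+
+// For example (illustrative): "3.5" is accepted by retrying with ".0" appended
+// and yields 3.5; "3.5.7" parses directly and the patch version is dropped,
+// also yielding 3.5; "v3.5" fails with ErrWrongDowngradeVersionFormat.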
+
+func GetMembershipInfoInV2Format(lg *zap.Logger, cl *membership.RaftCluster) []byte {
+ st := v2store.New(StoreClusterPrefix, StoreKeysPrefix)
+ cl.Store(st)
+ d, err := st.SaveNoCopy()
+ if err != nil {
+ lg.Panic("failed to save v2 store", zap.Error(err))
+ }
+ return d
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/corrupt.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/corrupt.go
new file mode 100644
index 0000000..5ec111b
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/corrupt.go
@@ -0,0 +1,626 @@
+// Copyright 2017 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "bytes"
+ "context"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "sort"
+ "strings"
+ "sync"
+ "time"
+
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+)
+
+type CorruptionChecker interface {
+ InitialCheck() error
+ PeriodicCheck() error
+ CompactHashCheck()
+}
+
+type corruptionChecker struct {
+ lg *zap.Logger
+
+ hasher Hasher
+
+ mux sync.RWMutex
+ latestRevisionChecked int64
+}
+
+type Hasher interface {
+ mvcc.HashStorage
+ ReqTimeout() time.Duration
+ MemberID() types.ID
+ PeerHashByRev(int64) []*peerHashKVResp
+ LinearizableReadNotify(context.Context) error
+ TriggerCorruptAlarm(types.ID)
+}
+
+func newCorruptionChecker(lg *zap.Logger, s *EtcdServer, storage mvcc.HashStorage) *corruptionChecker {
+ return &corruptionChecker{
+ lg: lg,
+ hasher: hasherAdapter{s, storage},
+ }
+}
+
+type hasherAdapter struct {
+ *EtcdServer
+ mvcc.HashStorage
+}
+
+func (h hasherAdapter) ReqTimeout() time.Duration {
+ return h.EtcdServer.Cfg.ReqTimeout()
+}
+
+func (h hasherAdapter) PeerHashByRev(rev int64) []*peerHashKVResp {
+ return h.EtcdServer.getPeerHashKVs(rev)
+}
+
+func (h hasherAdapter) TriggerCorruptAlarm(memberID types.ID) {
+ h.EtcdServer.triggerCorruptAlarm(memberID)
+}
+
+// InitialCheck compares the local member's initial hash values with its peers'
+// before serving any peer/client traffic. A mismatch is counted only when the
+// hashes differ at the requested revision while the compact revisions are the same.
+func (cm *corruptionChecker) InitialCheck() error {
+ cm.lg.Info(
+ "starting initial corruption check",
+ zap.String("local-member-id", cm.hasher.MemberID().String()),
+ zap.Duration("timeout", cm.hasher.ReqTimeout()),
+ )
+
+ h, _, err := cm.hasher.HashByRev(0)
+ if err != nil {
+ return fmt.Errorf("%s failed to fetch hash (%w)", cm.hasher.MemberID(), err)
+ }
+ peers := cm.hasher.PeerHashByRev(h.Revision)
+ mismatch := 0
+ for _, p := range peers {
+ if p.resp != nil {
+ peerID := types.ID(p.resp.Header.MemberId)
+ fields := []zap.Field{
+ zap.String("local-member-id", cm.hasher.MemberID().String()),
+ zap.Int64("local-member-revision", h.Revision),
+ zap.Int64("local-member-compact-revision", h.CompactRevision),
+ zap.Uint32("local-member-hash", h.Hash),
+ zap.String("remote-peer-id", peerID.String()),
+ zap.Strings("remote-peer-endpoints", p.eps),
+ zap.Int64("remote-peer-revision", p.resp.Header.Revision),
+ zap.Int64("remote-peer-compact-revision", p.resp.CompactRevision),
+ zap.Uint32("remote-peer-hash", p.resp.Hash),
+ }
+
+ if h.Hash != p.resp.Hash {
+ if h.CompactRevision == p.resp.CompactRevision {
+ cm.lg.Warn("found different hash values from remote peer", fields...)
+ mismatch++
+ } else {
+ cm.lg.Warn("found different compact revision values from remote peer", fields...)
+ }
+ }
+
+ continue
+ }
+
+ if p.err != nil {
+ switch {
+ case errors.Is(p.err, rpctypes.ErrFutureRev):
+ cm.lg.Warn(
+ "cannot fetch hash from slow remote peer",
+ zap.String("local-member-id", cm.hasher.MemberID().String()),
+ zap.Int64("local-member-revision", h.Revision),
+ zap.Int64("local-member-compact-revision", h.CompactRevision),
+ zap.Uint32("local-member-hash", h.Hash),
+ zap.String("remote-peer-id", p.id.String()),
+ zap.Strings("remote-peer-endpoints", p.eps),
+ zap.Error(p.err),
+ )
+ case errors.Is(p.err, rpctypes.ErrCompacted):
+ cm.lg.Warn(
+ "cannot fetch hash from remote peer; local member is behind",
+ zap.String("local-member-id", cm.hasher.MemberID().String()),
+ zap.Int64("local-member-revision", h.Revision),
+ zap.Int64("local-member-compact-revision", h.CompactRevision),
+ zap.Uint32("local-member-hash", h.Hash),
+ zap.String("remote-peer-id", p.id.String()),
+ zap.Strings("remote-peer-endpoints", p.eps),
+ zap.Error(p.err),
+ )
+ case errors.Is(p.err, rpctypes.ErrClusterIDMismatch):
+ cm.lg.Warn(
+ "cluster ID mismatch",
+ zap.String("local-member-id", cm.hasher.MemberID().String()),
+ zap.Int64("local-member-revision", h.Revision),
+ zap.Int64("local-member-compact-revision", h.CompactRevision),
+ zap.Uint32("local-member-hash", h.Hash),
+ zap.String("remote-peer-id", p.id.String()),
+ zap.Strings("remote-peer-endpoints", p.eps),
+ zap.Error(p.err),
+ )
+ }
+ }
+ }
+ if mismatch > 0 {
+ return fmt.Errorf("%s found data inconsistency with peers", cm.hasher.MemberID())
+ }
+
+ cm.lg.Info(
+ "initial corruption checking passed; no corruption",
+ zap.String("local-member-id", cm.hasher.MemberID().String()),
+ )
+ return nil
+}
+
+func (cm *corruptionChecker) PeriodicCheck() error {
+ h, _, err := cm.hasher.HashByRev(0)
+ if err != nil {
+ return err
+ }
+ peers := cm.hasher.PeerHashByRev(h.Revision)
+
+ ctx, cancel := context.WithTimeout(context.Background(), cm.hasher.ReqTimeout())
+ err = cm.hasher.LinearizableReadNotify(ctx)
+ cancel()
+ if err != nil {
+ return err
+ }
+
+ h2, rev2, err := cm.hasher.HashByRev(0)
+ if err != nil {
+ return err
+ }
+
+ alarmed := false
+ mismatch := func(id types.ID) {
+ if alarmed {
+ return
+ }
+ alarmed = true
+ cm.hasher.TriggerCorruptAlarm(id)
+ }
+
+ if h2.Hash != h.Hash && h2.Revision == h.Revision && h.CompactRevision == h2.CompactRevision {
+ cm.lg.Warn(
+ "found hash mismatch",
+ zap.Int64("revision-1", h.Revision),
+ zap.Int64("compact-revision-1", h.CompactRevision),
+ zap.Uint32("hash-1", h.Hash),
+ zap.Int64("revision-2", h2.Revision),
+ zap.Int64("compact-revision-2", h2.CompactRevision),
+ zap.Uint32("hash-2", h2.Hash),
+ )
+ mismatch(cm.hasher.MemberID())
+ }
+
+ checkedCount := 0
+ for _, p := range peers {
+ if p.resp == nil {
+ continue
+ }
+ checkedCount++
+
+ // the leader expects the follower's latest revision to be less than or equal to its own
+ if p.resp.Header.Revision > rev2 {
+ cm.lg.Warn(
+ "revision from follower must be less than or equal to leader's",
+ zap.Int64("leader-revision", rev2),
+ zap.Int64("follower-revision", p.resp.Header.Revision),
+ zap.String("follower-peer-id", p.id.String()),
+ )
+ mismatch(p.id)
+ }
+
+ // the leader expects the follower's latest compact revision to be less than or equal to its own
+ if p.resp.CompactRevision > h2.CompactRevision {
+ cm.lg.Warn(
+ "compact revision from follower must be less than or equal to leader's",
+ zap.Int64("leader-compact-revision", h2.CompactRevision),
+ zap.Int64("follower-compact-revision", p.resp.CompactRevision),
+ zap.String("follower-peer-id", p.id.String()),
+ )
+ mismatch(p.id)
+ }
+
+ // if the follower's compact revision equals the leader's earlier one, the hashes must match
+ if p.resp.CompactRevision == h.CompactRevision && p.resp.Hash != h.Hash {
+ cm.lg.Warn(
+ "same compact revision then hashes must match",
+ zap.Int64("leader-compact-revision", h2.CompactRevision),
+ zap.Uint32("leader-hash", h.Hash),
+ zap.Int64("follower-compact-revision", p.resp.CompactRevision),
+ zap.Uint32("follower-hash", p.resp.Hash),
+ zap.String("follower-peer-id", p.id.String()),
+ )
+ mismatch(p.id)
+ }
+ }
+ cm.lg.Info("finished peer corruption check", zap.Int("number-of-peers-checked", checkedCount))
+ return nil
+}
+
+// CompactHashCheck is based on the fact that 'compactions' are coordinated
+// between raft members and performed at the same revision. For each compacted
+// revision a KV store hash is computed and saved for some time.
+//
+// This method communicates with peers to find a recent common revision across
+// members, and raises alarm if 2 or more members at the same compact revision
+// have different hashes.
+//
+// We might miss the opportunity to perform the check if the compaction is still
+// ongoing on one of the members, or if a member was unresponsive. In such a
+// situation the method still passes without raising an alarm.
+func (cm *corruptionChecker) CompactHashCheck() {
+ cm.lg.Info("starting compact hash check",
+ zap.String("local-member-id", cm.hasher.MemberID().String()),
+ zap.Duration("timeout", cm.hasher.ReqTimeout()),
+ )
+ hashes := cm.uncheckedRevisions()
+ // Assume that revisions are ordered from largest to smallest
+ for i, hash := range hashes {
+ peers := cm.hasher.PeerHashByRev(hash.Revision)
+ if len(peers) == 0 {
+ continue
+ }
+ if cm.checkPeerHashes(hash, peers) {
+ cm.lg.Info("finished compaction hash check", zap.Int("number-of-hashes-checked", i+1))
+ return
+ }
+ }
+ cm.lg.Info("finished compaction hash check", zap.Int("number-of-hashes-checked", len(hashes)))
+}
+
+// checkPeerHashes checks the peers' hashes and raises alarms if corruption is detected.
+// It returns a bool indicating whether the next hash needs to be checked.
+//
+// true: the hash was successfully checked on the whole cluster or alarms were raised, so there is no need to check the next hash
+// false: some members were skipped, so the next hash needs to be checked
+func (cm *corruptionChecker) checkPeerHashes(leaderHash mvcc.KeyValueHash, peers []*peerHashKVResp) bool {
+ leaderID := cm.hasher.MemberID()
+ hash2members := map[uint32]types.IDSlice{leaderHash.Hash: {leaderID}}
+
+ peersChecked := 0
+ // group all peers by hash
+ for _, peer := range peers {
+ skipped := false
+ reason := ""
+
+ if peer.resp == nil {
+ skipped = true
+ reason = "no response"
+ } else if peer.resp.CompactRevision != leaderHash.CompactRevision {
+ skipped = true
+ reason = fmt.Sprintf("the peer's CompactRevision %d doesn't match leader's CompactRevision %d",
+ peer.resp.CompactRevision, leaderHash.CompactRevision)
+ }
+ if skipped {
+ cm.lg.Warn("Skipped peer's hash", zap.Int("number-of-peers", len(peers)),
+ zap.String("leader-id", leaderID.String()),
+ zap.String("peer-id", peer.id.String()),
+ zap.String("reason", reason))
+ continue
+ }
+
+ peersChecked++
+ if ids, ok := hash2members[peer.resp.Hash]; !ok {
+ hash2members[peer.resp.Hash] = []types.ID{peer.id}
+ } else {
+ ids = append(ids, peer.id)
+ hash2members[peer.resp.Hash] = ids
+ }
+ }
+
+ // All members have the same CompactRevision and Hash.
+ if len(hash2members) == 1 {
+ return cm.handleConsistentHash(leaderHash, peersChecked, len(peers))
+ }
+
+ // Detected hashes mismatch
+ // The first step is to figure out the majority with the same hash.
+ memberCnt := len(peers) + 1
+ quorum := memberCnt/2 + 1
+ quorumExist := false
+ for k, v := range hash2members {
+ if len(v) >= quorum {
+ quorumExist = true
+ // remove the majority; we might raise alarms for the remaining members.
+ delete(hash2members, k)
+ break
+ }
+ }
+
+ if !quorumExist {
+ // If a quorum doesn't exist, we don't know which members' data is
+ // corrupted. In that situation, we intentionally set the memberID
+ // to 0, meaning the alarm affects the whole cluster.
+ cm.lg.Error("Detected compaction hash mismatch but cannot identify the corrupted members, so intentionally set the memberID as 0",
+ zap.String("leader-id", leaderID.String()),
+ zap.Int64("leader-revision", leaderHash.Revision),
+ zap.Int64("leader-compact-revision", leaderHash.CompactRevision),
+ zap.Uint32("leader-hash", leaderHash.Hash),
+ )
+ cm.hasher.TriggerCorruptAlarm(0)
+ }
+
+ // Raise an alarm for the remaining members if a quorum is present.
+ // But always generate an error log for debugging.
+ for k, v := range hash2members {
+ if quorumExist {
+ for _, pid := range v {
+ cm.hasher.TriggerCorruptAlarm(pid)
+ }
+ }
+
+ cm.lg.Error("Detected compaction hash mismatch",
+ zap.String("leader-id", leaderID.String()),
+ zap.Int64("leader-revision", leaderHash.Revision),
+ zap.Int64("leader-compact-revision", leaderHash.CompactRevision),
+ zap.Uint32("leader-hash", leaderHash.Hash),
+ zap.Uint32("peer-hash", k),
+ zap.String("peer-ids", v.String()),
+ zap.Bool("quorum-exist", quorumExist),
+ )
+ }
+
+ return true
+}
+
+func (cm *corruptionChecker) handleConsistentHash(hash mvcc.KeyValueHash, peersChecked, peerCnt int) bool {
+ if peersChecked == peerCnt {
+ cm.lg.Info("successfully checked hash on whole cluster",
+ zap.Int("number-of-peers-checked", peersChecked),
+ zap.Int64("revision", hash.Revision),
+ zap.Int64("compactRevision", hash.CompactRevision),
+ )
+ cm.mux.Lock()
+ if hash.Revision > cm.latestRevisionChecked {
+ cm.latestRevisionChecked = hash.Revision
+ }
+ cm.mux.Unlock()
+ return true
+ }
+ cm.lg.Warn("skipped revision in compaction hash check; was not able to check all peers",
+ zap.Int("number-of-peers-checked", peersChecked),
+ zap.Int("number-of-peers", peerCnt),
+ zap.Int64("revision", hash.Revision),
+ zap.Int64("compactRevision", hash.CompactRevision),
+ )
+ // This is the only case in which the next hash needs to be checked
+ return false
+}
+
+func (cm *corruptionChecker) uncheckedRevisions() []mvcc.KeyValueHash {
+ cm.mux.RLock()
+ lastRevisionChecked := cm.latestRevisionChecked
+ cm.mux.RUnlock()
+
+ hashes := cm.hasher.Hashes()
+ // Sort in descending order
+ sort.Slice(hashes, func(i, j int) bool {
+ return hashes[i].Revision > hashes[j].Revision
+ })
+ for i, hash := range hashes {
+ if hash.Revision <= lastRevisionChecked {
+ return hashes[:i]
+ }
+ }
+ return hashes
+}
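+
+// For example (illustrative): with stored hashes at revisions {10, 30, 20} and
+// latestRevisionChecked == 10, uncheckedRevisions sorts them to [30, 20, 10]
+// and returns [30, 20], excluding the already-checked revision.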
+
+func (s *EtcdServer) triggerCorruptAlarm(id types.ID) {
+ a := &pb.AlarmRequest{
+ MemberID: uint64(id),
+ Action: pb.AlarmRequest_ACTIVATE,
+ Alarm: pb.AlarmType_CORRUPT,
+ }
+ s.GoAttach(func() {
+ s.raftRequest(s.ctx, pb.InternalRaftRequest{Alarm: a})
+ })
+}
+
+type peerInfo struct {
+ id types.ID
+ eps []string
+}
+
+type peerHashKVResp struct {
+ peerInfo
+ resp *pb.HashKVResponse
+ err error
+}
+
+func (s *EtcdServer) getPeerHashKVs(rev int64) []*peerHashKVResp {
+ // TODO: handle the case when "s.cluster.Members" have not
+ // been populated (e.g. no snapshot to load from disk)
+ members := s.cluster.Members()
+ peers := make([]peerInfo, 0, len(members))
+ for _, m := range members {
+ if m.ID == s.MemberID() {
+ continue
+ }
+ peers = append(peers, peerInfo{id: m.ID, eps: m.PeerURLs})
+ }
+
+ lg := s.Logger()
+
+ cc := &http.Client{
+ Transport: s.peerRt,
+ CheckRedirect: func(req *http.Request, via []*http.Request) error {
+ return http.ErrUseLastResponse
+ },
+ }
+ var resps []*peerHashKVResp
+ for _, p := range peers {
+ if len(p.eps) == 0 {
+ continue
+ }
+
+ respsLen := len(resps)
+ var lastErr error
+ for _, ep := range p.eps {
+ var resp *pb.HashKVResponse
+
+ ctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
+ resp, lastErr = HashByRev(ctx, s.cluster.ID(), cc, ep, rev)
+ cancel()
+ if lastErr == nil {
+ resps = append(resps, &peerHashKVResp{peerInfo: p, resp: resp, err: nil})
+ break
+ }
+ lg.Warn(
+ "failed hash kv request",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.Int64("requested-revision", rev),
+ zap.String("remote-peer-endpoint", ep),
+ zap.Error(lastErr),
+ )
+ }
+
+ // failed to get hashKV from all endpoints of this peer
+ if respsLen == len(resps) {
+ resps = append(resps, &peerHashKVResp{peerInfo: p, resp: nil, err: lastErr})
+ }
+ }
+ return resps
+}
+
+const PeerHashKVPath = "/members/hashkv"
+
+type hashKVHandler struct {
+ lg *zap.Logger
+ server *EtcdServer
+}
+
+func (s *EtcdServer) HashKVHandler() http.Handler {
+ return &hashKVHandler{lg: s.Logger(), server: s}
+}
+
+func (h *hashKVHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ w.Header().Set("Allow", http.MethodGet)
+ http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
+ return
+ }
+ if r.URL.Path != PeerHashKVPath {
+ http.Error(w, "bad path", http.StatusBadRequest)
+ return
+ }
+ if gcid := r.Header.Get("X-Etcd-Cluster-ID"); gcid != "" && gcid != h.server.cluster.ID().String() {
+ http.Error(w, rafthttp.ErrClusterIDMismatch.Error(), http.StatusPreconditionFailed)
+ return
+ }
+
+ defer r.Body.Close()
+ b, err := io.ReadAll(r.Body)
+ if err != nil {
+ http.Error(w, "error reading body", http.StatusBadRequest)
+ return
+ }
+
+ req := &pb.HashKVRequest{}
+ if err = json.Unmarshal(b, req); err != nil {
+ h.lg.Warn("failed to unmarshal request", zap.Error(err))
+ http.Error(w, "error unmarshalling request", http.StatusBadRequest)
+ return
+ }
+ hash, rev, err := h.server.KV().HashStorage().HashByRev(req.Revision)
+ if err != nil {
+ h.lg.Warn(
+ "failed to get hashKV",
+ zap.Int64("requested-revision", req.Revision),
+ zap.Error(err),
+ )
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return
+ }
+ resp := &pb.HashKVResponse{
+ Header: &pb.ResponseHeader{Revision: rev},
+ Hash: hash.Hash,
+ CompactRevision: hash.CompactRevision,
+ HashRevision: hash.Revision,
+ }
+ respBytes, err := json.Marshal(resp)
+ if err != nil {
+ h.lg.Warn("failed to marshal hashKV response", zap.Error(err))
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+
+ w.Header().Set("X-Etcd-Cluster-ID", h.server.Cluster().ID().String())
+ w.Header().Set("Content-Type", "application/json")
+ w.Write(respBytes)
+}
+
+// HashByRev fetches the hash of the KV store at the given rev via an HTTP call to the given URL.
+func HashByRev(ctx context.Context, cid types.ID, cc *http.Client, url string, rev int64) (*pb.HashKVResponse, error) {
+ hashReq := &pb.HashKVRequest{Revision: rev}
+ hashReqBytes, err := json.Marshal(hashReq)
+ if err != nil {
+ return nil, err
+ }
+ requestURL := url + PeerHashKVPath
+ req, err := http.NewRequest(http.MethodGet, requestURL, bytes.NewReader(hashReqBytes))
+ if err != nil {
+ return nil, err
+ }
+ req = req.WithContext(ctx)
+ req.Header.Set("Content-Type", "application/json")
+ req.Header.Set("X-Etcd-Cluster-ID", cid.String())
+ req.Cancel = ctx.Done()
+
+ resp, err := cc.Do(req)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+ b, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return nil, err
+ }
+
+ if resp.StatusCode == http.StatusBadRequest {
+ if strings.Contains(string(b), mvcc.ErrCompacted.Error()) {
+ return nil, rpctypes.ErrCompacted
+ }
+ if strings.Contains(string(b), mvcc.ErrFutureRev.Error()) {
+ return nil, rpctypes.ErrFutureRev
+ }
+ } else if resp.StatusCode == http.StatusPreconditionFailed {
+ if strings.Contains(string(b), rafthttp.ErrClusterIDMismatch.Error()) {
+ return nil, rpctypes.ErrClusterIDMismatch
+ }
+ }
+ if resp.StatusCode != http.StatusOK {
+ return nil, fmt.Errorf("unknown error: %s", b)
+ }
+
+ hashResp := &pb.HashKVResponse{}
+ if err := json.Unmarshal(b, hashResp); err != nil {
+ return nil, err
+ }
+ return hashResp, nil
+}
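+
+// A minimal caller sketch (illustrative only; rt, cid and peerURL are
+// assumptions supplied by the surrounding server, as in getPeerHashKVs above):
+//
+//	cc := &http.Client{Transport: rt}
+//	ctx, cancel := context.WithTimeout(context.Background(), reqTimeout)
+//	defer cancel()
+//	resp, err := HashByRev(ctx, cid, cc, peerURL, 0) // rev 0 means latest
+//	if err == nil {
+//		_ = resp.Hash // compare with the local hash at resp.HashRevision
+//	}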
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/doc.go
new file mode 100644
index 0000000..b195d2d
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package etcdserver defines how etcd servers interact and store their states.
+package etcdserver
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/errors/errors.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/errors/errors.go
new file mode 100644
index 0000000..8de698a
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/errors/errors.go
@@ -0,0 +1,54 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package errors
+
+import (
+ "errors"
+ "fmt"
+)
+
+var (
+ ErrUnknownMethod = errors.New("etcdserver: unknown method")
+ ErrStopped = errors.New("etcdserver: server stopped")
+ ErrCanceled = errors.New("etcdserver: request cancelled")
+ ErrTimeout = errors.New("etcdserver: request timed out")
+ ErrTimeoutDueToLeaderFail = errors.New("etcdserver: request timed out, possibly due to previous leader failure")
+ ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost")
+ ErrTimeoutLeaderTransfer = errors.New("etcdserver: request timed out, leader transfer took too long")
+ ErrTimeoutWaitAppliedIndex = errors.New("etcdserver: request timed out, waiting for the applied index took too long")
+ ErrLeaderChanged = errors.New("etcdserver: leader changed")
+ ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
+ ErrLearnerNotReady = errors.New("etcdserver: can only promote a learner member which is in sync with leader")
+ ErrNoLeader = errors.New("etcdserver: no leader")
+ ErrNotLeader = errors.New("etcdserver: not leader")
+ ErrRequestTooLarge = errors.New("etcdserver: request is too large")
+ ErrNoSpace = errors.New("etcdserver: no space")
+ ErrTooManyRequests = errors.New("etcdserver: too many requests")
+ ErrUnhealthy = errors.New("etcdserver: unhealthy cluster")
+ ErrCorrupt = errors.New("etcdserver: corrupt cluster")
+ ErrBadLeaderTransferee = errors.New("etcdserver: bad leader transferee")
+ ErrClusterVersionUnavailable = errors.New("etcdserver: cluster version not found during downgrade")
+ ErrWrongDowngradeVersionFormat = errors.New("etcdserver: wrong downgrade target version format")
+ ErrKeyNotFound = errors.New("etcdserver: key not found")
+)
+
+type DiscoveryError struct {
+ Op string
+ Err error
+}
+
+func (e DiscoveryError) Error() string {
+ return fmt.Sprintf("failed to %s discovery cluster (%v)", e.Op, e.Err)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/metrics.go
new file mode 100644
index 0000000..7176d30
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/metrics.go
@@ -0,0 +1,224 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ goruntime "runtime"
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/pkg/v3/runtime"
+)
+
+var (
+ hasLeader = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "has_leader",
+ Help: "Whether or not a leader exists. 1 is existence, 0 is not.",
+ })
+ isLeader = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "is_leader",
+ Help: "Whether or not this member is a leader. 1 if is, 0 otherwise.",
+ })
+ leaderChanges = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "leader_changes_seen_total",
+ Help: "The number of leader changes seen.",
+ })
+ learnerPromoteFailed = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "learner_promote_failures",
+ Help: "The total number of failed learner promotions (likely learner not ready) while this member is leader.",
+ },
+ []string{"Reason"},
+ )
+ learnerPromoteSucceed = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "learner_promote_successes",
+ Help: "The total number of successful learner promotions while this member is leader.",
+ })
+ heartbeatSendFailures = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "heartbeat_send_failures_total",
+ Help: "The total number of leader heartbeat send failures (likely overloaded from slow disk).",
+ })
+ applySnapshotInProgress = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "snapshot_apply_in_progress_total",
+ Help: "1 if the server is applying the incoming snapshot. 0 if none.",
+ })
+ proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "proposals_committed_total",
+ Help: "The total number of consensus proposals committed.",
+ })
+ proposalsApplied = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "proposals_applied_total",
+ Help: "The total number of consensus proposals applied.",
+ })
+ proposalsPending = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "proposals_pending",
+ Help: "The current number of pending proposals to commit.",
+ })
+ proposalsFailed = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "proposals_failed_total",
+ Help: "The total number of failed proposals seen.",
+ })
+ slowReadIndex = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "slow_read_indexes_total",
+ Help: "The total number of pending read indexes not in sync with leader's or timed out read index requests.",
+ })
+ readIndexFailed = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "read_indexes_failed_total",
+ Help: "The total number of failed read indexes seen.",
+ })
+ leaseExpired = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd_debugging",
+ Subsystem: "server",
+ Name: "lease_expired_total",
+ Help: "The total number of expired leases.",
+ })
+ currentVersion = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "version",
+ Help: "Which version is running. 1 for 'server_version' label with current version.",
+ },
+ []string{"server_version"},
+ )
+ currentGoVersion = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "go_version",
+ Help: "Which Go version server is running with. 1 for 'server_go_version' label with current version.",
+ },
+ []string{"server_go_version"},
+ )
+ serverID = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "id",
+ Help: "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.",
+ },
+ []string{"server_id"},
+ )
+ serverFeatureEnabled = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "etcd_server_feature_enabled",
+ Help: "Whether or not a feature is enabled. 1 is enabled, 0 is not.",
+ },
+ []string{"name", "stage"},
+ )
+ fdUsed = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "os",
+ Subsystem: "fd",
+ Name: "used",
+ Help: "The number of used file descriptors.",
+ })
+ fdLimit = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "os",
+ Subsystem: "fd",
+ Name: "limit",
+ Help: "The file descriptor limit.",
+ })
+)
+
+func init() {
+ prometheus.MustRegister(hasLeader)
+ prometheus.MustRegister(isLeader)
+ prometheus.MustRegister(leaderChanges)
+ prometheus.MustRegister(heartbeatSendFailures)
+ prometheus.MustRegister(applySnapshotInProgress)
+ prometheus.MustRegister(proposalsCommitted)
+ prometheus.MustRegister(proposalsApplied)
+ prometheus.MustRegister(proposalsPending)
+ prometheus.MustRegister(proposalsFailed)
+ prometheus.MustRegister(slowReadIndex)
+ prometheus.MustRegister(readIndexFailed)
+ prometheus.MustRegister(leaseExpired)
+ prometheus.MustRegister(currentVersion)
+ prometheus.MustRegister(currentGoVersion)
+ prometheus.MustRegister(serverID)
+ prometheus.MustRegister(serverFeatureEnabled)
+ prometheus.MustRegister(learnerPromoteSucceed)
+ prometheus.MustRegister(learnerPromoteFailed)
+ prometheus.MustRegister(fdUsed)
+ prometheus.MustRegister(fdLimit)
+
+ currentVersion.With(prometheus.Labels{
+ "server_version": version.Version,
+ }).Set(1)
+ currentGoVersion.With(prometheus.Labels{
+ "server_go_version": goruntime.Version(),
+ }).Set(1)
+}
+
+func monitorFileDescriptor(lg *zap.Logger, done <-chan struct{}) {
+ // This ticker periodically counts the file descriptors in use and logs a
+ // warning message once usage reaches 80% of the limit (used >= limit/5*4).
+ // FDUsage() becomes expensive when more than 10K fds are open, so the
+ // check interval is kept at 10 minutes.
+ // See https://github.com/etcd-io/etcd/issues/11969 for more detail.
+ ticker := time.NewTicker(10 * time.Minute)
+ defer ticker.Stop()
+ for {
+ used, err := runtime.FDUsage()
+ if err != nil {
+ lg.Warn("failed to get file descriptor usage", zap.Error(err))
+ return
+ }
+ fdUsed.Set(float64(used))
+ limit, err := runtime.FDLimit()
+ if err != nil {
+ lg.Warn("failed to get file descriptor limit", zap.Error(err))
+ return
+ }
+ fdLimit.Set(float64(limit))
+ if used >= limit/5*4 {
+ lg.Warn("80% of file descriptors are used", zap.Uint64("used", used), zap.Uint64("limit", limit))
+ }
+ select {
+ case <-ticker.C:
+ case <-done:
+ return
+ }
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/raft.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/raft.go
new file mode 100644
index 0000000..fd4b5da
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/raft.go
@@ -0,0 +1,446 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "expvar"
+ "fmt"
+ "log"
+ "sync"
+ "time"
+
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/client/pkg/v3/logutil"
+ "go.etcd.io/etcd/pkg/v3/contention"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp"
+ serverstorage "go.etcd.io/etcd/server/v3/storage"
+ "go.etcd.io/raft/v3"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+const (
+ // The max throughput of etcd will not exceed 100MB/s (100K * 1KB value).
+ // Assuming the RTT is around 10ms, 1MB max size is large enough.
+ maxSizePerMsg = 1 * 1024 * 1024
+ // Never overflow the rafthttp buffer, which is 4096.
+ // TODO: a better const?
+ maxInflightMsgs = 4096 / 8
+)
+
+var (
+ // protects raftStatus
+ raftStatusMu sync.Mutex
+ // indirection for the expvar func interface;
+ // expvar panics when publishing a duplicate name and does not support
+ // removing a registered name, so we register a single func that calls
+ // raftStatus and swap raftStatus out as needed.
+ raftStatus func() raft.Status
+)
+
+func init() {
+ expvar.Publish("raft.status", expvar.Func(func() any {
+ raftStatusMu.Lock()
+ defer raftStatusMu.Unlock()
+ if raftStatus == nil {
+ return nil
+ }
+ return raftStatus()
+ }))
+}
+
+// toApply contains entries and a snapshot to be applied. Once
+// a toApply is consumed, the entries will be persisted to
+// raft storage concurrently; the application must read
+// notifyc before assuming the raft messages are stable.
+type toApply struct {
+ entries []raftpb.Entry
+ snapshot raftpb.Snapshot
+ // notifyc synchronizes etcd server applies with the raft node
+ notifyc chan struct{}
+ // raftAdvancedC notifies EtcdServer.apply that
+ // 'raftLog.applied' has advanced by r.Advance
+ // it should be used only when entries contain raftpb.EntryConfChange
+ raftAdvancedC <-chan struct{}
+}
+
+type raftNode struct {
+ lg *zap.Logger
+
+ tickMu *sync.RWMutex
+ // timestamp of the latest tick
+ latestTickTs time.Time
+ raftNodeConfig
+
+ // a chan to send/receive snapshot
+ msgSnapC chan raftpb.Message
+
+ // a chan to send out apply
+ applyc chan toApply
+
+ // a chan to send out readState
+ readStateC chan raft.ReadState
+
+ // utility
+ ticker *time.Ticker
+ // contention detectors for raft heartbeat message
+ td *contention.TimeoutDetector
+
+ stopped chan struct{}
+ done chan struct{}
+}
+
+type raftNodeConfig struct {
+ lg *zap.Logger
+
+ // to check if msg receiver is removed from cluster
+ isIDRemoved func(id uint64) bool
+ raft.Node
+ raftStorage *raft.MemoryStorage
+ storage serverstorage.Storage
+ heartbeat time.Duration // for logging
+ // transport specifies the transport to send and receive msgs to members.
+ // Sending messages MUST NOT block. It is okay to drop messages, since
+ // clients should timeout and reissue their messages.
+ // If transport is nil, server will panic.
+ transport rafthttp.Transporter
+}
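+
+// A minimal construction sketch (illustrative only; assumes n is a started
+// raft.Node, ms its raft.MemoryStorage, st a serverstorage.Storage and
+// tr a rafthttp.Transporter):
+//
+//    rn := newRaftNode(raftNodeConfig{
+//        lg:          lg,
+//        isIDRemoved: func(id uint64) bool { return false },
+//        Node:        n,
+//        raftStorage: ms,
+//        storage:     st,
+//        heartbeat:   100 * time.Millisecond,
+//        transport:   tr,
+//    })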
+
+func newRaftNode(cfg raftNodeConfig) *raftNode {
+ var lg raft.Logger
+ if cfg.lg != nil {
+ lg = NewRaftLoggerZap(cfg.lg)
+ } else {
+ lcfg := logutil.DefaultZapLoggerConfig
+ var err error
+ lg, err = NewRaftLogger(&lcfg)
+ if err != nil {
+ log.Fatalf("cannot create raft logger %v", err)
+ }
+ }
+ raft.SetLogger(lg)
+ r := &raftNode{
+ lg: cfg.lg,
+ tickMu: new(sync.RWMutex),
+ raftNodeConfig: cfg,
+ latestTickTs: time.Now(),
+ // set up the contention detector for raft heartbeat messages.
+ // expect to send a heartbeat within 2 heartbeat intervals.
+ td: contention.NewTimeoutDetector(2 * cfg.heartbeat),
+ readStateC: make(chan raft.ReadState, 1),
+ msgSnapC: make(chan raftpb.Message, maxInFlightMsgSnap),
+ applyc: make(chan toApply),
+ stopped: make(chan struct{}),
+ done: make(chan struct{}),
+ }
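+ // Note: a zero-valued time.Ticker has a nil C channel, so the <-r.ticker.C
+ // case in start never fires when heartbeat is disabled.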
+ if r.heartbeat == 0 {
+ r.ticker = &time.Ticker{}
+ } else {
+ r.ticker = time.NewTicker(r.heartbeat)
+ }
+ return r
+}
+
+// raft.Node does not lock internally, so tick serializes Tick calls with tickMu.
+func (r *raftNode) tick() {
+ r.tickMu.Lock()
+ r.Tick()
+ r.latestTickTs = time.Now()
+ r.tickMu.Unlock()
+}
+
+func (r *raftNode) getLatestTickTs() time.Time {
+ r.tickMu.RLock()
+ defer r.tickMu.RUnlock()
+ return r.latestTickTs
+}
+
+// start prepares and starts raftNode in a new goroutine. It is no longer safe
+// to modify the fields after it has been started.
+func (r *raftNode) start(rh *raftReadyHandler) {
+ internalTimeout := time.Second
+
+ go func() {
+ defer r.onStop()
+ islead := false
+
+ for {
+ select {
+ case <-r.ticker.C:
+ r.tick()
+ case rd := <-r.Ready():
+ if rd.SoftState != nil {
+ newLeader := rd.SoftState.Lead != raft.None && rh.getLead() != rd.SoftState.Lead
+ if newLeader {
+ leaderChanges.Inc()
+ }
+
+ if rd.SoftState.Lead == raft.None {
+ hasLeader.Set(0)
+ } else {
+ hasLeader.Set(1)
+ }
+
+ rh.updateLead(rd.SoftState.Lead)
+ islead = rd.RaftState == raft.StateLeader
+ if islead {
+ isLeader.Set(1)
+ } else {
+ isLeader.Set(0)
+ }
+ rh.updateLeadership(newLeader)
+ r.td.Reset()
+ }
+
+ if len(rd.ReadStates) != 0 {
+ select {
+ case r.readStateC <- rd.ReadStates[len(rd.ReadStates)-1]:
+ case <-time.After(internalTimeout):
+ r.lg.Warn("timed out sending read state", zap.Duration("timeout", internalTimeout))
+ case <-r.stopped:
+ return
+ }
+ }
+
+ notifyc := make(chan struct{}, 1)
+ raftAdvancedC := make(chan struct{}, 1)
+ ap := toApply{
+ entries: rd.CommittedEntries,
+ snapshot: rd.Snapshot,
+ notifyc: notifyc,
+ raftAdvancedC: raftAdvancedC,
+ }
+
+ updateCommittedIndex(&ap, rh)
+
+ select {
+ case r.applyc <- ap:
+ case <-r.stopped:
+ return
+ }
+
+ // the leader can write to its disk in parallel with replicating to the followers and then
+ // writing to their disks.
+ // For more details, check raft thesis 10.2.1
+ if islead {
+ // gofail: var raftBeforeLeaderSend struct{}
+ r.transport.Send(r.processMessages(rd.Messages))
+ }
+
+ // Must save the snapshot file and WAL snapshot entry before saving any other entries or hardstate to
+ // ensure that recovery after a snapshot restore is possible.
+ if !raft.IsEmptySnap(rd.Snapshot) {
+ // gofail: var raftBeforeSaveSnap struct{}
+ if err := r.storage.SaveSnap(rd.Snapshot); err != nil {
+ r.lg.Fatal("failed to save Raft snapshot", zap.Error(err))
+ }
+ // gofail: var raftAfterSaveSnap struct{}
+ }
+
+ // gofail: var raftBeforeSave struct{}
+ if err := r.storage.Save(rd.HardState, rd.Entries); err != nil {
+ r.lg.Fatal("failed to save Raft hard state and entries", zap.Error(err))
+ }
+ if !raft.IsEmptyHardState(rd.HardState) {
+ proposalsCommitted.Set(float64(rd.HardState.Commit))
+ }
+ // gofail: var raftAfterSave struct{}
+
+ if !raft.IsEmptySnap(rd.Snapshot) {
+ // Force WAL to fsync its hard state before Release() releases
+ // old data from the WAL. Otherwise could get an error like:
+ // panic: tocommit(107) is out of range [lastIndex(84)]. Was the raft log corrupted, truncated, or lost?
+ // See https://github.com/etcd-io/etcd/issues/10219 for more details.
+ if err := r.storage.Sync(); err != nil {
+ r.lg.Fatal("failed to sync Raft snapshot", zap.Error(err))
+ }
+
+ // etcdserver now claims the snapshot has been persisted onto the disk
+ notifyc <- struct{}{}
+
+ // gofail: var raftBeforeApplySnap struct{}
+ r.raftStorage.ApplySnapshot(rd.Snapshot)
+ r.lg.Info("applied incoming Raft snapshot", zap.Uint64("snapshot-index", rd.Snapshot.Metadata.Index))
+ // gofail: var raftAfterApplySnap struct{}
+
+ if err := r.storage.Release(rd.Snapshot); err != nil {
+ r.lg.Fatal("failed to release Raft wal", zap.Error(err))
+ }
+ // gofail: var raftAfterWALRelease struct{}
+ }
+
+ r.raftStorage.Append(rd.Entries)
+
+ confChanged := false
+ for _, ent := range rd.CommittedEntries {
+ if ent.Type == raftpb.EntryConfChange {
+ confChanged = true
+ break
+ }
+ }
+
+ if !islead {
+ // finish processing incoming messages before we signal notifyc chan
+ msgs := r.processMessages(rd.Messages)
+
+ // now unblocks 'applyAll' that waits on Raft log disk writes before triggering snapshots
+ notifyc <- struct{}{}
+
+ // A candidate or follower needs to wait for all pending configuration
+ // changes to be applied before sending messages.
+ // Otherwise we might incorrectly count votes (e.g. votes from removed members).
+ // Also, a slow machine's follower raft layer could proceed to become the
+ // leader of its own single-node cluster before the apply layer applies the
+ // config change. For now we simply wait for ALL pending entries to be applied.
+ // We might improve this later if it causes unnecessarily long blocking.
+
+ if confChanged {
+ // blocks until 'applyAll' calls 'applyWait.Trigger'
+ // to be in sync with scheduled config-change job
+ // (assume notifyc has cap of 1)
+ select {
+ case notifyc <- struct{}{}:
+ case <-r.stopped:
+ return
+ }
+ }
+
+ // gofail: var raftBeforeFollowerSend struct{}
+ r.transport.Send(msgs)
+ } else {
+ // leader already processed 'MsgSnap' and signaled
+ notifyc <- struct{}{}
+ }
+
+ // gofail: var raftBeforeAdvance struct{}
+ r.Advance()
+
+ if confChanged {
+ // notify etcdserver that raft has already been notified or advanced.
+ raftAdvancedC <- struct{}{}
+ }
+ case <-r.stopped:
+ return
+ }
+ }
+ }()
+}
+
+func updateCommittedIndex(ap *toApply, rh *raftReadyHandler) {
+ var ci uint64
+ if len(ap.entries) != 0 {
+ ci = ap.entries[len(ap.entries)-1].Index
+ }
+ if ap.snapshot.Metadata.Index > ci {
+ ci = ap.snapshot.Metadata.Index
+ }
+ if ci != 0 {
+ rh.updateCommittedIndex(ci)
+ }
+}
+
+func (r *raftNode) processMessages(ms []raftpb.Message) []raftpb.Message {
+ sentAppResp := false
+ for i := len(ms) - 1; i >= 0; i-- {
+ if r.isIDRemoved(ms[i].To) {
+ ms[i].To = 0
+ continue
+ }
+
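+ // MsgAppResp coalescing: each response reports the follower's latest log
+ // position, so only one (the last in the slice, found by the back-to-front
+ // scan) needs to be delivered.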
+ if ms[i].Type == raftpb.MsgAppResp {
+ if sentAppResp {
+ ms[i].To = 0
+ } else {
+ sentAppResp = true
+ }
+ }
+
+ if ms[i].Type == raftpb.MsgSnap {
+ // There are two separate data stores: the v2 store and the v3 KV.
+ // The msgSnap only contains the most recent snapshot of the v2 store
+ // without the KV, so we need to redirect the msgSnap to the etcd server
+ // main loop for merging in the current store snapshot and KV snapshot.
+ select {
+ case r.msgSnapC <- ms[i]:
+ default:
+ // drop msgSnap if the inflight chan is full.
+ }
+ ms[i].To = 0
+ }
+ if ms[i].Type == raftpb.MsgHeartbeat {
+ ok, exceed := r.td.Observe(ms[i].To)
+ if !ok {
+ // TODO: limit request rate.
+ r.lg.Warn(
+ "leader failed to send out heartbeat on time; took too long, leader is overloaded likely from slow disk",
+ zap.String("to", fmt.Sprintf("%x", ms[i].To)),
+ zap.Duration("heartbeat-interval", r.heartbeat),
+ zap.Duration("expected-duration", 2*r.heartbeat),
+ zap.Duration("exceeded-duration", exceed),
+ )
+ heartbeatSendFailures.Inc()
+ }
+ }
+ }
+ return ms
+}
+
+func (r *raftNode) apply() chan toApply {
+ return r.applyc
+}
+
+func (r *raftNode) stop() {
+ select {
+ case r.stopped <- struct{}{}:
+ // Not already stopped, so trigger it
+ case <-r.done:
+ // Has already been stopped - no need to do anything
+ return
+ }
+ // Block until the stop has been acknowledged by start()
+ <-r.done
+}
+
+func (r *raftNode) onStop() {
+ r.Stop()
+ r.ticker.Stop()
+ r.transport.Stop()
+ if err := r.storage.Close(); err != nil {
+ r.lg.Panic("failed to close Raft storage", zap.Error(err))
+ }
+ close(r.done)
+}
+
+// for testing
+func (r *raftNode) pauseSending() {
+ p := r.transport.(rafthttp.Pausable)
+ p.Pause()
+}
+
+func (r *raftNode) resumeSending() {
+ p := r.transport.(rafthttp.Pausable)
+ p.Resume()
+}
+
+// advanceTicks advances ticks of Raft node.
+// This can be used for fast-forwarding election
+// ticks in multi data-center deployments, thus
+// speeding up election process.
+func (r *raftNode) advanceTicks(ticks int) {
+ for i := 0; i < ticks; i++ {
+ r.tick()
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/server.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/server.go
new file mode 100644
index 0000000..0eb16b7
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/server.go
@@ -0,0 +1,2592 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "context"
+ "encoding/json"
+ errorspkg "errors"
+ "expvar"
+ "fmt"
+ "math"
+ "net/http"
+ "path"
+ "reflect"
+ "regexp"
+ "strconv"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/coreos/go-semver/semver"
+ humanize "github.com/dustin/go-humanize"
+ "github.com/prometheus/client_golang/prometheus"
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/membershippb"
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/client/pkg/v3/fileutil"
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/client/pkg/v3/verify"
+ "go.etcd.io/etcd/pkg/v3/featuregate"
+ "go.etcd.io/etcd/pkg/v3/idutil"
+ "go.etcd.io/etcd/pkg/v3/notify"
+ "go.etcd.io/etcd/pkg/v3/pbutil"
+ "go.etcd.io/etcd/pkg/v3/runtime"
+ "go.etcd.io/etcd/pkg/v3/schedule"
+ "go.etcd.io/etcd/pkg/v3/traceutil"
+ "go.etcd.io/etcd/pkg/v3/wait"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/config"
+ "go.etcd.io/etcd/server/v3/etcdserver/api"
+ httptypes "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/snap"
+ stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3alarm"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor"
+ "go.etcd.io/etcd/server/v3/etcdserver/apply"
+ "go.etcd.io/etcd/server/v3/etcdserver/cindex"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+ "go.etcd.io/etcd/server/v3/etcdserver/txn"
+ serverversion "go.etcd.io/etcd/server/v3/etcdserver/version"
+ "go.etcd.io/etcd/server/v3/features"
+ "go.etcd.io/etcd/server/v3/lease"
+ "go.etcd.io/etcd/server/v3/lease/leasehttp"
+ serverstorage "go.etcd.io/etcd/server/v3/storage"
+ "go.etcd.io/etcd/server/v3/storage/backend"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+ "go.etcd.io/etcd/server/v3/storage/schema"
+ "go.etcd.io/raft/v3"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+const (
+ DefaultSnapshotCount = 10000
+
+ // DefaultSnapshotCatchUpEntries is the number of entries for a slow follower
+ // to catch up on after compacting the raft storage entries.
+ // We expect the follower to have millisecond-level latency with the leader.
+ // The max throughput is around 10K. Keeping 5K entries is enough to help
+ // a follower catch up.
+ DefaultSnapshotCatchUpEntries uint64 = 5000
+
+ StoreClusterPrefix = "/0"
+ StoreKeysPrefix = "/1"
+
+ // HealthInterval is the minimum time the cluster should be healthy
+ // before accepting add and delete member requests.
+ HealthInterval = 5 * time.Second
+
+ purgeFileInterval = 30 * time.Second
+
+ // max number of in-flight snapshot messages the etcd server allows
+ // This number is more than enough for most clusters with 5 machines.
+ maxInFlightMsgSnap = 16
+
+ releaseDelayAfterSnapshot = 30 * time.Second
+
+ // maxPendingRevokes is the maximum number of outstanding expired lease revocations.
+ maxPendingRevokes = 16
+
+ recommendedMaxRequestBytes = 10 * 1024 * 1024
+
+ // readyPercentThreshold is a threshold used to determine
+ // whether a learner is ready to transition into a full voting member.
+
+ DowngradeEnabledPath = "/downgrade/enabled"
+ memorySnapshotCount = 100
+)
+
+var (
+ // monitorVersionInterval should be smaller than the timeout
+ // on the connection; otherwise we will not be able to reuse
+ // the connection (since it will time out).
+ monitorVersionInterval = rafthttp.ConnWriteTimeout - time.Second
+
+ recommendedMaxRequestBytesString = humanize.Bytes(uint64(recommendedMaxRequestBytes))
+ storeMemberAttributeRegexp = regexp.MustCompile(path.Join(membership.StoreMembersPrefix, "[[:xdigit:]]{1,16}", "attributes"))
+)
+
+func init() {
+ expvar.Publish(
+ "file_descriptor_limit",
+ expvar.Func(
+ func() any {
+ n, _ := runtime.FDLimit()
+ return n
+ },
+ ),
+ )
+}
+
+type Response struct {
+ Term uint64
+ Index uint64
+ Event *v2store.Event
+ Watcher v2store.Watcher
+ Err error
+}
+
+type ServerV2 interface {
+ Server
+ Leader() types.ID
+
+ ClientCertAuthEnabled() bool
+}
+
+type ServerV3 interface {
+ Server
+ apply.RaftStatusGetter
+}
+
+func (s *EtcdServer) ClientCertAuthEnabled() bool { return s.Cfg.ClientCertAuthEnabled }
+
+type Server interface {
+ // AddMember attempts to add a member into the cluster. It will return
+ // ErrIDRemoved if member ID is removed from the cluster, or return
+ // ErrIDExists if member ID exists in the cluster.
+ AddMember(ctx context.Context, memb membership.Member) ([]*membership.Member, error)
+ // RemoveMember attempts to remove a member from the cluster. It will
+ // return ErrIDRemoved if member ID is removed from the cluster, or return
+ // ErrIDNotFound if member ID is not in the cluster.
+ RemoveMember(ctx context.Context, id uint64) ([]*membership.Member, error)
+ // UpdateMember attempts to update an existing member in the cluster. It will
+ // return ErrIDNotFound if the member ID does not exist.
+ UpdateMember(ctx context.Context, updateMemb membership.Member) ([]*membership.Member, error)
+ // PromoteMember attempts to promote a non-voting node to a voting node. It will
+ // return ErrIDNotFound if the member ID does not exist.
+ // return ErrLearnerNotReady if the member is not ready.
+ // return ErrMemberNotLearner if the member is not a learner.
+ PromoteMember(ctx context.Context, id uint64) ([]*membership.Member, error)
+
+ // ClusterVersion is the cluster-wide minimum major.minor version.
+ // Cluster version is set to the min version that an etcd member is
+ // compatible with when it first bootstraps.
+ //
+ // ClusterVersion is nil until the cluster is bootstrapped (has a quorum).
+ //
+ // During a rolling upgrade, the ClusterVersion will be updated
+ // automatically after a sync (5 seconds by default).
+ //
+ // The API/raft component can utilize ClusterVersion to determine if
+ // it can accept a client request or a raft RPC.
+ // NOTE: ClusterVersion might be nil when etcd 2.1 works with etcd 2.0 and
+ // the leader is etcd 2.0. etcd 2.0 leader will not update clusterVersion since
+ // this feature is introduced post 2.0.
+ ClusterVersion() *semver.Version
+ // StorageVersion is the storage schema version. It's supported starting
+ // from 3.6.
+ StorageVersion() *semver.Version
+ Cluster() api.Cluster
+ Alarms() []*pb.AlarmMember
+
+ // LeaderChangedNotify returns a channel for application-level code to be
+ // notified when the etcd leader changes. This function is intended to be
+ // used only in applications which embed etcd.
+ // Caution:
+ // 1. the returned channel is closed when the leadership changes.
+ // 2. so a new channel needs to be obtained for each raft term.
+ // 3. users can lose some consecutive channel changes when using this API.
+ LeaderChangedNotify() <-chan struct{}
+}
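+
+// A minimal consumer sketch for LeaderChangedNotify (illustrative only;
+// s and ctx are assumed from the embedding application):
+//
+//    for {
+//        ch := s.LeaderChangedNotify()
+//        select {
+//        case <-ch: // leadership changed; the channel is now closed,
+//            // so re-obtain it for the new raft term
+//        case <-ctx.Done():
+//            return
+//        }
+//    }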
+
+// EtcdServer is the production implementation of the Server interface
+type EtcdServer struct {
+ // inflightSnapshots holds the number of snapshots currently in flight.
+ inflightSnapshots int64 // must use atomic operations to access; keep 64-bit aligned.
+ appliedIndex uint64 // must use atomic operations to access; keep 64-bit aligned.
+ committedIndex uint64 // must use atomic operations to access; keep 64-bit aligned.
+ term uint64 // must use atomic operations to access; keep 64-bit aligned.
+ lead uint64 // must use atomic operations to access; keep 64-bit aligned.
+
+ consistIndex cindex.ConsistentIndexer // consistIndex is used to get/set/save consistentIndex
+ r raftNode // uses 64-bit atomics; keep 64-bit aligned.
+
+ readych chan struct{}
+ Cfg config.ServerConfig
+
+ lgMu *sync.RWMutex
+ lg *zap.Logger
+
+ w wait.Wait
+
+ readMu sync.RWMutex
+ // read routine notifies the etcd server that it is waiting to read by
+ // sending an empty struct to readwaitc
+ readwaitc chan struct{}
+ // readNotifier is used to notify the read routine that it can process the request
+ // when there is no error
+ readNotifier *notifier
+
+ // stop signals that the run goroutine should shut down.
+ stop chan struct{}
+ // stopping is closed by run goroutine on shutdown.
+ stopping chan struct{}
+ // done is closed when all goroutines from start() complete.
+ done chan struct{}
+ // leaderChanged is used to notify the linearizable read loop to drop the old read requests.
+ leaderChanged *notify.Notifier
+
+ errorc chan error
+ memberID types.ID
+ attributes membership.Attributes
+
+ cluster *membership.RaftCluster
+
+ v2store v2store.Store
+ snapshotter *snap.Snapshotter
+
+ uberApply apply.UberApplier
+
+ applyWait wait.WaitTime
+
+ kv mvcc.WatchableKV
+ lessor lease.Lessor
+ bemu sync.RWMutex
+ be backend.Backend
+ beHooks *serverstorage.BackendHooks
+ authStore auth.AuthStore
+ alarmStore *v3alarm.AlarmStore
+
+ stats *stats.ServerStats
+ lstats *stats.LeaderStats
+
+ SyncTicker *time.Ticker
+ // compactor is used to auto-compact the KV.
+ compactor v3compactor.Compactor
+
+ // peerRt used to send requests (version, lease) to peers.
+ peerRt http.RoundTripper
+ reqIDGen *idutil.Generator
+
+ // wgMu blocks concurrent waitgroup mutation while server stopping
+ wgMu sync.RWMutex
+ // wg is used to wait for the goroutines that depend on the server state
+ // to exit when stopping the server.
+ wg sync.WaitGroup
+
+ // ctx is used for etcd-initiated requests that may need to be canceled
+ // on etcd server shutdown.
+ ctx context.Context
+ cancel context.CancelFunc
+
+ leadTimeMu sync.RWMutex
+ leadElectedTime time.Time
+
+ firstCommitInTerm *notify.Notifier
+ clusterVersionChanged *notify.Notifier
+
+ *AccessController
+ // forceDiskSnapshot can force a snapshot to be triggered after apply, independent of the snapshotCount.
+ // Should only be set within apply code path. Used to force snapshot after cluster version downgrade.
+ // TODO: Replace with flush db in v3.7 assuming v3.6 bootstraps from db file.
+ forceDiskSnapshot bool
+ corruptionChecker CorruptionChecker
+}
+
+// NewServer creates a new EtcdServer from the supplied configuration. The
+// configuration is considered static for the lifetime of the EtcdServer.
+func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) {
+ b, err := bootstrap(cfg)
+ if err != nil {
+ cfg.Logger.Error("bootstrap failed", zap.Error(err))
+ return nil, err
+ }
+ cfg.Logger.Info("bootstrap successfully")
+
+ defer func() {
+ if err != nil {
+ b.Close()
+ }
+ }()
+
+ sstats := stats.NewServerStats(cfg.Name, b.cluster.cl.String())
+ lstats := stats.NewLeaderStats(cfg.Logger, b.cluster.nodeID.String())
+
+ heartbeat := time.Duration(cfg.TickMs) * time.Millisecond
+ srv = &EtcdServer{
+ readych: make(chan struct{}),
+ Cfg: cfg,
+ lgMu: new(sync.RWMutex),
+ lg: cfg.Logger,
+ errorc: make(chan error, 1),
+ v2store: b.storage.st,
+ snapshotter: b.ss,
+ r: *b.raft.newRaftNode(b.ss, b.storage.wal.w, b.cluster.cl),
+ memberID: b.cluster.nodeID,
+ attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
+ cluster: b.cluster.cl,
+ stats: sstats,
+ lstats: lstats,
+ SyncTicker: time.NewTicker(500 * time.Millisecond),
+ peerRt: b.prt,
+ reqIDGen: idutil.NewGenerator(uint16(b.cluster.nodeID), time.Now()),
+ AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist},
+ consistIndex: b.storage.backend.ci,
+ firstCommitInTerm: notify.NewNotifier(),
+ clusterVersionChanged: notify.NewNotifier(),
+ }
+
+ addFeatureGateMetrics(cfg.ServerFeatureGate, serverFeatureEnabled)
+ serverID.With(prometheus.Labels{"server_id": b.cluster.nodeID.String()}).Set(1)
+ srv.cluster.SetVersionChangedNotifier(srv.clusterVersionChanged)
+
+ srv.be = b.storage.backend.be
+ srv.beHooks = b.storage.backend.beHooks
+ minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat
+
+ // always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
+ // If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
+ srv.lessor = lease.NewLessor(srv.Logger(), srv.be, srv.cluster, lease.LessorConfig{
+ MinLeaseTTL: int64(math.Ceil(minTTL.Seconds())),
+ CheckpointInterval: cfg.LeaseCheckpointInterval,
+ CheckpointPersist: cfg.ServerFeatureGate.Enabled(features.LeaseCheckpointPersist),
+ ExpiredLeasesRetryInterval: srv.Cfg.ReqTimeout(),
+ })
+
+ tp, err := auth.NewTokenProvider(cfg.Logger, cfg.AuthToken,
+ func(index uint64) <-chan struct{} {
+ return srv.applyWait.Wait(index)
+ },
+ time.Duration(cfg.TokenTTL)*time.Second,
+ )
+ if err != nil {
+ cfg.Logger.Warn("failed to create token provider", zap.Error(err))
+ return nil, err
+ }
+
+ mvccStoreConfig := mvcc.StoreConfig{
+ CompactionBatchLimit: cfg.CompactionBatchLimit,
+ CompactionSleepInterval: cfg.CompactionSleepInterval,
+ }
+ srv.kv = mvcc.New(srv.Logger(), srv.be, srv.lessor, mvccStoreConfig)
+ srv.corruptionChecker = newCorruptionChecker(cfg.Logger, srv, srv.kv.HashStorage())
+
+ srv.authStore = auth.NewAuthStore(srv.Logger(), schema.NewAuthBackend(srv.Logger(), srv.be), tp, int(cfg.BcryptCost))
+
+ newSrv := srv // since srv == nil in defer if srv is returned as nil
+ defer func() {
+ // closing backend without first closing kv can cause
+ // resumed compactions to fail with closed tx errors
+ if err != nil {
+ newSrv.kv.Close()
+ }
+ }()
+ if num := cfg.AutoCompactionRetention; num != 0 {
+ srv.compactor, err = v3compactor.New(cfg.Logger, cfg.AutoCompactionMode, num, srv.kv, srv)
+ if err != nil {
+ return nil, err
+ }
+ srv.compactor.Run()
+ }
+
+ if err = srv.restoreAlarms(); err != nil {
+ return nil, err
+ }
+ srv.uberApply = srv.NewUberApplier()
+
+ if srv.FeatureEnabled(features.LeaseCheckpoint) {
+ // setting checkpointer enables lease checkpoint feature.
+ srv.lessor.SetCheckpointer(func(ctx context.Context, cp *pb.LeaseCheckpointRequest) error {
+ if !srv.ensureLeadership() {
+ srv.lg.Warn("Ignore the checkpoint request because current member isn't a leader",
+ zap.Uint64("local-member-id", uint64(srv.MemberID())))
+ return lease.ErrNotPrimary
+ }
+
+ srv.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseCheckpoint: cp})
+ return nil
+ })
+ }
+
+ // Set the hook after EtcdServer finishes the initialization to avoid
+ // the hook being called during the initialization process.
+ srv.be.SetTxPostLockInsideApplyHook(srv.getTxPostLockInsideApplyHook())
+
+ // TODO: move transport initialization near the definition of remote
+ tr := &rafthttp.Transport{
+ Logger: cfg.Logger,
+ TLSInfo: cfg.PeerTLSInfo,
+ DialTimeout: cfg.PeerDialTimeout(),
+ ID: b.cluster.nodeID,
+ URLs: cfg.PeerURLs,
+ ClusterID: b.cluster.cl.ID(),
+ Raft: srv,
+ Snapshotter: b.ss,
+ ServerStats: sstats,
+ LeaderStats: lstats,
+ ErrorC: srv.errorc,
+ }
+ if err = tr.Start(); err != nil {
+ return nil, err
+ }
+ // add all remotes into transport
+ for _, m := range b.cluster.remotes {
+ if m.ID != b.cluster.nodeID {
+ tr.AddRemote(m.ID, m.PeerURLs)
+ }
+ }
+ for _, m := range b.cluster.cl.Members() {
+ if m.ID != b.cluster.nodeID {
+ tr.AddPeer(m.ID, m.PeerURLs)
+ }
+ }
+ srv.r.transport = tr
+
+ return srv, nil
+}
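+
+// A minimal embedding sketch (illustrative only; populating the
+// config.ServerConfig is elided):
+//
+//    srv, err := etcdserver.NewServer(cfg)
+//    if err != nil {
+//        log.Fatal(err)
+//    }
+//    srv.Start()
+//    defer srv.Stop()
+//    <-srv.ReadyNotify() // wait until the server can serve client requests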
+
+func (s *EtcdServer) Logger() *zap.Logger {
+ s.lgMu.RLock()
+ l := s.lg
+ s.lgMu.RUnlock()
+ return l
+}
+
+func (s *EtcdServer) Config() config.ServerConfig {
+ return s.Cfg
+}
+
+// FeatureEnabled returns true if the feature is enabled by the etcd server, false otherwise.
+func (s *EtcdServer) FeatureEnabled(f featuregate.Feature) bool {
+ return s.Cfg.ServerFeatureGate.Enabled(f)
+}
+
+func tickToDur(ticks int, tickMs uint) string {
+ return fmt.Sprintf("%v", time.Duration(ticks)*time.Duration(tickMs)*time.Millisecond)
+}
+
+func (s *EtcdServer) adjustTicks() {
+ lg := s.Logger()
+ clusterN := len(s.cluster.Members())
+
+ // single-node fresh start, or single-node recovers from snapshot
+ if clusterN == 1 {
+ ticks := s.Cfg.ElectionTicks - 1
+ lg.Info(
+ "started as single-node; fast-forwarding election ticks",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.Int("forward-ticks", ticks),
+ zap.String("forward-duration", tickToDur(ticks, s.Cfg.TickMs)),
+ zap.Int("election-ticks", s.Cfg.ElectionTicks),
+ zap.String("election-timeout", tickToDur(s.Cfg.ElectionTicks, s.Cfg.TickMs)),
+ )
+ s.r.advanceTicks(ticks)
+ return
+ }
+
+ if !s.Cfg.InitialElectionTickAdvance {
+ lg.Info("skipping initial election tick advance", zap.Int("election-ticks", s.Cfg.ElectionTicks))
+ return
+ }
+ lg.Info("starting initial election tick advance", zap.Int("election-ticks", s.Cfg.ElectionTicks))
+
+ // retry up to "rafthttp.ConnReadTimeout" (5 seconds) until a peer
+ // connection reports activity; otherwise:
+ // 1. all connections failed, or
+ // 2. no active peers, or
+ // 3. restarted single-node with no snapshot
+ // then do nothing, because advancing ticks would have no effect
+ waitTime := rafthttp.ConnReadTimeout
+ itv := 50 * time.Millisecond
+ for i := int64(0); i < int64(waitTime/itv); i++ {
+ select {
+ case <-time.After(itv):
+ case <-s.stopping:
+ return
+ }
+
+ peerN := s.r.transport.ActivePeers()
+ if peerN > 1 {
+ // multi-node cluster received peer connection reports;
+ // adjust ticks in case of slow leader message delivery
+ ticks := s.Cfg.ElectionTicks - 2
+
+ lg.Info(
+ "initialized peer connections; fast-forwarding election ticks",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.Int("forward-ticks", ticks),
+ zap.String("forward-duration", tickToDur(ticks, s.Cfg.TickMs)),
+ zap.Int("election-ticks", s.Cfg.ElectionTicks),
+ zap.String("election-timeout", tickToDur(s.Cfg.ElectionTicks, s.Cfg.TickMs)),
+ zap.Int("active-remote-members", peerN),
+ )
+
+ s.r.advanceTicks(ticks)
+ return
+ }
+ }
+}
+
+// Start performs any initialization of the Server necessary for it to
+// begin serving requests. It must be called before Do or Process.
+// Start must be non-blocking; any long-running server functionality
+// should be implemented in goroutines.
+func (s *EtcdServer) Start() {
+ s.start()
+ s.GoAttach(func() { s.adjustTicks() })
+ s.GoAttach(func() { s.publishV3(s.Cfg.ReqTimeout()) })
+ s.GoAttach(s.purgeFile)
+ s.GoAttach(func() { monitorFileDescriptor(s.Logger(), s.stopping) })
+ s.GoAttach(s.monitorClusterVersions)
+ s.GoAttach(s.monitorStorageVersion)
+ s.GoAttach(s.linearizableReadLoop)
+ s.GoAttach(s.monitorKVHash)
+ s.GoAttach(s.monitorCompactHash)
+ s.GoAttach(s.monitorDowngrade)
+}
+
+// start prepares and starts server in a new goroutine. It is no longer safe to
+// modify a server's fields after it has been sent to Start.
+// This function is just used for testing.
+func (s *EtcdServer) start() {
+ lg := s.Logger()
+
+ if s.Cfg.SnapshotCount == 0 {
+ lg.Info(
+ "updating snapshot-count to default",
+ zap.Uint64("given-snapshot-count", s.Cfg.SnapshotCount),
+ zap.Uint64("updated-snapshot-count", DefaultSnapshotCount),
+ )
+ s.Cfg.SnapshotCount = DefaultSnapshotCount
+ }
+ if s.Cfg.SnapshotCatchUpEntries == 0 {
+ lg.Info(
+ "updating snapshot catch-up entries to default",
+ zap.Uint64("given-snapshot-catchup-entries", s.Cfg.SnapshotCatchUpEntries),
+ zap.Uint64("updated-snapshot-catchup-entries", DefaultSnapshotCatchUpEntries),
+ )
+ s.Cfg.SnapshotCatchUpEntries = DefaultSnapshotCatchUpEntries
+ }
+
+ s.w = wait.New()
+ s.applyWait = wait.NewTimeList()
+ s.done = make(chan struct{})
+ s.stop = make(chan struct{})
+ s.stopping = make(chan struct{}, 1)
+ s.ctx, s.cancel = context.WithCancel(context.Background())
+ s.readwaitc = make(chan struct{}, 1)
+ s.readNotifier = newNotifier()
+ s.leaderChanged = notify.NewNotifier()
+ if s.ClusterVersion() != nil {
+ lg.Info(
+ "starting etcd server",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("local-server-version", version.Version),
+ zap.String("cluster-id", s.Cluster().ID().String()),
+ zap.String("cluster-version", version.Cluster(s.ClusterVersion().String())),
+ )
+ membership.ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": version.Cluster(s.ClusterVersion().String())}).Set(1)
+ } else {
+ lg.Info(
+ "starting etcd server",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("local-server-version", version.Version),
+ zap.String("cluster-version", "to_be_decided"),
+ )
+ }
+
+ // TODO: if this is an empty log, write all peer infos
+ // into the first entry
+ go s.run()
+}
+
+func (s *EtcdServer) purgeFile() {
+ lg := s.Logger()
+ var dberrc, serrc, werrc <-chan error
+ var dbdonec, sdonec, wdonec <-chan struct{}
+ if s.Cfg.MaxSnapFiles > 0 {
+ dbdonec, dberrc = fileutil.PurgeFileWithoutFlock(lg, s.Cfg.SnapDir(), "snap.db", s.Cfg.MaxSnapFiles, purgeFileInterval, s.stopping)
+ sdonec, serrc = fileutil.PurgeFileWithoutFlock(lg, s.Cfg.SnapDir(), "snap", s.Cfg.MaxSnapFiles, purgeFileInterval, s.stopping)
+ }
+ if s.Cfg.MaxWALFiles > 0 {
+ wdonec, werrc = fileutil.PurgeFileWithDoneNotify(lg, s.Cfg.WALDir(), "wal", s.Cfg.MaxWALFiles, purgeFileInterval, s.stopping)
+ }
+
+ select {
+ case e := <-dberrc:
+ lg.Fatal("failed to purge snap db file", zap.Error(e))
+ case e := <-serrc:
+ lg.Fatal("failed to purge snap file", zap.Error(e))
+ case e := <-werrc:
+ lg.Fatal("failed to purge wal file", zap.Error(e))
+ case <-s.stopping:
+ if dbdonec != nil {
+ <-dbdonec
+ }
+ if sdonec != nil {
+ <-sdonec
+ }
+ if wdonec != nil {
+ <-wdonec
+ }
+ return
+ }
+}
+
+func (s *EtcdServer) Cluster() api.Cluster { return s.cluster }
+
+func (s *EtcdServer) ApplyWait() <-chan struct{} { return s.applyWait.Wait(s.getCommittedIndex()) }
+
+type ServerPeer interface {
+ ServerV2
+ RaftHandler() http.Handler
+ LeaseHandler() http.Handler
+}
+
+func (s *EtcdServer) LeaseHandler() http.Handler {
+ if s.lessor == nil {
+ return nil
+ }
+ return leasehttp.NewHandler(s.lessor, s.ApplyWait)
+}
+
+func (s *EtcdServer) RaftHandler() http.Handler { return s.r.transport.Handler() }
+
+type ServerPeerV2 interface {
+ ServerPeer
+ HashKVHandler() http.Handler
+ DowngradeEnabledHandler() http.Handler
+}
+
+func (s *EtcdServer) DowngradeInfo() *serverversion.DowngradeInfo { return s.cluster.DowngradeInfo() }
+
+type downgradeEnabledHandler struct {
+ lg *zap.Logger
+ cluster api.Cluster
+ server *EtcdServer
+}
+
+func (s *EtcdServer) DowngradeEnabledHandler() http.Handler {
+ return &downgradeEnabledHandler{
+ lg: s.Logger(),
+ cluster: s.cluster,
+ server: s,
+ }
+}
+
+func (h *downgradeEnabledHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ w.Header().Set("Allow", http.MethodGet)
+ http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
+ return
+ }
+
+ w.Header().Set("X-Etcd-Cluster-ID", h.cluster.ID().String())
+
+ if r.URL.Path != DowngradeEnabledPath {
+ http.Error(w, "bad path", http.StatusBadRequest)
+ return
+ }
+
+ ctx, cancel := context.WithTimeout(context.Background(), h.server.Cfg.ReqTimeout())
+ defer cancel()
+
+ // serve with linearized downgrade info
+ if err := h.server.linearizableReadNotify(ctx); err != nil {
+ http.Error(w, fmt.Sprintf("failed linearized read: %v", err),
+ http.StatusInternalServerError)
+ return
+ }
+ enabled := h.server.DowngradeInfo().Enabled
+ w.Header().Set("Content-Type", "text/plain")
+ w.Write([]byte(strconv.FormatBool(enabled)))
+}
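+
+// An illustrative probe of this peer endpoint (the peer URL below is an
+// assumption; the path comes from DowngradeEnabledPath):
+//
+//    curl http://127.0.0.1:2380/downgrade/enabled
+//    // -> "true" or "false"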
+
+// Process takes a raft message and applies it to the server's raft state
+// machine, respecting any timeout of the given context.
+func (s *EtcdServer) Process(ctx context.Context, m raftpb.Message) error {
+ lg := s.Logger()
+ if s.cluster.IsIDRemoved(types.ID(m.From)) {
+ lg.Warn(
+ "rejected Raft message from removed member",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("removed-member-id", types.ID(m.From).String()),
+ )
+ return httptypes.NewHTTPError(http.StatusForbidden, "cannot process message from removed member")
+ }
+ if s.MemberID() != types.ID(m.To) {
+ lg.Warn(
+ "rejected Raft message to mismatch member",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("mismatch-member-id", types.ID(m.To).String()),
+ )
+ return httptypes.NewHTTPError(http.StatusForbidden, "cannot process message to mismatch member")
+ }
+ if m.Type == raftpb.MsgApp {
+ s.stats.RecvAppendReq(types.ID(m.From).String(), m.Size())
+ }
+ return s.r.Step(ctx, m)
+}
+
+func (s *EtcdServer) IsIDRemoved(id uint64) bool { return s.cluster.IsIDRemoved(types.ID(id)) }
+
+func (s *EtcdServer) ReportUnreachable(id uint64) { s.r.ReportUnreachable(id) }
+
+// ReportSnapshot reports snapshot sent status to the raft state machine,
+// and clears the used snapshot from the snapshot store.
+func (s *EtcdServer) ReportSnapshot(id uint64, status raft.SnapshotStatus) {
+ s.r.ReportSnapshot(id, status)
+}
+
+type etcdProgress struct {
+ confState raftpb.ConfState
+ diskSnapshotIndex uint64
+ memorySnapshotIndex uint64
+ appliedt uint64
+ appliedi uint64
+}
+
+// raftReadyHandler contains a set of EtcdServer operations to be called by raftNode,
+// and helps decouple state machine logic from Raft algorithms.
+// TODO: add a state machine interface to apply the committed entries and do snapshot/recover
+type raftReadyHandler struct {
+ getLead func() (lead uint64)
+ updateLead func(lead uint64)
+ updateLeadership func(newLeader bool)
+ updateCommittedIndex func(uint64)
+}
+
+func (s *EtcdServer) run() {
+ lg := s.Logger()
+
+ sn, err := s.r.raftStorage.Snapshot()
+ if err != nil {
+ lg.Panic("failed to get snapshot from Raft storage", zap.Error(err))
+ }
+
+ // asynchronously accept toApply packets and dispatch progress in order
+ sched := schedule.NewFIFOScheduler(lg)
+
+ rh := &raftReadyHandler{
+ getLead: func() (lead uint64) { return s.getLead() },
+ updateLead: func(lead uint64) { s.setLead(lead) },
+ updateLeadership: func(newLeader bool) {
+ if !s.isLeader() {
+ if s.lessor != nil {
+ s.lessor.Demote()
+ }
+ if s.compactor != nil {
+ s.compactor.Pause()
+ }
+ } else {
+ if newLeader {
+ t := time.Now()
+ s.leadTimeMu.Lock()
+ s.leadElectedTime = t
+ s.leadTimeMu.Unlock()
+ }
+ if s.compactor != nil {
+ s.compactor.Resume()
+ }
+ }
+ if newLeader {
+ s.leaderChanged.Notify()
+ }
+ // TODO: remove the nil checking
+ // current test utility does not provide the stats
+ if s.stats != nil {
+ s.stats.BecomeLeader()
+ }
+ },
+ updateCommittedIndex: func(ci uint64) {
+ cci := s.getCommittedIndex()
+ if ci > cci {
+ s.setCommittedIndex(ci)
+ }
+ },
+ }
+ s.r.start(rh)
+
+ ep := etcdProgress{
+ confState: sn.Metadata.ConfState,
+ diskSnapshotIndex: sn.Metadata.Index,
+ memorySnapshotIndex: sn.Metadata.Index,
+ appliedt: sn.Metadata.Term,
+ appliedi: sn.Metadata.Index,
+ }
+
+ defer func() {
+ s.wgMu.Lock() // block concurrent waitgroup adds in GoAttach while stopping
+ close(s.stopping)
+ s.wgMu.Unlock()
+ s.cancel()
+ sched.Stop()
+
+ // wait for goroutines before closing raft so wal stays open
+ s.wg.Wait()
+
+ s.SyncTicker.Stop()
+
+ // must stop raft after the scheduler; etcdserver can leak rafthttp pipelines
+ // by adding a peer after raft stops the transport
+ s.r.stop()
+
+ s.Cleanup()
+
+ close(s.done)
+ }()
+
+ var expiredLeaseC <-chan []*lease.Lease
+ if s.lessor != nil {
+ expiredLeaseC = s.lessor.ExpiredLeasesC()
+ }
+
+ for {
+ select {
+ case ap := <-s.r.apply():
+ f := schedule.NewJob("server_applyAll", func(context.Context) { s.applyAll(&ep, &ap) })
+ sched.Schedule(f)
+ case leases := <-expiredLeaseC:
+ s.revokeExpiredLeases(leases)
+ case err := <-s.errorc:
+ lg.Warn("server error", zap.Error(err))
+ lg.Warn("data-dir used by this member must be removed")
+ return
+ case <-s.stop:
+ return
+ }
+ }
+}
+
+func (s *EtcdServer) revokeExpiredLeases(leases []*lease.Lease) {
+ s.GoAttach(func() {
+ // We shouldn't revoke any leases if current member isn't a leader,
+ // because the operation should only be performed by the leader. When
+ // the leader gets blocked on the raft loop, such as writing WAL entries,
+ // it can't process any events or messages from raft, and may think it
+ // is still the leader even though leadership has already changed.
+ // Refer to https://github.com/etcd-io/etcd/issues/15247
+ lg := s.Logger()
+ if !s.ensureLeadership() {
+ lg.Warn("Ignore the lease revoking request because current member isn't a leader",
+ zap.Uint64("local-member-id", uint64(s.MemberID())))
+ return
+ }
+
+ // Increase throughput of the expired-lease deletion process through parallelization
+ c := make(chan struct{}, maxPendingRevokes)
+ for _, curLease := range leases {
+ select {
+ case c <- struct{}{}:
+ case <-s.stopping:
+ return
+ }
+
+ f := func(lid int64) {
+ s.GoAttach(func() {
+ ctx := s.authStore.WithRoot(s.ctx)
+ _, lerr := s.LeaseRevoke(ctx, &pb.LeaseRevokeRequest{ID: lid})
+ if lerr == nil {
+ leaseExpired.Inc()
+ } else {
+ lg.Warn(
+ "failed to revoke lease",
+ zap.String("lease-id", fmt.Sprintf("%016x", lid)),
+ zap.Error(lerr),
+ )
+ }
+
+ <-c
+ })
+ }
+
+ f(int64(curLease.ID))
+ }
+ })
+}
+
+// isActive checks if the etcd instance is still actively processing the
+// heartbeat message (ticks). It returns false if no heartbeat has been
+// received within 3 * tickMs.
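+// For example, with a TickMs of 100 the threshold is 300ms, so the member
+// is considered active only if it ticked within the last 300ms.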
+func (s *EtcdServer) isActive() bool {
+ latestTickTs := s.r.getLatestTickTs()
+ threshold := 3 * time.Duration(s.Cfg.TickMs) * time.Millisecond
+ return latestTickTs.Add(threshold).After(time.Now())
+}
+
+// ensureLeadership checks whether the current member is still the leader.
+func (s *EtcdServer) ensureLeadership() bool {
+ lg := s.Logger()
+
+ if s.isActive() {
+ lg.Debug("The member is active, skip checking leadership",
+ zap.Time("latestTickTs", s.r.getLatestTickTs()),
+ zap.Time("now", time.Now()))
+ return true
+ }
+
+ ctx, cancel := context.WithTimeout(s.ctx, s.Cfg.ReqTimeout())
+ defer cancel()
+ if err := s.linearizableReadNotify(ctx); err != nil {
+ lg.Warn("Failed to check current member's leadership",
+ zap.Error(err))
+ return false
+ }
+
+ newLeaderID := s.raftStatus().Lead
+ if newLeaderID != uint64(s.MemberID()) {
+ lg.Warn("Current member isn't a leader",
+ zap.Uint64("local-member-id", uint64(s.MemberID())),
+ zap.Uint64("new-lead", newLeaderID))
+ return false
+ }
+
+ return true
+}
+
+// Cleanup removes objects allocated by EtcdServer.NewServer in
+// situations where EtcdServer.Start was not called (Start otherwise takes care of cleanup).
+func (s *EtcdServer) Cleanup() {
+ // kv, lessor and backend can be nil if running without v3 enabled
+ // or running unit tests.
+ if s.lessor != nil {
+ s.lessor.Stop()
+ }
+ if s.kv != nil {
+ s.kv.Close()
+ }
+ if s.authStore != nil {
+ s.authStore.Close()
+ }
+ if s.be != nil {
+ s.be.Close()
+ }
+ if s.compactor != nil {
+ s.compactor.Stop()
+ }
+}
+
+func (s *EtcdServer) Defragment() error {
+ s.bemu.Lock()
+ defer s.bemu.Unlock()
+ return s.be.Defrag()
+}
+
+func (s *EtcdServer) applyAll(ep *etcdProgress, apply *toApply) {
+ s.applySnapshot(ep, apply)
+ s.applyEntries(ep, apply)
+ backend.VerifyBackendConsistency(s.Backend(), s.Logger(), true, schema.AllBuckets...)
+
+ proposalsApplied.Set(float64(ep.appliedi))
+ s.applyWait.Trigger(ep.appliedi)
+
+ // wait for the raft routine to finish the disk writes before triggering a
+ // snapshot, or the applied index might be greater than the last index in
+ // raft storage, since the raft routine might be slower than the apply routine.
+ <-apply.notifyc
+
+ s.snapshotIfNeededAndCompactRaftLog(ep)
+ select {
+ // snapshot requested via send()
+ case m := <-s.r.msgSnapC:
+ merged := s.createMergedSnapshotMessage(m, ep.appliedt, ep.appliedi, ep.confState)
+ s.sendMergedSnap(merged)
+ default:
+ }
+}
+
+func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
+ if raft.IsEmptySnap(toApply.snapshot) {
+ return
+ }
+ applySnapshotInProgress.Inc()
+
+ lg := s.Logger()
+ lg.Info(
+ "applying snapshot",
+ zap.Uint64("current-snapshot-index", ep.diskSnapshotIndex),
+ zap.Uint64("current-applied-index", ep.appliedi),
+ zap.Uint64("incoming-leader-snapshot-index", toApply.snapshot.Metadata.Index),
+ zap.Uint64("incoming-leader-snapshot-term", toApply.snapshot.Metadata.Term),
+ )
+ defer func() {
+ lg.Info(
+ "applied snapshot",
+ zap.Uint64("current-snapshot-index", ep.diskSnapshotIndex),
+ zap.Uint64("current-applied-index", ep.appliedi),
+ zap.Uint64("incoming-leader-snapshot-index", toApply.snapshot.Metadata.Index),
+ zap.Uint64("incoming-leader-snapshot-term", toApply.snapshot.Metadata.Term),
+ )
+ applySnapshotInProgress.Dec()
+ }()
+
+ if toApply.snapshot.Metadata.Index <= ep.appliedi {
+ lg.Panic(
+ "unexpected leader snapshot from outdated index",
+ zap.Uint64("current-snapshot-index", ep.diskSnapshotIndex),
+ zap.Uint64("current-applied-index", ep.appliedi),
+ zap.Uint64("incoming-leader-snapshot-index", toApply.snapshot.Metadata.Index),
+ zap.Uint64("incoming-leader-snapshot-term", toApply.snapshot.Metadata.Term),
+ )
+ }
+
+ // wait for raftNode to persist snapshot onto the disk
+ <-toApply.notifyc
+
+ bemuUnlocked := false
+ s.bemu.Lock()
+ defer func() {
+ if !bemuUnlocked {
+ s.bemu.Unlock()
+ }
+ }()
+
+ // gofail: var applyBeforeOpenSnapshot struct{}
+ newbe, err := serverstorage.OpenSnapshotBackend(s.Cfg, s.snapshotter, toApply.snapshot, s.beHooks)
+ if err != nil {
+ lg.Panic("failed to open snapshot backend", zap.Error(err))
+ }
+ lg.Info("applySnapshot: opened snapshot backend")
+ // gofail: var applyAfterOpenSnapshot struct{}
+
+ // We need to set the backend to consistIndex before recovering the lessor,
+ // because lessor.Recover will commit the boltDB transaction, which persists
+ // the old consistent_index into the db in OnPreCommitUnsafe. The new
+ // consistent_index value coming from the snapshot would otherwise be
+ // overwritten by the old value.
+ s.consistIndex.SetBackend(newbe)
+ verifySnapshotIndex(toApply.snapshot, s.consistIndex.ConsistentIndex())
+
+ // always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
+ // If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
+ if s.lessor != nil {
+ lg.Info("restoring lease store")
+
+ s.lessor.Recover(newbe, func() lease.TxnDelete { return s.kv.Write(traceutil.TODO()) })
+
+ lg.Info("restored lease store")
+ }
+
+ lg.Info("restoring mvcc store")
+
+ if err := s.kv.Restore(newbe); err != nil {
+ lg.Panic("failed to restore mvcc store", zap.Error(err))
+ }
+
+ newbe.SetTxPostLockInsideApplyHook(s.getTxPostLockInsideApplyHook())
+
+ lg.Info("restored mvcc store", zap.Uint64("consistent-index", s.consistIndex.ConsistentIndex()))
+
+ oldbe := s.be
+ s.be = newbe
+ s.bemu.Unlock()
+ bemuUnlocked = true
+
+ // Closing old backend might block until all the txns
+ // on the backend are finished.
+ // We do not want to wait on closing the old backend.
+ go func() {
+ lg.Info("closing old backend file")
+ defer func() {
+ lg.Info("closed old backend file")
+ }()
+ if err := oldbe.Close(); err != nil {
+ lg.Panic("failed to close old backend", zap.Error(err))
+ }
+ }()
+
+ lg.Info("restoring alarm store")
+
+ if err := s.restoreAlarms(); err != nil {
+ lg.Panic("failed to restore alarm store", zap.Error(err))
+ }
+
+ lg.Info("restored alarm store")
+
+ if s.authStore != nil {
+ lg.Info("restoring auth store")
+
+ s.authStore.Recover(schema.NewAuthBackend(lg, newbe))
+
+ lg.Info("restored auth store")
+ }
+
+ lg.Info("restoring v2 store")
+ if err := s.v2store.Recovery(toApply.snapshot.Data); err != nil {
+ lg.Panic("failed to restore v2 store", zap.Error(err))
+ }
+
+ if err := serverstorage.AssertNoV2StoreContent(lg, s.v2store, s.Cfg.V2Deprecation); err != nil {
+ lg.Panic("illegal v2store content", zap.Error(err))
+ }
+
+ lg.Info("restored v2 store")
+
+ s.cluster.SetBackend(schema.NewMembershipBackend(lg, newbe))
+
+ lg.Info("restoring cluster configuration")
+
+ s.cluster.Recover(api.UpdateCapability)
+
+ lg.Info("restored cluster configuration")
+ lg.Info("removing old peers from network")
+
+ // recover raft transport
+ s.r.transport.RemoveAllPeers()
+
+ lg.Info("removed old peers from network")
+ lg.Info("adding peers from new cluster configuration")
+
+ for _, m := range s.cluster.Members() {
+ if m.ID == s.MemberID() {
+ continue
+ }
+ s.r.transport.AddPeer(m.ID, m.PeerURLs)
+ }
+
+ lg.Info("added peers from new cluster configuration")
+
+ ep.appliedt = toApply.snapshot.Metadata.Term
+ ep.appliedi = toApply.snapshot.Metadata.Index
+ ep.diskSnapshotIndex = ep.appliedi
+ ep.memorySnapshotIndex = ep.appliedi
+ ep.confState = toApply.snapshot.Metadata.ConfState
+
+ // As the backend and implementations like the alarm store have changed,
+ // we need to re-bootstrap the appliers.
+ s.uberApply = s.NewUberApplier()
+}
+
+func (s *EtcdServer) NewUberApplier() apply.UberApplier {
+ return apply.NewUberApplier(s.lg, s.be, s.KV(), s.alarmStore, s.authStore, s.lessor, s.cluster, s, s, s.consistIndex,
+ s.Cfg.WarningApplyDuration, s.Cfg.ServerFeatureGate.Enabled(features.TxnModeWriteWithSharedBuffer), s.Cfg.QuotaBackendBytes)
+}
+
+func verifySnapshotIndex(snapshot raftpb.Snapshot, cindex uint64) {
+ verify.Verify(func() {
+ if cindex != snapshot.Metadata.Index {
+ panic(fmt.Sprintf("consistent_index(%d) isn't equal to snapshot index (%d)", cindex, snapshot.Metadata.Index))
+ }
+ })
+}
+
+func verifyConsistentIndexIsLatest(lg *zap.Logger, snapshot raftpb.Snapshot, cindex uint64) {
+ verify.Verify(func() {
+ if cindex < snapshot.Metadata.Index {
+ lg.Panic(fmt.Sprintf("consistent_index(%d) is older than snapshot index (%d)", cindex, snapshot.Metadata.Index))
+ }
+ })
+}
+
+func (s *EtcdServer) applyEntries(ep *etcdProgress, apply *toApply) {
+ if len(apply.entries) == 0 {
+ return
+ }
+ firsti := apply.entries[0].Index
+ if firsti > ep.appliedi+1 {
+ lg := s.Logger()
+ lg.Panic(
+ "unexpected committed entry index",
+ zap.Uint64("current-applied-index", ep.appliedi),
+ zap.Uint64("first-committed-entry-index", firsti),
+ )
+ }
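+ // Skip entries already applied: e.g. with appliedi=10 and firsti=8, the
+ // first three entries (indexes 8, 9, 10) are dropped and applying resumes
+ // at index 11.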
+ var ents []raftpb.Entry
+ if ep.appliedi+1-firsti < uint64(len(apply.entries)) {
+ ents = apply.entries[ep.appliedi+1-firsti:]
+ }
+ if len(ents) == 0 {
+ return
+ }
+ var shouldstop bool
+ if ep.appliedt, ep.appliedi, shouldstop = s.apply(ents, &ep.confState, apply.raftAdvancedC); shouldstop {
+ go s.stopWithDelay(10*100*time.Millisecond, fmt.Errorf("the member has been permanently removed from the cluster"))
+ }
+}
+
+func (s *EtcdServer) ForceSnapshot() {
+ s.forceDiskSnapshot = true
+}
+
+func (s *EtcdServer) snapshotIfNeededAndCompactRaftLog(ep *etcdProgress) {
+ // TODO: Remove disk snapshot in v3.7
+ shouldSnapshotToDisk := s.shouldSnapshotToDisk(ep)
+ shouldSnapshotToMemory := s.shouldSnapshotToMemory(ep)
+ if !shouldSnapshotToDisk && !shouldSnapshotToMemory {
+ return
+ }
+ s.snapshot(ep, shouldSnapshotToDisk)
+ s.compactRaftLog(ep.appliedi)
+}
+
+func (s *EtcdServer) shouldSnapshotToDisk(ep *etcdProgress) bool {
+ return (s.forceDiskSnapshot && ep.appliedi != ep.diskSnapshotIndex) || (ep.appliedi-ep.diskSnapshotIndex > s.Cfg.SnapshotCount)
+}
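+
+// For example, with the default SnapshotCount of 10000 a disk snapshot is
+// taken once more than 10000 entries have been applied since the last disk
+// snapshot, or as soon as new entries are applied when forceDiskSnapshot is set.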
+
+func (s *EtcdServer) shouldSnapshotToMemory(ep *etcdProgress) bool {
+ return ep.appliedi > ep.memorySnapshotIndex+memorySnapshotCount
+}
+
+func (s *EtcdServer) hasMultipleVotingMembers() bool {
+ return s.cluster != nil && len(s.cluster.VotingMemberIDs()) > 1
+}
+
+func (s *EtcdServer) isLeader() bool {
+ return uint64(s.MemberID()) == s.Lead()
+}
+
+// MoveLeader transfers the leader to the given transferee.
+func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) error {
+ member := s.cluster.Member(types.ID(transferee))
+ if member == nil || member.IsLearner {
+ return errors.ErrBadLeaderTransferee
+ }
+
+ now := time.Now()
+ interval := time.Duration(s.Cfg.TickMs) * time.Millisecond
+
+ lg := s.Logger()
+ lg.Info(
+ "leadership transfer starting",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("current-leader-member-id", types.ID(lead).String()),
+ zap.String("transferee-member-id", types.ID(transferee).String()),
+ )
+
+ s.r.TransferLeadership(ctx, lead, transferee)
+ for s.Lead() != transferee {
+ select {
+ case <-ctx.Done(): // time out
+ return errors.ErrTimeoutLeaderTransfer
+ case <-time.After(interval):
+ }
+ }
+
+ // TODO: drain all requests, or drop all messages to the old leader
+ lg.Info(
+ "leadership transfer finished",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("old-leader-member-id", types.ID(lead).String()),
+ zap.String("new-leader-member-id", types.ID(transferee).String()),
+ zap.Duration("took", time.Since(now)),
+ )
+ return nil
+}
+
+// TryTransferLeadershipOnShutdown transfers the leader to the chosen transferee. It is only used in server graceful shutdown.
+func (s *EtcdServer) TryTransferLeadershipOnShutdown() error {
+ lg := s.Logger()
+ if !s.isLeader() {
+ lg.Info(
+ "skipped leadership transfer; local server is not leader",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("current-leader-member-id", types.ID(s.Lead()).String()),
+ )
+ return nil
+ }
+
+ if !s.hasMultipleVotingMembers() {
+ lg.Info(
+ "skipped leadership transfer for single voting member cluster",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("current-leader-member-id", types.ID(s.Lead()).String()),
+ )
+ return nil
+ }
+
+ transferee, ok := longestConnected(s.r.transport, s.cluster.VotingMemberIDs())
+ if !ok {
+ return errors.ErrUnhealthy
+ }
+
+ tm := s.Cfg.ReqTimeout()
+ ctx, cancel := context.WithTimeout(s.ctx, tm)
+ err := s.MoveLeader(ctx, s.Lead(), uint64(transferee))
+ cancel()
+ return err
+}
+
+// HardStop stops the server without coordination with other members in the cluster.
+func (s *EtcdServer) HardStop() {
+ select {
+ case s.stop <- struct{}{}:
+ case <-s.done:
+ return
+ }
+ <-s.done
+}
+
+// Stop stops the server gracefully, and shuts down the running goroutine.
+// Stop should be called after Start(s); otherwise it will block forever.
+// When stopping the leader, Stop transfers its leadership to one of its peers
+// before stopping the server.
+// Stop terminates the Server and performs any necessary finalization.
+// Do and Process cannot be called after Stop has been invoked.
+func (s *EtcdServer) Stop() {
+ lg := s.Logger()
+ if err := s.TryTransferLeadershipOnShutdown(); err != nil {
+ lg.Warn("leadership transfer failed", zap.String("local-member-id", s.MemberID().String()), zap.Error(err))
+ }
+ s.HardStop()
+}
+
+// ReadyNotify returns a channel that will be closed when the server
+// is ready to serve client requests
+func (s *EtcdServer) ReadyNotify() <-chan struct{} { return s.readych }
+
+func (s *EtcdServer) stopWithDelay(d time.Duration, err error) {
+ select {
+ case <-time.After(d):
+ case <-s.done:
+ }
+ select {
+ case s.errorc <- err:
+ default:
+ }
+}
+
+// StopNotify returns a channel that receives an empty struct
+// when the server is stopped.
+func (s *EtcdServer) StopNotify() <-chan struct{} { return s.done }
+
+// StoppingNotify returns a channel that receives an empty struct
+// when the server is being stopped.
+func (s *EtcdServer) StoppingNotify() <-chan struct{} { return s.stopping }
+
+func (s *EtcdServer) checkMembershipOperationPermission(ctx context.Context) error {
+ if s.authStore == nil {
+ // In the context of an ordinary etcd process, s.authStore will never be nil.
+ // This branch is for handling cases in server_test.go
+ return nil
+ }
+
+ // Note that this permission check is done in the API layer,
+ // so a TOCTOU problem could potentially arise in a schedule like this:
+ // update membership with user A -> revoke root role of A -> apply the
+ // membership change in the state machine layer.
+ // However, both membership changes and role management require the root
+ // privilege, so careful operation by admins can prevent the problem.
+ authInfo, err := s.AuthInfoFromCtx(ctx)
+ if err != nil {
+ return err
+ }
+
+ return s.AuthStore().IsAdminPermitted(authInfo)
+}
+
+func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) ([]*membership.Member, error) {
+ if err := s.checkMembershipOperationPermission(ctx); err != nil {
+ return nil, err
+ }
+
+ // TODO: move Member to protobuf type
+ b, err := json.Marshal(memb)
+ if err != nil {
+ return nil, err
+ }
+
+ // by default StrictReconfigCheck is enabled; reject new members if unhealthy.
+ if err := s.mayAddMember(memb); err != nil {
+ return nil, err
+ }
+
+ cc := raftpb.ConfChange{
+ Type: raftpb.ConfChangeAddNode,
+ NodeID: uint64(memb.ID),
+ Context: b,
+ }
+
+ if memb.IsLearner {
+ cc.Type = raftpb.ConfChangeAddLearnerNode
+ }
+
+ return s.configure(ctx, cc)
+}
+
+func (s *EtcdServer) mayAddMember(memb membership.Member) error {
+ lg := s.Logger()
+ if !s.Cfg.StrictReconfigCheck {
+ return nil
+ }
+
+ // protect quorum when adding voting member
+ if !memb.IsLearner && !s.cluster.IsReadyToAddVotingMember() {
+ lg.Warn(
+ "rejecting member add request; not enough healthy members",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
+ zap.Error(errors.ErrNotEnoughStartedMembers),
+ )
+ return errors.ErrNotEnoughStartedMembers
+ }
+
+ if !isConnectedFullySince(s.r.transport, time.Now().Add(-HealthInterval), s.MemberID(), s.cluster.VotingMembers()) {
+ lg.Warn(
+ "rejecting member add request; local member has not been connected to all peers, reconfigure breaks active quorum",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
+ zap.Error(errors.ErrUnhealthy),
+ )
+ return errors.ErrUnhealthy
+ }
+
+ return nil
+}
+
+func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
+ if err := s.checkMembershipOperationPermission(ctx); err != nil {
+ return nil, err
+ }
+
+ // by default StrictReconfigCheck is enabled; reject removal if leads to quorum loss
+ if err := s.mayRemoveMember(types.ID(id)); err != nil {
+ return nil, err
+ }
+
+ cc := raftpb.ConfChange{
+ Type: raftpb.ConfChangeRemoveNode,
+ NodeID: id,
+ }
+ return s.configure(ctx, cc)
+}
+
+// PromoteMember promotes a learner node to a voting node.
+func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
+ // Only the raft leader has information on whether the to-be-promoted learner node is ready. If the promoteMember
+ // call fails with ErrNotLeader, forward the request to the leader node via HTTP. If it fails with an error other
+ // than ErrNotLeader, return the error.
+ resp, err := s.promoteMember(ctx, id)
+ if err == nil {
+ learnerPromoteSucceed.Inc()
+ return resp, nil
+ }
+ if !errorspkg.Is(err, errors.ErrNotLeader) {
+ learnerPromoteFailed.WithLabelValues(err.Error()).Inc()
+ return resp, err
+ }
+
+ cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
+ defer cancel()
+ // forward to leader
+ for cctx.Err() == nil {
+ leader, err := s.waitLeader(cctx)
+ if err != nil {
+ return nil, err
+ }
+ for _, url := range leader.PeerURLs {
+ resp, err := promoteMemberHTTP(cctx, url, id, s.peerRt)
+ if err == nil {
+ return resp, nil
+ }
+ // If member promotion failed, return early. Otherwise keep retrying.
+ if errorspkg.Is(err, errors.ErrLearnerNotReady) || errorspkg.Is(err, membership.ErrIDNotFound) || errorspkg.Is(err, membership.ErrMemberNotLearner) {
+ return nil, err
+ }
+ }
+ }
+
+ if errorspkg.Is(cctx.Err(), context.DeadlineExceeded) {
+ return nil, errors.ErrTimeout
+ }
+ return nil, errors.ErrCanceled
+}
+
+// promoteMember checks whether the to-be-promoted learner node is ready before sending the promote
+// request to raft.
+// It returns ErrNotLeader if the local node is not the raft leader (and therefore does not have
+// enough information to determine whether the learner is ready), and ErrLearnerNotReady if the
+// local node is the leader (and therefore has enough information) but has decided that the learner
+// is not ready to be promoted.
+func (s *EtcdServer) promoteMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
+ if err := s.checkMembershipOperationPermission(ctx); err != nil {
+ return nil, err
+ }
+
+ // check if we can promote this learner.
+ if err := s.mayPromoteMember(types.ID(id)); err != nil {
+ return nil, err
+ }
+
+ // build the context for the promote confChange. mark IsLearner to false and IsPromote to true.
+ promoteChangeContext := membership.ConfigChangeContext{
+ Member: membership.Member{
+ ID: types.ID(id),
+ },
+ IsPromote: true,
+ }
+
+ b, err := json.Marshal(promoteChangeContext)
+ if err != nil {
+ return nil, err
+ }
+
+ cc := raftpb.ConfChange{
+ Type: raftpb.ConfChangeAddNode,
+ NodeID: id,
+ Context: b,
+ }
+
+ return s.configure(ctx, cc)
+}
+
+func (s *EtcdServer) mayPromoteMember(id types.ID) error {
+ lg := s.Logger()
+ if err := s.isLearnerReady(lg, uint64(id)); err != nil {
+ return err
+ }
+
+ if !s.Cfg.StrictReconfigCheck {
+ return nil
+ }
+ if !s.cluster.IsReadyToPromoteMember(uint64(id)) {
+ lg.Warn(
+ "rejecting member promote request; not enough healthy members",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("requested-member-remove-id", id.String()),
+ zap.Error(errors.ErrNotEnoughStartedMembers),
+ )
+ return errors.ErrNotEnoughStartedMembers
+ }
+
+ return nil
+}
+
+// isLearnerReady checks whether the learner has caught up with the leader.
+// Note: it returns membership.ErrIDNotFound if the member is not found in the
+// leader's raft progress; whether the member is actually a learner is checked
+// later, before the apply phase.
+func (s *EtcdServer) isLearnerReady(lg *zap.Logger, id uint64) error {
+ if err := s.waitAppliedIndex(); err != nil {
+ return err
+ }
+
+ rs := s.raftStatus()
+
+ // leader's raftStatus.Progress is not nil
+ if rs.Progress == nil {
+ return errors.ErrNotLeader
+ }
+
+ var learnerMatch uint64
+ isFound := false
+ leaderID := rs.ID
+ for memberID, progress := range rs.Progress {
+ if id == memberID {
+ // check its status
+ learnerMatch = progress.Match
+ isFound = true
+ break
+ }
+ }
+
+ // We should return an error in the API directly, to avoid the request
+ // being unnecessarily delivered to raft.
+ if !isFound {
+ return membership.ErrIDNotFound
+ }
+
+ leaderMatch := rs.Progress[leaderID].Match
+
+ learnerReadyPercent := float64(learnerMatch) / float64(leaderMatch)
+
+ // the learner's Match has not caught up with the leader yet
+ if learnerReadyPercent < readyPercentThreshold {
+ lg.Error(
+ "rejecting promote learner: learner is not ready",
+ zap.Float64("learner-ready-percent", learnerReadyPercent),
+ zap.Float64("ready-percent-threshold", readyPercentThreshold),
+ )
+ return errors.ErrLearnerNotReady
+ }
+
+ return nil
+}
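+
+// learnerCaughtUp is an editor's illustrative sketch (a hypothetical helper,
+// not part of this change) restating the readiness test above: the learner is
+// considered caught up once its Match index reaches the threshold fraction of
+// the leader's Match index. E.g. with a threshold of 0.9, learnerMatch=950
+// and leaderMatch=1000 gives 0.95, so the learner passes.
+func learnerCaughtUp(learnerMatch, leaderMatch uint64, threshold float64) bool {
+ return float64(learnerMatch)/float64(leaderMatch) >= threshold
+}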
+
+func (s *EtcdServer) mayRemoveMember(id types.ID) error {
+ if !s.Cfg.StrictReconfigCheck {
+ return nil
+ }
+
+ lg := s.Logger()
+ member := s.cluster.Member(id)
+ // no need to check quorum when removing non-voting member
+ if member != nil && member.IsLearner {
+ return nil
+ }
+
+ if !s.cluster.IsReadyToRemoveVotingMember(uint64(id)) {
+ lg.Warn(
+ "rejecting member remove request; not enough healthy members",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("requested-member-remove-id", id.String()),
+ zap.Error(errors.ErrNotEnoughStartedMembers),
+ )
+ return errors.ErrNotEnoughStartedMembers
+ }
+
+ // downed member is safe to remove since it's not part of the active quorum
+ if t := s.r.transport.ActiveSince(id); id != s.MemberID() && t.IsZero() {
+ return nil
+ }
+
+ // protect quorum if some members are down
+ m := s.cluster.VotingMembers()
+ active := numConnectedSince(s.r.transport, time.Now().Add(-HealthInterval), s.MemberID(), m)
+ if (active - 1) < 1+((len(m)-1)/2) {
+ lg.Warn(
+ "rejecting member remove request; local member has not been connected to all peers, reconfigure breaks active quorum",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("requested-member-remove", id.String()),
+ zap.Int("active-peers", active),
+ zap.Error(errors.ErrUnhealthy),
+ )
+ return errors.ErrUnhealthy
+ }
+
+ return nil
+}
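+
+// quorumHoldsAfterRemoval is an editor's illustrative sketch (a hypothetical
+// helper, not part of this change) of the arithmetic above: after removing one
+// active voter, voters-1 members remain and their quorum is 1+((voters-1)/2).
+// E.g. 5 voters with 5 active: 4 >= 3, removal allowed; 5 voters with only 3
+// active: 2 < 3, removal rejected.
+func quorumHoldsAfterRemoval(active, voters int) bool {
+ return (active - 1) >= 1+((voters-1)/2)
+}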
+
+func (s *EtcdServer) UpdateMember(ctx context.Context, memb membership.Member) ([]*membership.Member, error) {
+ b, merr := json.Marshal(memb)
+ if merr != nil {
+ return nil, merr
+ }
+
+ if err := s.checkMembershipOperationPermission(ctx); err != nil {
+ return nil, err
+ }
+ cc := raftpb.ConfChange{
+ Type: raftpb.ConfChangeUpdateNode,
+ NodeID: uint64(memb.ID),
+ Context: b,
+ }
+ return s.configure(ctx, cc)
+}
+
+func (s *EtcdServer) setCommittedIndex(v uint64) {
+ atomic.StoreUint64(&s.committedIndex, v)
+}
+
+func (s *EtcdServer) getCommittedIndex() uint64 {
+ return atomic.LoadUint64(&s.committedIndex)
+}
+
+func (s *EtcdServer) setAppliedIndex(v uint64) {
+ atomic.StoreUint64(&s.appliedIndex, v)
+}
+
+func (s *EtcdServer) getAppliedIndex() uint64 {
+ return atomic.LoadUint64(&s.appliedIndex)
+}
+
+func (s *EtcdServer) setTerm(v uint64) {
+ atomic.StoreUint64(&s.term, v)
+}
+
+func (s *EtcdServer) getTerm() uint64 {
+ return atomic.LoadUint64(&s.term)
+}
+
+func (s *EtcdServer) setLead(v uint64) {
+ atomic.StoreUint64(&s.lead, v)
+}
+
+func (s *EtcdServer) getLead() uint64 {
+ return atomic.LoadUint64(&s.lead)
+}
+
+func (s *EtcdServer) LeaderChangedNotify() <-chan struct{} {
+ return s.leaderChanged.Receive()
+}
+
+// FirstCommitInTermNotify returns a channel that will be unblocked on the first
+// entry committed in a new term, which is necessary for a new leader to answer
+// read-only requests (the leader is not able to respond to any read-only requests
+// as long as linearizable semantics are required).
+func (s *EtcdServer) FirstCommitInTermNotify() <-chan struct{} {
+ return s.firstCommitInTerm.Receive()
+}
+
+// MemberId returns the ID of the local member.
+// Deprecated: Please use (*EtcdServer) MemberID instead.
+//
+//revive:disable:var-naming
+func (s *EtcdServer) MemberId() types.ID { return s.MemberID() }
+
+//revive:enable:var-naming
+
+func (s *EtcdServer) MemberID() types.ID { return s.memberID }
+
+func (s *EtcdServer) Leader() types.ID { return types.ID(s.getLead()) }
+
+func (s *EtcdServer) Lead() uint64 { return s.getLead() }
+
+func (s *EtcdServer) CommittedIndex() uint64 { return s.getCommittedIndex() }
+
+func (s *EtcdServer) AppliedIndex() uint64 { return s.getAppliedIndex() }
+
+func (s *EtcdServer) Term() uint64 { return s.getTerm() }
+
+type confChangeResponse struct {
+ membs []*membership.Member
+ raftAdvanceC <-chan struct{}
+ err error
+}
+
+// configure sends a configuration change through consensus and
+// then waits for it to be applied to the server. It
+// will block until the change is performed or there is an error.
+func (s *EtcdServer) configure(ctx context.Context, cc raftpb.ConfChange) ([]*membership.Member, error) {
+ lg := s.Logger()
+ cc.ID = s.reqIDGen.Next()
+ ch := s.w.Register(cc.ID)
+
+ start := time.Now()
+ if err := s.r.ProposeConfChange(ctx, cc); err != nil {
+ s.w.Trigger(cc.ID, nil)
+ return nil, err
+ }
+
+ select {
+ case x := <-ch:
+ if x == nil {
+ lg.Panic("failed to configure")
+ }
+ resp := x.(*confChangeResponse)
+ // etcdserver needs to ensure that raft has already been notified
+ // or advanced before it responds to the client. Otherwise, the
+ // following config change request may be rejected.
+ // See https://github.com/etcd-io/etcd/issues/15528.
+ <-resp.raftAdvanceC
+ lg.Info(
+ "applied a configuration change through raft",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("raft-conf-change", cc.Type.String()),
+ zap.String("raft-conf-change-node-id", types.ID(cc.NodeID).String()),
+ )
+ return resp.membs, resp.err
+
+ case <-ctx.Done():
+ s.w.Trigger(cc.ID, nil) // GC wait
+ return nil, s.parseProposeCtxErr(ctx.Err(), start)
+
+ case <-s.stopping:
+ return nil, errors.ErrStopped
+ }
+}
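+
+// The flow above hinges on the id-keyed wait registry (s.w): the proposer
+// registers a channel under cc.ID before proposing, and the apply loop later
+// triggers that id with the result. An editor's minimal sketch of the pattern
+// (illustrative and stdlib-only, not etcd's actual pkg/wait implementation):
+//
+//	type wait struct {
+//		mu sync.Mutex
+//		m  map[uint64]chan any
+//	}
+//
+//	func newWait() *wait { return &wait{m: make(map[uint64]chan any)} }
+//
+//	func (w *wait) Register(id uint64) <-chan any {
+//		w.mu.Lock()
+//		defer w.mu.Unlock()
+//		ch := make(chan any, 1)
+//		w.m[id] = ch
+//		return ch
+//	}
+//
+//	func (w *wait) Trigger(id uint64, v any) {
+//		w.mu.Lock()
+//		ch, ok := w.m[id]
+//		delete(w.m, id)
+//		w.mu.Unlock()
+//		if ok {
+//			ch <- v
+//		}
+//	}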
+
+// publishV3 registers server information into the cluster using v3 request. The
+// information is the JSON representation of this server's member struct, updated
+// with the static clientURLs of the server.
+// The function keeps attempting to register until it succeeds,
+// or its server is stopped.
+func (s *EtcdServer) publishV3(timeout time.Duration) {
+ req := &membershippb.ClusterMemberAttrSetRequest{
+ Member_ID: uint64(s.MemberID()),
+ MemberAttributes: &membershippb.Attributes{
+ Name: s.attributes.Name,
+ ClientUrls: s.attributes.ClientURLs,
+ },
+ }
+ // gofail: var beforePublishing struct{}
+ lg := s.Logger()
+ for {
+ select {
+ case <-s.stopping:
+ lg.Warn(
+ "stopped publish because server is stopping",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("local-member-attributes", fmt.Sprintf("%+v", s.attributes)),
+ zap.Duration("publish-timeout", timeout),
+ )
+ return
+
+ default:
+ }
+
+ ctx, cancel := context.WithTimeout(s.ctx, timeout)
+ _, err := s.raftRequest(ctx, pb.InternalRaftRequest{ClusterMemberAttrSet: req})
+ cancel()
+ switch err {
+ case nil:
+ close(s.readych)
+ lg.Info(
+ "published local member to cluster through raft",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("local-member-attributes", fmt.Sprintf("%+v", s.attributes)),
+ zap.String("cluster-id", s.cluster.ID().String()),
+ zap.Duration("publish-timeout", timeout),
+ )
+ return
+
+ default:
+ lg.Warn(
+ "failed to publish local member to cluster through raft",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.String("local-member-attributes", fmt.Sprintf("%+v", s.attributes)),
+ zap.Duration("publish-timeout", timeout),
+ zap.Error(err),
+ )
+ }
+ }
+}
+
+func (s *EtcdServer) sendMergedSnap(merged snap.Message) {
+ atomic.AddInt64(&s.inflightSnapshots, 1)
+
+ lg := s.Logger()
+ fields := []zap.Field{
+ zap.String("from", s.MemberID().String()),
+ zap.String("to", types.ID(merged.To).String()),
+ zap.Int64("bytes", merged.TotalSize),
+ zap.String("size", humanize.Bytes(uint64(merged.TotalSize))),
+ }
+
+ now := time.Now()
+ s.r.transport.SendSnapshot(merged)
+ lg.Info("sending merged snapshot", fields...)
+
+ s.GoAttach(func() {
+ select {
+ case ok := <-merged.CloseNotify():
+ // delay releasing inflight snapshot for another 30 seconds to
+ // block log compaction.
+ // If the follower still fails to catch up, it is probably just too slow
+ // to catch up. We cannot avoid the snapshot cycle anyway.
+ if ok {
+ select {
+ case <-time.After(releaseDelayAfterSnapshot):
+ case <-s.stopping:
+ }
+ }
+
+ atomic.AddInt64(&s.inflightSnapshots, -1)
+
+ lg.Info("sent merged snapshot", append(fields, zap.Duration("took", time.Since(now)))...)
+
+ case <-s.stopping:
+ lg.Warn("canceled sending merged snapshot; server stopping", fields...)
+ return
+ }
+ })
+}
+
+// apply takes entries received from Raft (after they have been committed) and
+// applies them to the current state of the EtcdServer.
+// The given entries should not be empty.
+func (s *EtcdServer) apply(
+ es []raftpb.Entry,
+ confState *raftpb.ConfState,
+ raftAdvancedC <-chan struct{},
+) (appliedt uint64, appliedi uint64, shouldStop bool) {
+ s.lg.Debug("Applying entries", zap.Int("num-entries", len(es)))
+ for i := range es {
+ e := es[i]
+ index := s.consistIndex.ConsistentIndex()
+ s.lg.Debug("Applying entry",
+ zap.Uint64("consistent-index", index),
+ zap.Uint64("entry-index", e.Index),
+ zap.Uint64("entry-term", e.Term),
+ zap.Stringer("entry-type", e.Type))
+
+ // We need to apply all WAL entries on top of v2store
+ // and only 'unapplied' entries (e.Index > backend.ConsistentIndex) on the backend.
+ shouldApplyV3 := membership.ApplyV2storeOnly
+ if e.Index > index {
+ shouldApplyV3 = membership.ApplyBoth
+ // set the consistent index of current executing entry
+ s.consistIndex.SetConsistentApplyingIndex(e.Index, e.Term)
+ }
+ switch e.Type {
+ case raftpb.EntryNormal:
+ // gofail: var beforeApplyOneEntryNormal struct{}
+ s.applyEntryNormal(&e, shouldApplyV3)
+ s.setAppliedIndex(e.Index)
+ s.setTerm(e.Term)
+
+ case raftpb.EntryConfChange:
+ // gofail: var beforeApplyOneConfChange struct{}
+ var cc raftpb.ConfChange
+ pbutil.MustUnmarshal(&cc, e.Data)
+ removedSelf, err := s.applyConfChange(cc, confState, shouldApplyV3)
+ s.setAppliedIndex(e.Index)
+ s.setTerm(e.Term)
+ shouldStop = shouldStop || removedSelf
+ s.w.Trigger(cc.ID, &confChangeResponse{s.cluster.Members(), raftAdvancedC, err})
+
+ default:
+ lg := s.Logger()
+ lg.Panic(
+ "unknown entry type; must be either EntryNormal or EntryConfChange",
+ zap.String("type", e.Type.String()),
+ )
+ }
+ appliedi, appliedt = e.Index, e.Term
+ }
+ return appliedt, appliedi, shouldStop
+}
+
+// applyEntryNormal applies an EntryNormal type raftpb request to the EtcdServer
+func (s *EtcdServer) applyEntryNormal(e *raftpb.Entry, shouldApplyV3 membership.ShouldApplyV3) {
+ var ar *apply.Result
+ if shouldApplyV3 {
+ defer func() {
+ // The txPostLockInsideApplyHook will not get called in some cases,
+ // in which we should move the consistent index forward directly.
+ newIndex := s.consistIndex.ConsistentIndex()
+ if newIndex < e.Index {
+ s.consistIndex.SetConsistentIndex(e.Index, e.Term)
+ }
+ }()
+ }
+
+ // The raft state machine may generate a noop entry upon leader confirmation.
+ // Skip it in advance to avoid potential bugs in the future.
+ if len(e.Data) == 0 {
+ s.firstCommitInTerm.Notify()
+
+ // promote lessor when the local member is leader and finished
+ // applying all entries from the last term.
+ if s.isLeader() {
+ s.lessor.Promote(s.Cfg.ElectionTimeout())
+ }
+ return
+ }
+
+ var raftReq pb.InternalRaftRequest
+ if !pbutil.MaybeUnmarshal(&raftReq, e.Data) { // backward compatible
+ var r pb.Request
+ rp := &r
+ pbutil.MustUnmarshal(rp, e.Data)
+ s.lg.Debug("applyEntryNormal", zap.Stringer("V2request", rp))
+ raftReq = v2ToV3Request(s.lg, (*RequestV2)(rp))
+ }
+ s.lg.Debug("applyEntryNormal", zap.Stringer("raftReq", &raftReq))
+
+ if raftReq.V2 != nil {
+ req := (*RequestV2)(raftReq.V2)
+ raftReq = v2ToV3Request(s.lg, req)
+ }
+
+ id := raftReq.ID
+ if id == 0 {
+ if raftReq.Header == nil {
+ s.lg.Panic("applyEntryNormal, could not find a header")
+ }
+ id = raftReq.Header.ID
+ }
+
+ needResult := s.w.IsRegistered(id)
+ if needResult || !noSideEffect(&raftReq) {
+ if !needResult && raftReq.Txn != nil {
+ removeNeedlessRangeReqs(raftReq.Txn)
+ }
+ ar = s.applyInternalRaftRequest(&raftReq, shouldApplyV3)
+ }
+
+ // do not re-apply already applied entries.
+ if !shouldApplyV3 {
+ return
+ }
+
+ if ar == nil {
+ return
+ }
+
+ if !errorspkg.Is(ar.Err, errors.ErrNoSpace) || len(s.alarmStore.Get(pb.AlarmType_NOSPACE)) > 0 {
+ s.w.Trigger(id, ar)
+ return
+ }
+
+ lg := s.Logger()
+ lg.Warn(
+ "message exceeded backend quota; raising alarm",
+ zap.Int64("quota-size-bytes", s.Cfg.QuotaBackendBytes),
+ zap.String("quota-size", humanize.Bytes(uint64(s.Cfg.QuotaBackendBytes))),
+ zap.Error(ar.Err),
+ )
+
+ s.GoAttach(func() {
+ a := &pb.AlarmRequest{
+ MemberID: uint64(s.MemberID()),
+ Action: pb.AlarmRequest_ACTIVATE,
+ Alarm: pb.AlarmType_NOSPACE,
+ }
+ s.raftRequest(s.ctx, pb.InternalRaftRequest{Alarm: a})
+ s.w.Trigger(id, ar)
+ })
+}
+
+func (s *EtcdServer) applyInternalRaftRequest(r *pb.InternalRaftRequest, shouldApplyV3 membership.ShouldApplyV3) *apply.Result {
+ if r.ClusterVersionSet == nil && r.ClusterMemberAttrSet == nil && r.DowngradeInfoSet == nil && r.DowngradeVersionTest == nil {
+ if !shouldApplyV3 {
+ return nil
+ }
+ return s.uberApply.Apply(r)
+ }
+ membershipApplier := apply.NewApplierMembership(s.lg, s.cluster, s)
+ op := "unknown"
+ defer func(start time.Time) {
+ txn.ApplySecObserve("v3", op, true, time.Since(start))
+ txn.WarnOfExpensiveRequest(s.lg, s.Cfg.WarningApplyDuration, start, &pb.InternalRaftStringer{Request: r}, nil, nil)
+ }(time.Now())
+ switch {
+ case r.ClusterVersionSet != nil:
+ op = "ClusterVersionSet" // Implemented in 3.5.x
+ membershipApplier.ClusterVersionSet(r.ClusterVersionSet, shouldApplyV3)
+ return &apply.Result{}
+ case r.ClusterMemberAttrSet != nil:
+ op = "ClusterMemberAttrSet" // Implemented in 3.5.x
+ membershipApplier.ClusterMemberAttrSet(r.ClusterMemberAttrSet, shouldApplyV3)
+ case r.DowngradeInfoSet != nil:
+ op = "DowngradeInfoSet" // Implemented in 3.5.x
+ membershipApplier.DowngradeInfoSet(r.DowngradeInfoSet, shouldApplyV3)
+ case r.DowngradeVersionTest != nil:
+ op = "DowngradeVersionTest" // Implemented in 3.6 for test only
+ // Do nothing; this only ensures that etcdserver doesn't panic in case
+ // users (test cases) intentionally inject a DowngradeVersionTestRequest
+ // into the WAL files.
+ default:
+ s.lg.Panic("not implemented apply", zap.Stringer("raft-request", r))
+ return nil
+ }
+ return &apply.Result{}
+}
+
+func noSideEffect(r *pb.InternalRaftRequest) bool {
+ return r.Range != nil || r.AuthUserGet != nil || r.AuthRoleGet != nil || r.AuthStatus != nil
+}
+
+func removeNeedlessRangeReqs(txn *pb.TxnRequest) {
+ f := func(ops []*pb.RequestOp) []*pb.RequestOp {
+ j := 0
+ for i := 0; i < len(ops); i++ {
+ if _, ok := ops[i].Request.(*pb.RequestOp_RequestRange); ok {
+ continue
+ }
+ ops[j] = ops[i]
+ j++
+ }
+
+ return ops[:j]
+ }
+
+ txn.Success = f(txn.Success)
+ txn.Failure = f(txn.Failure)
+}
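+
+// filterInPlace is an editor's generic restatement (a hypothetical helper, not
+// part of this change) of the in-place compaction idiom used above: elements
+// matching drop are skipped, the rest shift left, and the slice is re-sliced
+// without allocating.
+func filterInPlace[T any](xs []T, drop func(T) bool) []T {
+ j := 0
+ for i := 0; i < len(xs); i++ {
+ if drop(xs[i]) {
+ continue
+ }
+ xs[j] = xs[i]
+ j++
+ }
+ return xs[:j]
+}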
+
+// applyConfChange applies a ConfChange to the server. It is only
+// invoked with a ConfChange that has already passed through Raft
+func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.ConfState, shouldApplyV3 membership.ShouldApplyV3) (bool, error) {
+ lg := s.Logger()
+ if err := s.cluster.ValidateConfigurationChange(cc, shouldApplyV3); err != nil {
+ lg.Error("Validation on configuration change failed", zap.Bool("shouldApplyV3", bool(shouldApplyV3)), zap.Error(err))
+ cc.NodeID = raft.None
+ s.r.ApplyConfChange(cc)
+
+ // The txPostLock callback will not get called in this case,
+ // so we should set the consistent index directly.
+ if s.consistIndex != nil && membership.ApplyBoth == shouldApplyV3 {
+ applyingIndex, applyingTerm := s.consistIndex.ConsistentApplyingIndex()
+ s.consistIndex.SetConsistentIndex(applyingIndex, applyingTerm)
+ }
+ return false, err
+ }
+
+ *confState = *s.r.ApplyConfChange(cc)
+ s.beHooks.SetConfState(confState)
+ switch cc.Type {
+ case raftpb.ConfChangeAddNode, raftpb.ConfChangeAddLearnerNode:
+ confChangeContext := new(membership.ConfigChangeContext)
+ if err := json.Unmarshal(cc.Context, confChangeContext); err != nil {
+ lg.Panic("failed to unmarshal member", zap.Error(err))
+ }
+ if cc.NodeID != uint64(confChangeContext.Member.ID) {
+ lg.Panic(
+ "got different member ID",
+ zap.String("member-id-from-config-change-entry", types.ID(cc.NodeID).String()),
+ zap.String("member-id-from-message", confChangeContext.Member.ID.String()),
+ )
+ }
+ if confChangeContext.IsPromote {
+ s.cluster.PromoteMember(confChangeContext.Member.ID, shouldApplyV3)
+ } else {
+ s.cluster.AddMember(&confChangeContext.Member, shouldApplyV3)
+
+ if confChangeContext.Member.ID != s.MemberID() {
+ s.r.transport.AddPeer(confChangeContext.Member.ID, confChangeContext.PeerURLs)
+ }
+ }
+
+ case raftpb.ConfChangeRemoveNode:
+ id := types.ID(cc.NodeID)
+ s.cluster.RemoveMember(id, shouldApplyV3)
+ if id == s.MemberID() {
+ return true, nil
+ }
+ s.r.transport.RemovePeer(id)
+
+ case raftpb.ConfChangeUpdateNode:
+ m := new(membership.Member)
+ if err := json.Unmarshal(cc.Context, m); err != nil {
+ lg.Panic("failed to unmarshal member", zap.Error(err))
+ }
+ if cc.NodeID != uint64(m.ID) {
+ lg.Panic(
+ "got different member ID",
+ zap.String("member-id-from-config-change-entry", types.ID(cc.NodeID).String()),
+ zap.String("member-id-from-message", m.ID.String()),
+ )
+ }
+ s.cluster.UpdateRaftAttributes(m.ID, m.RaftAttributes, shouldApplyV3)
+ if m.ID != s.MemberID() {
+ s.r.transport.UpdatePeer(m.ID, m.PeerURLs)
+ }
+ }
+
+ verify.Verify(func() {
+ s.verifyV3StoreInSyncWithV2Store(shouldApplyV3)
+ })
+
+ return false, nil
+}
+
+func (s *EtcdServer) verifyV3StoreInSyncWithV2Store(shouldApplyV3 membership.ShouldApplyV3) {
+ // If shouldApplyV3 == false, then it means v2store hasn't caught up with v3store.
+ if !shouldApplyV3 {
+ return
+ }
+
+ // Clean up the Attributes; we only care about the RaftAttributes.
+ cleanAttributesFunc := func(members map[types.ID]*membership.Member) map[types.ID]*membership.Member {
+ processedMembers := make(map[types.ID]*membership.Member)
+ for id, m := range members {
+ clonedMember := m.Clone()
+ clonedMember.Attributes = membership.Attributes{}
+ processedMembers[id] = clonedMember
+ }
+
+ return processedMembers
+ }
+
+ v2Members, _ := s.cluster.MembersFromStore()
+ v3Members, _ := s.cluster.MembersFromBackend()
+
+ processedV2Members := cleanAttributesFunc(v2Members)
+ processedV3Members := cleanAttributesFunc(v3Members)
+
+ if match := reflect.DeepEqual(processedV2Members, processedV3Members); !match {
+ v2Data, v2Err := json.Marshal(processedV2Members)
+ v3Data, v3Err := json.Marshal(processedV3Members)
+
+ if v2Err != nil || v3Err != nil {
+ panic("members in v2store doesn't match v3store")
+ }
+ panic(fmt.Sprintf("members in v2store doesn't match v3store, v2store: %s, v3store: %s", string(v2Data), string(v3Data)))
+ }
+}
+
+// TODO: non-blocking snapshot
+func (s *EtcdServer) snapshot(ep *etcdProgress, toDisk bool) {
+ lg := s.Logger()
+ d := GetMembershipInfoInV2Format(lg, s.cluster)
+ if toDisk {
+ s.Logger().Info(
+ "triggering snapshot",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.Uint64("local-member-applied-index", ep.appliedi),
+ zap.Uint64("local-member-snapshot-index", ep.diskSnapshotIndex),
+ zap.Uint64("local-member-snapshot-count", s.Cfg.SnapshotCount),
+ zap.Bool("snapshot-forced", s.forceDiskSnapshot),
+ )
+ s.forceDiskSnapshot = false
+ // commit kv to write metadata (for example: consistent index) to disk.
+ //
+ // This guarantees that Backend's consistent_index is >= index of the last snapshot.
+ //
+ // KV().Commit() updates the consistent index in the backend.
+ // All operations that update the consistent index must be called sequentially
+ // from the applyAll function.
+ // So KV().Commit() cannot run in parallel with apply. It has to be called outside
+ // the goroutine created below.
+ s.KV().Commit()
+ }
+
+ // For backward compatibility, generate v2 snapshot from v3 state.
+ snap, err := s.r.raftStorage.CreateSnapshot(ep.appliedi, &ep.confState, d)
+ if err != nil {
+ // the snapshot was done asynchronously with the progress of raft.
+ // raft might have already got a newer snapshot.
+ if errorspkg.Is(err, raft.ErrSnapOutOfDate) {
+ return
+ }
+ lg.Panic("failed to create snapshot", zap.Error(err))
+ }
+ ep.memorySnapshotIndex = ep.appliedi
+
+ verifyConsistentIndexIsLatest(lg, snap, s.consistIndex.ConsistentIndex())
+
+ if toDisk {
+ // SaveSnap saves the snapshot to file and appends the corresponding WAL entry.
+ if err = s.r.storage.SaveSnap(snap); err != nil {
+ lg.Panic("failed to save snapshot", zap.Error(err))
+ }
+ ep.diskSnapshotIndex = ep.appliedi
+ if err = s.r.storage.Release(snap); err != nil {
+ lg.Panic("failed to release wal", zap.Error(err))
+ }
+
+ lg.Info(
+ "saved snapshot to disk",
+ zap.Uint64("snapshot-index", snap.Metadata.Index),
+ )
+ }
+}
+
+func (s *EtcdServer) compactRaftLog(snapi uint64) {
+ lg := s.Logger()
+
+ // When sending a snapshot, etcd will pause compaction.
+ // After receiving a snapshot, the slow follower needs to get all the entries right after
+ // the snapshot was sent in order to catch up. If we do not pause compaction, the log entries
+ // right after the snapshot was sent might already be compacted. This happens when the snapshot
+ // takes a long time to send and save. Pausing compaction avoids triggering a snapshot sending cycle.
+ if atomic.LoadInt64(&s.inflightSnapshots) != 0 {
+ lg.Info("skip compaction since there is an inflight snapshot")
+ return
+ }
+
+ // keep some in-memory log entries for slow followers.
+ compacti := uint64(1)
+ if snapi > s.Cfg.SnapshotCatchUpEntries {
+ compacti = snapi - s.Cfg.SnapshotCatchUpEntries
+ }
+ err := s.r.raftStorage.Compact(compacti)
+ if err != nil {
+ // the compaction was done asynchronously with the progress of raft.
+ // The raft log might have already been compacted.
+ if errorspkg.Is(err, raft.ErrCompacted) {
+ return
+ }
+ lg.Panic("failed to compact", zap.Error(err))
+ }
+ lg.Debug(
+ "compacted Raft logs",
+ zap.Uint64("compact-index", compacti),
+ )
+}
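+
+// compactIndexFor is an editor's illustrative sketch (a hypothetical helper,
+// not part of this change) of the index math above. E.g. with snapi=10000 and
+// SnapshotCatchUpEntries=5000 it compacts up to index 5000, keeping entries
+// (5000, 10000] in memory for slow followers.
+func compactIndexFor(snapi, catchUpEntries uint64) uint64 {
+ if snapi > catchUpEntries {
+ return snapi - catchUpEntries
+ }
+ return 1
+}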
+
+// CutPeer drops messages to the specified peer.
+func (s *EtcdServer) CutPeer(id types.ID) {
+ tr, ok := s.r.transport.(*rafthttp.Transport)
+ if ok {
+ tr.CutPeer(id)
+ }
+}
+
+// MendPeer recovers the message dropping behavior of the given peer.
+func (s *EtcdServer) MendPeer(id types.ID) {
+ tr, ok := s.r.transport.(*rafthttp.Transport)
+ if ok {
+ tr.MendPeer(id)
+ }
+}
+
+func (s *EtcdServer) PauseSending() { s.r.pauseSending() }
+
+func (s *EtcdServer) ResumeSending() { s.r.resumeSending() }
+
+func (s *EtcdServer) ClusterVersion() *semver.Version {
+ if s.cluster == nil {
+ return nil
+ }
+ return s.cluster.Version()
+}
+
+func (s *EtcdServer) StorageVersion() *semver.Version {
+ // `applySnapshot` sets a new backend instance, so we need to acquire the bemu lock.
+ s.bemu.RLock()
+ defer s.bemu.RUnlock()
+
+ v, err := schema.DetectSchemaVersion(s.lg, s.be.ReadTx())
+ if err != nil {
+ s.lg.Warn("Failed to detect schema version", zap.Error(err))
+ return nil
+ }
+ return &v
+}
+
+// monitorClusterVersions checks every monitorVersionInterval whether the local member is the leader, and updates the cluster version if needed.
+func (s *EtcdServer) monitorClusterVersions() {
+ lg := s.Logger()
+ monitor := serverversion.NewMonitor(lg, NewServerVersionAdapter(s))
+ for {
+ select {
+ case <-s.firstCommitInTerm.Receive():
+ case <-time.After(monitorVersionInterval):
+ case <-s.stopping:
+ lg.Info("server has stopped; stopping cluster version's monitor")
+ return
+ }
+
+ if s.Leader() != s.MemberID() {
+ continue
+ }
+ err := monitor.UpdateClusterVersionIfNeeded()
+ if err != nil {
+ s.lg.Error("Failed to monitor cluster version", zap.Error(err))
+ }
+ }
+}
+
+// monitorStorageVersion updates the storage version every monitorVersionInterval if needed.
+func (s *EtcdServer) monitorStorageVersion() {
+ lg := s.Logger()
+ monitor := serverversion.NewMonitor(lg, NewServerVersionAdapter(s))
+ for {
+ select {
+ case <-time.After(monitorVersionInterval):
+ case <-s.clusterVersionChanged.Receive():
+ case <-s.stopping:
+ lg.Info("server has stopped; stopping storage version's monitor")
+ return
+ }
+ monitor.UpdateStorageVersionIfNeeded()
+ }
+}
+
+func (s *EtcdServer) monitorKVHash() {
+ t := s.Cfg.CorruptCheckTime
+ if t == 0 {
+ return
+ }
+ checkTicker := time.NewTicker(t)
+ defer checkTicker.Stop()
+
+ lg := s.Logger()
+ lg.Info(
+ "enabled corruption checking",
+ zap.String("local-member-id", s.MemberID().String()),
+ zap.Duration("interval", t),
+ )
+ for {
+ select {
+ case <-s.stopping:
+ lg.Info("server has stopped; stopping kv hash's monitor")
+ return
+ case <-checkTicker.C:
+ }
+ backend.VerifyBackendConsistency(s.be, lg, false, schema.AllBuckets...)
+ if !s.isLeader() {
+ continue
+ }
+ if err := s.corruptionChecker.PeriodicCheck(); err != nil {
+ lg.Warn("failed to check hash KV", zap.Error(err))
+ }
+ }
+}
+
+func (s *EtcdServer) monitorCompactHash() {
+ if !s.FeatureEnabled(features.CompactHashCheck) {
+ return
+ }
+ t := s.Cfg.CompactHashCheckTime
+ for {
+ select {
+ case <-time.After(t):
+ case <-s.stopping:
+ lg := s.Logger()
+ lg.Info("server has stopped; stopping compact hash's monitor")
+ return
+ }
+ if !s.isLeader() {
+ continue
+ }
+ s.corruptionChecker.CompactHashCheck()
+ }
+}
+
+func (s *EtcdServer) updateClusterVersionV3(ver string) {
+ lg := s.Logger()
+
+ if s.cluster.Version() == nil {
+ lg.Info(
+ "setting up initial cluster version using v3 API",
+ zap.String("cluster-version", version.Cluster(ver)),
+ )
+ } else {
+ lg.Info(
+ "updating cluster version using v3 API",
+ zap.String("from", version.Cluster(s.cluster.Version().String())),
+ zap.String("to", version.Cluster(ver)),
+ )
+ }
+
+ req := membershippb.ClusterVersionSetRequest{Ver: ver}
+
+ ctx, cancel := context.WithTimeout(s.ctx, s.Cfg.ReqTimeout())
+ _, err := s.raftRequest(ctx, pb.InternalRaftRequest{ClusterVersionSet: &req})
+ cancel()
+
+ switch {
+ case errorspkg.Is(err, nil):
+ lg.Info("cluster version is updated", zap.String("cluster-version", version.Cluster(ver)))
+ return
+
+ case errorspkg.Is(err, errors.ErrStopped):
+ lg.Warn("aborting cluster version update; server is stopped", zap.Error(err))
+ return
+
+ default:
+ lg.Warn("failed to update cluster version", zap.Error(err))
+ }
+}
+
+// monitorDowngrade checks every DowngradeCheckTime whether the local member is the leader, and cancels a downgrade if needed.
+func (s *EtcdServer) monitorDowngrade() {
+ monitor := serverversion.NewMonitor(s.Logger(), NewServerVersionAdapter(s))
+ t := s.Cfg.DowngradeCheckTime
+ if t == 0 {
+ return
+ }
+ for {
+ select {
+ case <-time.After(t):
+ case <-s.stopping:
+ return
+ }
+
+ if !s.isLeader() {
+ continue
+ }
+ monitor.CancelDowngradeIfNeeded()
+ }
+}
+
+func (s *EtcdServer) parseProposeCtxErr(err error, start time.Time) error {
+ switch {
+ case errorspkg.Is(err, context.Canceled):
+ return errors.ErrCanceled
+
+ case errorspkg.Is(err, context.DeadlineExceeded):
+ s.leadTimeMu.RLock()
+ curLeadElected := s.leadElectedTime
+ s.leadTimeMu.RUnlock()
+ prevLeadLost := curLeadElected.Add(-2 * time.Duration(s.Cfg.ElectionTicks) * time.Duration(s.Cfg.TickMs) * time.Millisecond)
+ if start.After(prevLeadLost) && start.Before(curLeadElected) {
+ return errors.ErrTimeoutDueToLeaderFail
+ }
+ lead := types.ID(s.getLead())
+ switch lead {
+ case types.ID(raft.None):
+ // TODO: return an error to specify that it happens because the cluster does not have a leader now
+ case s.MemberID():
+ if !isConnectedToQuorumSince(s.r.transport, start, s.MemberID(), s.cluster.Members()) {
+ return errors.ErrTimeoutDueToConnectionLost
+ }
+ default:
+ if !isConnectedSince(s.r.transport, start, lead) {
+ return errors.ErrTimeoutDueToConnectionLost
+ }
+ }
+ return errors.ErrTimeout
+
+ default:
+ return err
+ }
+}
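+
+// Editor's worked example (illustrative, hypothetical values): with
+// ElectionTicks=10 and TickMs=100, the look-back window above is
+// 2*10*100ms = 2s, so a proposal that started inside
+// (leadElectedTime-2s, leadElectedTime) and then timed out spans a leader
+// change and maps to ErrTimeoutDueToLeaderFail rather than plain ErrTimeout.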
+
+func (s *EtcdServer) KV() mvcc.WatchableKV { return s.kv }
+func (s *EtcdServer) Backend() backend.Backend {
+ s.bemu.RLock()
+ defer s.bemu.RUnlock()
+ return s.be
+}
+
+func (s *EtcdServer) AuthStore() auth.AuthStore { return s.authStore }
+
+func (s *EtcdServer) restoreAlarms() error {
+ as, err := v3alarm.NewAlarmStore(s.lg, schema.NewAlarmBackend(s.lg, s.be))
+ if err != nil {
+ return err
+ }
+ s.alarmStore = as
+ return nil
+}
+
+// GoAttach creates a goroutine on a given function and tracks it using
+// the etcdserver waitgroup.
+// The passed function should interrupt on s.StoppingNotify().
+func (s *EtcdServer) GoAttach(f func()) {
+ s.wgMu.RLock() // this blocks with ongoing close(s.stopping)
+ defer s.wgMu.RUnlock()
+ select {
+ case <-s.stopping:
+ lg := s.Logger()
+ lg.Warn("server has stopped; skipping GoAttach")
+ return
+ default:
+ }
+
+ // now safe to add since waitgroup wait has not started yet
+ s.wg.Add(1)
+ go func() {
+ defer s.wg.Done()
+ f()
+ }()
+}
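+
+// Editor's usage sketch (illustrative; interval is a hypothetical placeholder):
+// work launched via GoAttach should select on StoppingNotify so that shutdown
+// is never blocked, e.g.:
+//
+//	s.GoAttach(func() {
+//		select {
+//		case <-time.After(interval):
+//			// periodic work here
+//		case <-s.StoppingNotify():
+//			return
+//		}
+//	})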
+
+func (s *EtcdServer) Alarms() []*pb.AlarmMember {
+ return s.alarmStore.Get(pb.AlarmType_NONE)
+}
+
+// IsLearner returns whether the local member is a raft learner
+func (s *EtcdServer) IsLearner() bool {
+ return s.cluster.IsLocalMemberLearner()
+}
+
+// IsMemberExist returns whether the member with the given ID exists in the cluster.
+func (s *EtcdServer) IsMemberExist(id types.ID) bool {
+ return s.cluster.IsMemberExist(id)
+}
+
+// raftStatus returns the raft status of this etcd node.
+func (s *EtcdServer) raftStatus() raft.Status {
+ return s.r.Node.Status()
+}
+
+func (s *EtcdServer) Version() *serverversion.Manager {
+ return serverversion.NewManager(s.Logger(), NewServerVersionAdapter(s))
+}
+
+func (s *EtcdServer) getTxPostLockInsideApplyHook() func() {
+ return func() {
+ applyingIdx, applyingTerm := s.consistIndex.ConsistentApplyingIndex()
+ if applyingIdx > s.consistIndex.UnsafeConsistentIndex() {
+ s.consistIndex.SetConsistentIndex(applyingIdx, applyingTerm)
+ }
+ }
+}
+
+func (s *EtcdServer) CorruptionChecker() CorruptionChecker {
+ return s.corruptionChecker
+}
+
+func addFeatureGateMetrics(fg featuregate.FeatureGate, gaugeVec *prometheus.GaugeVec) {
+ for feature, featureSpec := range fg.(featuregate.MutableFeatureGate).GetAll() {
+ var metricVal float64
+ if fg.Enabled(feature) {
+ metricVal = 1
+ } else {
+ metricVal = 0
+ }
+ gaugeVec.With(prometheus.Labels{"name": string(feature), "stage": string(featureSpec.PreRelease)}).Set(metricVal)
+ }
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/server_access_control.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/server_access_control.go
new file mode 100644
index 0000000..09e2255
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/server_access_control.go
@@ -0,0 +1,65 @@
+// Copyright 2018 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import "sync"
+
+// AccessController controls etcd server HTTP request access.
+type AccessController struct {
+ corsMu sync.RWMutex
+ CORS map[string]struct{}
+ hostWhitelistMu sync.RWMutex
+ HostWhitelist map[string]struct{}
+}
+
+// NewAccessController returns a new "AccessController" with default "*" values.
+func NewAccessController() *AccessController {
+ return &AccessController{
+ CORS: map[string]struct{}{"*": {}},
+ HostWhitelist: map[string]struct{}{"*": {}},
+ }
+}
+
+// OriginAllowed determines whether the server will allow a given CORS origin.
+// If CORS is empty, allow all.
+func (ac *AccessController) OriginAllowed(origin string) bool {
+ ac.corsMu.RLock()
+ defer ac.corsMu.RUnlock()
+ if len(ac.CORS) == 0 { // allow all
+ return true
+ }
+ _, ok := ac.CORS["*"]
+ if ok {
+ return true
+ }
+ _, ok = ac.CORS[origin]
+ return ok
+}
+
+// IsHostWhitelisted returns true if the host is whitelisted.
+// If whitelist is empty, allow all.
+func (ac *AccessController) IsHostWhitelisted(host string) bool {
+ ac.hostWhitelistMu.RLock()
+ defer ac.hostWhitelistMu.RUnlock()
+ if len(ac.HostWhitelist) == 0 { // allow all
+ return true
+ }
+ _, ok := ac.HostWhitelist["*"]
+ if ok {
+ return true
+ }
+ _, ok = ac.HostWhitelist[host]
+ return ok
+}
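+
+// Editor's usage sketch (illustrative): the default "*" entries allow every
+// origin and host, and replacing a map narrows access, e.g.:
+//
+//	ac := NewAccessController()
+//	ac.OriginAllowed("https://example.com") // true: "*" is present
+//	ac.CORS = map[string]struct{}{"https://ui.local": {}}
+//	ac.OriginAllowed("https://example.com") // false
+//	ac.OriginAllowed("https://ui.local")    // true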
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/snapshot_merge.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/snapshot_merge.go
new file mode 100644
index 0000000..cc3c545
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/snapshot_merge.go
@@ -0,0 +1,83 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "io"
+
+ humanize "github.com/dustin/go-humanize"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/server/v3/etcdserver/api/snap"
+ "go.etcd.io/etcd/server/v3/storage/backend"
+ "go.etcd.io/raft/v3/raftpb"
+)
+
+// createMergedSnapshotMessage creates a snapshot message that contains: the raft status (term, conf),
+// a snapshot of the v2 store inside raft.Snapshot as []byte, and a snapshot of the v3 KV in the
+// top-level message as a ReadCloser.
+func (s *EtcdServer) createMergedSnapshotMessage(m raftpb.Message, snapt, snapi uint64, confState raftpb.ConfState) snap.Message {
+ lg := s.Logger()
+ // get a snapshot of v2 store as []byte
+ d := GetMembershipInfoInV2Format(lg, s.cluster)
+
+ // commit kv to write metadata (for example: consistent index).
+ s.KV().Commit()
+ dbsnap := s.be.Snapshot()
+ // get a snapshot of v3 KV as readCloser
+ rc := newSnapshotReaderCloser(lg, dbsnap)
+
+ // put the []byte snapshot of store into raft snapshot and return the merged snapshot with
+ // KV readCloser snapshot.
+ snapshot := raftpb.Snapshot{
+ Metadata: raftpb.SnapshotMetadata{
+ Index: snapi,
+ Term: snapt,
+ ConfState: confState,
+ },
+ Data: d,
+ }
+ m.Snapshot = &snapshot
+
+ verifySnapshotIndex(snapshot, s.consistIndex.ConsistentIndex())
+
+ return *snap.NewMessage(m, rc, dbsnap.Size())
+}
+
+func newSnapshotReaderCloser(lg *zap.Logger, snapshot backend.Snapshot) io.ReadCloser {
+ pr, pw := io.Pipe()
+ go func() {
+ n, err := snapshot.WriteTo(pw)
+ if err == nil {
+ lg.Info(
+ "sent database snapshot to writer",
+ zap.Int64("bytes", n),
+ zap.String("size", humanize.Bytes(uint64(n))),
+ )
+ } else {
+ lg.Warn(
+ "failed to send database snapshot to writer",
+ zap.String("size", humanize.Bytes(uint64(n))),
+ zap.Error(err),
+ )
+ }
+ pw.CloseWithError(err)
+ err = snapshot.Close()
+ if err != nil {
+ lg.Panic("failed to close database snapshot", zap.Error(err))
+ }
+ }()
+ return pr
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/metrics.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/metrics.go
new file mode 100644
index 0000000..93f2e07
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/metrics.go
@@ -0,0 +1,71 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package txn
+
+import (
+ "strconv"
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+var (
+ slowApplies = prometheus.NewCounter(prometheus.CounterOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "slow_apply_total",
+ Help: "The total number of slow apply requests (likely overloaded from slow disk).",
+ })
+ applySec = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "apply_duration_seconds",
+ Help: "The latency distributions of v2 apply called by backend.",
+
+ // lowest bucket start of upper bound 0.0001 sec (0.1 ms) with factor 2
+ // highest bucket start of 0.0001 sec * 2^19 == 52.4288 sec
+ Buckets: prometheus.ExponentialBuckets(0.0001, 2, 20),
+ },
+ []string{"version", "op", "success"},
+ )
+ rangeSec = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Namespace: "etcd",
+ Subsystem: "server",
+ Name: "range_duration_seconds",
+ Help: "The latency distributions of txn.Range",
+
+ // lowest bucket start of upper bound 0.0001 sec (0.1 ms) with factor 2
+ // highest bucket start of 0.0001 sec * 2^19 == 52.4288 sec
+ Buckets: prometheus.ExponentialBuckets(0.0001, 2, 20),
+ },
+ []string{"success"},
+ )
+)
+
+func ApplySecObserve(version, op string, success bool, latency time.Duration) {
+ applySec.WithLabelValues(version, op, strconv.FormatBool(success)).Observe(float64(latency.Microseconds()) / 1000000.0)
+}
+
+func RangeSecObserve(success bool, latency time.Duration) {
+ rangeSec.WithLabelValues(strconv.FormatBool(success)).Observe(float64(latency.Microseconds()) / 1000000.0)
+}
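+
+// observedSeconds is an editor's note as code (a hypothetical helper, not part
+// of this change): both observers above record microsecond-truncated seconds,
+// which for practical latencies matches time.Duration.Seconds().
+func observedSeconds(d time.Duration) float64 {
+ return float64(d.Microseconds()) / 1e6 // e.g. 1500µs -> 0.0015s
+}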
+
+func init() {
+ prometheus.MustRegister(applySec)
+ prometheus.MustRegister(rangeSec)
+ prometheus.MustRegister(slowApplies)
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/txn.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/txn.go
new file mode 100644
index 0000000..51f70a0
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/txn.go
@@ -0,0 +1,723 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package txn
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "sort"
+ "time"
+
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/mvccpb"
+ "go.etcd.io/etcd/pkg/v3/traceutil"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+ "go.etcd.io/etcd/server/v3/lease"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+)
+
+func Put(ctx context.Context, lg *zap.Logger, lessor lease.Lessor, kv mvcc.KV, p *pb.PutRequest) (resp *pb.PutResponse, trace *traceutil.Trace, err error) {
+ trace = traceutil.Get(ctx)
+ // create put tracing if the trace in context is empty
+ if trace.IsEmpty() {
+ trace = traceutil.New("put",
+ lg,
+ traceutil.Field{Key: "key", Value: string(p.Key)},
+ traceutil.Field{Key: "req_size", Value: p.Size()},
+ )
+ ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
+ }
+ leaseID := lease.LeaseID(p.Lease)
+ if leaseID != lease.NoLease {
+ if l := lessor.Lookup(leaseID); l == nil {
+ return nil, nil, lease.ErrLeaseNotFound
+ }
+ }
+ txnWrite := kv.Write(trace)
+ defer txnWrite.End()
+ resp, err = put(ctx, txnWrite, p)
+ return resp, trace, err
+}
+
+func put(ctx context.Context, txnWrite mvcc.TxnWrite, p *pb.PutRequest) (resp *pb.PutResponse, err error) {
+ trace := traceutil.Get(ctx)
+ resp = &pb.PutResponse{}
+ resp.Header = &pb.ResponseHeader{}
+ val, leaseID := p.Value, lease.LeaseID(p.Lease)
+
+ var rr *mvcc.RangeResult
+ if p.IgnoreValue || p.IgnoreLease || p.PrevKv {
+ trace.StepWithFunction(func() {
+ rr, err = txnWrite.Range(context.TODO(), p.Key, nil, mvcc.RangeOptions{})
+ }, "get previous kv pair")
+
+ if err != nil {
+ return nil, err
+ }
+ }
+ if p.IgnoreValue || p.IgnoreLease {
+ if rr == nil || len(rr.KVs) == 0 {
+ // ignore_{lease,value} flag expects previous key-value pair
+ return nil, errors.ErrKeyNotFound
+ }
+ }
+ if p.IgnoreValue {
+ val = rr.KVs[0].Value
+ }
+ if p.IgnoreLease {
+ leaseID = lease.LeaseID(rr.KVs[0].Lease)
+ }
+ if p.PrevKv {
+ if rr != nil && len(rr.KVs) != 0 {
+ resp.PrevKv = &rr.KVs[0]
+ }
+ }
+
+ resp.Header.Revision = txnWrite.Put(p.Key, val, leaseID)
+ trace.AddField(traceutil.Field{Key: "response_revision", Value: resp.Header.Revision})
+ return resp, nil
+}
+
+func DeleteRange(ctx context.Context, lg *zap.Logger, kv mvcc.KV, dr *pb.DeleteRangeRequest) (resp *pb.DeleteRangeResponse, trace *traceutil.Trace, err error) {
+ trace = traceutil.Get(ctx)
+ // create delete tracing if the trace in context is empty
+ if trace.IsEmpty() {
+ trace = traceutil.New("delete_range",
+ lg,
+ traceutil.Field{Key: "key", Value: string(dr.Key)},
+ traceutil.Field{Key: "range_end", Value: string(dr.RangeEnd)},
+ )
+ ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
+ }
+ txnWrite := kv.Write(trace)
+ defer txnWrite.End()
+ resp, err = deleteRange(ctx, txnWrite, dr)
+ return resp, trace, err
+}
+
+func deleteRange(ctx context.Context, txnWrite mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
+ resp := &pb.DeleteRangeResponse{}
+ resp.Header = &pb.ResponseHeader{}
+ end := mkGteRange(dr.RangeEnd)
+
+ if dr.PrevKv {
+ rr, err := txnWrite.Range(ctx, dr.Key, end, mvcc.RangeOptions{})
+ if err != nil {
+ return nil, err
+ }
+ if rr != nil {
+ resp.PrevKvs = make([]*mvccpb.KeyValue, len(rr.KVs))
+ for i := range rr.KVs {
+ resp.PrevKvs[i] = &rr.KVs[i]
+ }
+ }
+ }
+
+ resp.Deleted, resp.Header.Revision = txnWrite.DeleteRange(dr.Key, end)
+ return resp, nil
+}
+
+func Range(ctx context.Context, lg *zap.Logger, kv mvcc.KV, r *pb.RangeRequest) (resp *pb.RangeResponse, trace *traceutil.Trace, err error) {
+ trace = traceutil.Get(ctx)
+ if trace.IsEmpty() {
+ trace = traceutil.New("range", lg)
+ ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
+ }
+ defer func(start time.Time) {
+ success := err == nil
+ RangeSecObserve(success, time.Since(start))
+ }(time.Now())
+ txnRead := kv.Read(mvcc.ConcurrentReadTxMode, trace)
+ defer txnRead.End()
+ resp, err = executeRange(ctx, lg, txnRead, r)
+ return resp, trace, err
+}
+
+func executeRange(ctx context.Context, lg *zap.Logger, txnRead mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
+ trace := traceutil.Get(ctx)
+
+ resp := &pb.RangeResponse{}
+ resp.Header = &pb.ResponseHeader{}
+
+ limit := r.Limit
+ if r.SortOrder != pb.RangeRequest_NONE ||
+ r.MinModRevision != 0 || r.MaxModRevision != 0 ||
+ r.MinCreateRevision != 0 || r.MaxCreateRevision != 0 {
+ // fetch everything; sort and truncate afterwards
+ limit = 0
+ }
+ if limit > 0 {
+ // fetch one extra for 'more' flag
+ limit = limit + 1
+ }
+
+ ro := mvcc.RangeOptions{
+ Limit: limit,
+ Rev: r.Revision,
+ Count: r.CountOnly,
+ }
+
+ rr, err := txnRead.Range(ctx, r.Key, mkGteRange(r.RangeEnd), ro)
+ if err != nil {
+ return nil, err
+ }
+
+ if r.MaxModRevision != 0 {
+ f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision > r.MaxModRevision }
+ pruneKVs(rr, f)
+ }
+ if r.MinModRevision != 0 {
+ f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision < r.MinModRevision }
+ pruneKVs(rr, f)
+ }
+ if r.MaxCreateRevision != 0 {
+ f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision > r.MaxCreateRevision }
+ pruneKVs(rr, f)
+ }
+ if r.MinCreateRevision != 0 {
+ f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision < r.MinCreateRevision }
+ pruneKVs(rr, f)
+ }
+
+ sortOrder := r.SortOrder
+ if r.SortTarget != pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_NONE {
+ // Since current mvcc.Range implementation returns results
+ // sorted by keys in lexicographically ascending order,
+ // sort ASCEND by default only when target is not 'KEY'
+ sortOrder = pb.RangeRequest_ASCEND
+ } else if r.SortTarget == pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_ASCEND {
+ // Since current mvcc.Range implementation returns results
+ // sorted by keys in lexicographically ascending order,
+ // don't re-sort when target is 'KEY' and order is ASCEND
+ sortOrder = pb.RangeRequest_NONE
+ }
+ if sortOrder != pb.RangeRequest_NONE {
+ var sorter sort.Interface
+ switch {
+ case r.SortTarget == pb.RangeRequest_KEY:
+ sorter = &kvSortByKey{&kvSort{rr.KVs}}
+ case r.SortTarget == pb.RangeRequest_VERSION:
+ sorter = &kvSortByVersion{&kvSort{rr.KVs}}
+ case r.SortTarget == pb.RangeRequest_CREATE:
+ sorter = &kvSortByCreate{&kvSort{rr.KVs}}
+ case r.SortTarget == pb.RangeRequest_MOD:
+ sorter = &kvSortByMod{&kvSort{rr.KVs}}
+ case r.SortTarget == pb.RangeRequest_VALUE:
+ sorter = &kvSortByValue{&kvSort{rr.KVs}}
+ default:
+ lg.Panic("unexpected sort target", zap.Int32("sort-target", int32(r.SortTarget)))
+ }
+ switch {
+ case sortOrder == pb.RangeRequest_ASCEND:
+ sort.Sort(sorter)
+ case sortOrder == pb.RangeRequest_DESCEND:
+ sort.Sort(sort.Reverse(sorter))
+ }
+ }
+
+ if r.Limit > 0 && len(rr.KVs) > int(r.Limit) {
+ rr.KVs = rr.KVs[:r.Limit]
+ resp.More = true
+ }
+ trace.Step("filter and sort the key-value pairs")
+ resp.Header.Revision = rr.Rev
+ resp.Count = int64(rr.Count)
+ resp.Kvs = make([]*mvccpb.KeyValue, len(rr.KVs))
+ for i := range rr.KVs {
+ if r.KeysOnly {
+ rr.KVs[i].Value = nil
+ }
+ resp.Kvs[i] = &rr.KVs[i]
+ }
+ trace.Step("assemble the response")
+ return resp, nil
+}
+
+func Txn(ctx context.Context, lg *zap.Logger, rt *pb.TxnRequest, txnModeWriteWithSharedBuffer bool, kv mvcc.KV, lessor lease.Lessor) (*pb.TxnResponse, *traceutil.Trace, error) {
+ trace := traceutil.Get(ctx)
+ if trace.IsEmpty() {
+ trace = traceutil.New("transaction", lg)
+ ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
+ }
+ isWrite := !IsTxnReadonly(rt)
+ // When the transaction contains write operations, we use ReadTx instead of
+ // ConcurrentReadTx to avoid the extra overhead of copying the buffer.
+ var mode mvcc.ReadTxMode
+ if isWrite && txnModeWriteWithSharedBuffer /*a.s.Cfg.ServerFeatureGate.Enabled(features.TxnModeWriteWithSharedBuffer)*/ {
+ mode = mvcc.SharedBufReadTxMode
+ } else {
+ mode = mvcc.ConcurrentReadTxMode
+ }
+ txnRead := kv.Read(mode, trace)
+ var txnPath []bool
+ trace.StepWithFunction(
+ func() {
+ txnPath = compareToPath(txnRead, rt)
+ },
+ "compare",
+ )
+ if isWrite {
+ trace.AddField(traceutil.Field{Key: "read_only", Value: false})
+ }
+ _, err := checkTxn(txnRead, rt, lessor, txnPath)
+ if err != nil {
+ txnRead.End()
+ return nil, nil, err
+ }
+ trace.Step("check requests")
+ // When executing mutable txn ops, etcd must hold the txnWrite lock so
+ // readers do not see any intermediate results. Since writes are
+ // serialized on the raft loop, the revision in the read view will
+ // be the revision of the write txn.
+ var txnWrite mvcc.TxnWrite
+ if isWrite {
+ txnRead.End()
+ txnWrite = kv.Write(trace)
+ } else {
+ txnWrite = mvcc.NewReadOnlyTxnWrite(txnRead)
+ }
+ txnResp, err := txn(ctx, lg, txnWrite, rt, isWrite, txnPath)
+ txnWrite.End()
+
+ trace.AddField(
+ traceutil.Field{Key: "number_of_response", Value: len(txnResp.Responses)},
+ traceutil.Field{Key: "response_revision", Value: txnResp.Header.Revision},
+ )
+ return txnResp, trace, err
+}
+
+func txn(ctx context.Context, lg *zap.Logger, txnWrite mvcc.TxnWrite, rt *pb.TxnRequest, isWrite bool, txnPath []bool) (*pb.TxnResponse, error) {
+ txnResp, _ := newTxnResp(rt, txnPath)
+ _, err := executeTxn(ctx, lg, txnWrite, rt, txnPath, txnResp)
+ if err != nil {
+ if isWrite {
+ // CAUTION: When a txn performing write operations starts, we always expect it to be successful.
+ // If a write failure is seen we SHOULD NOT try to recover the server, but crash with a panic to make the failure explicit.
+ // Trying to silently recover (e.g. by ignoring the failed txn or calling txn.End() early) poses serious risks:
+ // - violation of transaction atomicity if some write operations have been partially executed
+ // - data inconsistency across different etcd members if they applied the txn asymmetrically
+ lg.Panic("unexpected error during txn with writes", zap.Error(err))
+ } else {
+ lg.Error("unexpected error during readonly txn", zap.Error(err))
+ }
+ }
+ rev := txnWrite.Rev()
+ if len(txnWrite.Changes()) != 0 {
+ rev++
+ }
+ txnResp.Header.Revision = rev
+ return txnResp, err
+}
+
+// newTxnResp allocates a txn response for a txn request given a path.
+func newTxnResp(rt *pb.TxnRequest, txnPath []bool) (txnResp *pb.TxnResponse, txnCount int) {
+ reqs := rt.Success
+ if !txnPath[0] {
+ reqs = rt.Failure
+ }
+ resps := make([]*pb.ResponseOp, len(reqs))
+ txnResp = &pb.TxnResponse{
+ Responses: resps,
+ Succeeded: txnPath[0],
+ Header: &pb.ResponseHeader{},
+ }
+ for i, req := range reqs {
+ switch tv := req.Request.(type) {
+ case *pb.RequestOp_RequestRange:
+ resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseRange{}}
+ case *pb.RequestOp_RequestPut:
+ resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponsePut{}}
+ case *pb.RequestOp_RequestDeleteRange:
+ resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseDeleteRange{}}
+ case *pb.RequestOp_RequestTxn:
+ resp, txns := newTxnResp(tv.RequestTxn, txnPath[1:])
+ resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseTxn{ResponseTxn: resp}}
+ txnPath = txnPath[1+txns:]
+ txnCount += txns + 1
+ default:
+ }
+ }
+ return txnResp, txnCount
+}
+
+func executeTxn(ctx context.Context, lg *zap.Logger, txnWrite mvcc.TxnWrite, rt *pb.TxnRequest, txnPath []bool, tresp *pb.TxnResponse) (txns int, err error) {
+ trace := traceutil.Get(ctx)
+ reqs := rt.Success
+ if !txnPath[0] {
+ reqs = rt.Failure
+ }
+
+ for i, req := range reqs {
+ respi := tresp.Responses[i].Response
+ switch tv := req.Request.(type) {
+ case *pb.RequestOp_RequestRange:
+ trace.StartSubTrace(
+ traceutil.Field{Key: "req_type", Value: "range"},
+ traceutil.Field{Key: "range_begin", Value: string(tv.RequestRange.Key)},
+ traceutil.Field{Key: "range_end", Value: string(tv.RequestRange.RangeEnd)})
+ resp, err := executeRange(ctx, lg, txnWrite, tv.RequestRange)
+ if err != nil {
+ return 0, fmt.Errorf("applyTxn: failed Range: %w", err)
+ }
+ respi.(*pb.ResponseOp_ResponseRange).ResponseRange = resp
+ trace.StopSubTrace()
+ case *pb.RequestOp_RequestPut:
+ trace.StartSubTrace(
+ traceutil.Field{Key: "req_type", Value: "put"},
+ traceutil.Field{Key: "key", Value: string(tv.RequestPut.Key)},
+ traceutil.Field{Key: "req_size", Value: tv.RequestPut.Size()})
+ resp, err := put(ctx, txnWrite, tv.RequestPut)
+ if err != nil {
+ return 0, fmt.Errorf("applyTxn: failed Put: %w", err)
+ }
+ respi.(*pb.ResponseOp_ResponsePut).ResponsePut = resp
+ trace.StopSubTrace()
+ case *pb.RequestOp_RequestDeleteRange:
+ resp, err := deleteRange(ctx, txnWrite, tv.RequestDeleteRange)
+ if err != nil {
+ return 0, fmt.Errorf("applyTxn: failed DeleteRange: %w", err)
+ }
+ respi.(*pb.ResponseOp_ResponseDeleteRange).ResponseDeleteRange = resp
+ case *pb.RequestOp_RequestTxn:
+ resp := respi.(*pb.ResponseOp_ResponseTxn).ResponseTxn
+ applyTxns, err := executeTxn(ctx, lg, txnWrite, tv.RequestTxn, txnPath[1:], resp)
+ if err != nil {
+ // don't wrap the error. It's a recursive call and err should be already wrapped
+ return 0, err
+ }
+ txns += applyTxns + 1
+ txnPath = txnPath[applyTxns+1:]
+ default:
+ // empty union
+ }
+ }
+ return txns, nil
+}
+
+func checkPut(rv mvcc.ReadView, lessor lease.Lessor, req *pb.PutRequest) error {
+ if req.IgnoreValue || req.IgnoreLease {
+ // expects previous key-value, error if not exist
+ rr, err := rv.Range(context.TODO(), req.Key, nil, mvcc.RangeOptions{})
+ if err != nil {
+ return err
+ }
+ if rr == nil || len(rr.KVs) == 0 {
+ return errors.ErrKeyNotFound
+ }
+ }
+ if lease.LeaseID(req.Lease) != lease.NoLease {
+ if l := lessor.Lookup(lease.LeaseID(req.Lease)); l == nil {
+ return lease.ErrLeaseNotFound
+ }
+ }
+ return nil
+}
+
+func checkRange(rv mvcc.ReadView, req *pb.RangeRequest) error {
+ switch {
+ case req.Revision == 0:
+ return nil
+ case req.Revision > rv.Rev():
+ return mvcc.ErrFutureRev
+ case req.Revision < rv.FirstRev():
+ return mvcc.ErrCompacted
+ }
+ return nil
+}
+
+func checkTxn(rv mvcc.ReadView, rt *pb.TxnRequest, lessor lease.Lessor, txnPath []bool) (int, error) {
+ txnCount := 0
+ reqs := rt.Success
+ if !txnPath[0] {
+ reqs = rt.Failure
+ }
+ for _, req := range reqs {
+ var err error
+ var txns int
+ switch tv := req.Request.(type) {
+ case *pb.RequestOp_RequestRange:
+ err = checkRange(rv, tv.RequestRange)
+ case *pb.RequestOp_RequestPut:
+ err = checkPut(rv, lessor, tv.RequestPut)
+ case *pb.RequestOp_RequestDeleteRange:
+ case *pb.RequestOp_RequestTxn:
+ txns, err = checkTxn(rv, tv.RequestTxn, lessor, txnPath[1:])
+ txnCount += txns + 1
+ txnPath = txnPath[txns+1:]
+ default:
+ // empty union
+ }
+ if err != nil {
+ return 0, err
+ }
+ }
+ return txnCount, nil
+}
+
+// mkGteRange determines if the range end is a >= range. This works around grpc
+// sending empty byte strings as nil; >= is encoded in the range end as '\0'.
+// If it is a GTE range, then []byte{} is returned to indicate the empty byte
+// string (vs nil being no byte string).
+func mkGteRange(rangeEnd []byte) []byte {
+ if len(rangeEnd) == 1 && rangeEnd[0] == 0 {
+ return []byte{}
+ }
+ return rangeEnd
+}
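+
+// Editor's examples (illustrative) of the encoding handled above:
+//
+//	mkGteRange(nil)         // nil: plain single-key lookup
+//	mkGteRange([]byte{0})   // []byte{}: ">= Key" range over all following keys
+//	mkGteRange([]byte("b")) // "b": ordinary half-open range [Key, "b")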
+
+func pruneKVs(rr *mvcc.RangeResult, isPrunable func(*mvccpb.KeyValue) bool) {
+ j := 0
+ for i := range rr.KVs {
+ rr.KVs[j] = rr.KVs[i]
+ if !isPrunable(&rr.KVs[i]) {
+ j++
+ }
+ }
+ rr.KVs = rr.KVs[:j]
+}
+
+type kvSort struct{ kvs []mvccpb.KeyValue }
+
+func (s *kvSort) Swap(i, j int) {
+ t := s.kvs[i]
+ s.kvs[i] = s.kvs[j]
+ s.kvs[j] = t
+}
+func (s *kvSort) Len() int { return len(s.kvs) }
+
+type kvSortByKey struct{ *kvSort }
+
+func (s *kvSortByKey) Less(i, j int) bool {
+ return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
+}
+
+type kvSortByVersion struct{ *kvSort }
+
+func (s *kvSortByVersion) Less(i, j int) bool {
+ return (s.kvs[i].Version - s.kvs[j].Version) < 0
+}
+
+type kvSortByCreate struct{ *kvSort }
+
+func (s *kvSortByCreate) Less(i, j int) bool {
+ return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
+}
+
+type kvSortByMod struct{ *kvSort }
+
+func (s *kvSortByMod) Less(i, j int) bool {
+ return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
+}
+
+type kvSortByValue struct{ *kvSort }
+
+func (s *kvSortByValue) Less(i, j int) bool {
+ return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
+}
+
+func compareInt64(a, b int64) int {
+ switch {
+ case a < b:
+ return -1
+ case a > b:
+ return 1
+ default:
+ return 0
+ }
+}
+
+func compareToPath(rv mvcc.ReadView, rt *pb.TxnRequest) []bool {
+ txnPath := make([]bool, 1)
+ ops := rt.Success
+ if txnPath[0] = applyCompares(rv, rt.Compare); !txnPath[0] {
+ ops = rt.Failure
+ }
+ for _, op := range ops {
+ tv, ok := op.Request.(*pb.RequestOp_RequestTxn)
+ if !ok || tv.RequestTxn == nil {
+ continue
+ }
+ txnPath = append(txnPath, compareToPath(rv, tv.RequestTxn)...)
+ }
+ return txnPath
+}
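+
+// Editor's note: an illustrative sketch, not upstream code. txnPath is a
+// pre-order flattening of compare outcomes for a (possibly nested) txn.
+// For an outer txn whose compares succeed and whose success branch holds one
+// nested txn whose compares fail, compareToPath yields:
+//
+//	txnPath := []bool{true, false} // outer: take Success ops; inner: take Failure ops
+//
+// executeTxn and checkTxn consume this slice front to back, which is why
+// both advance it with txnPath[txns+1:] after each nested txn.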
+
+func applyCompares(rv mvcc.ReadView, cmps []*pb.Compare) bool {
+ for _, c := range cmps {
+ if !applyCompare(rv, c) {
+ return false
+ }
+ }
+ return true
+}
+
+// applyCompare applies the compare request.
+// If the comparison succeeds, it returns true. Otherwise, returns false.
+func applyCompare(rv mvcc.ReadView, c *pb.Compare) bool {
+ // TODO: possible optimizations
+ // * chunk reads for large ranges to conserve memory
+ // * rewrite rules for common patterns:
+ // ex. "[a, b) createrev > 0" => "limit 1 /\ kvs > 0"
+ // * caching
+ rr, err := rv.Range(context.TODO(), c.Key, mkGteRange(c.RangeEnd), mvcc.RangeOptions{})
+ if err != nil {
+ return false
+ }
+ if len(rr.KVs) == 0 {
+ if c.Target == pb.Compare_VALUE {
+ // Always fail if comparing a value on a key/keys that doesn't exist;
+ // nil == empty string in grpc; no way to represent missing value
+ return false
+ }
+ return compareKV(c, mvccpb.KeyValue{})
+ }
+ for _, kv := range rr.KVs {
+ if !compareKV(c, kv) {
+ return false
+ }
+ }
+ return true
+}
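+
+// Editor's note: an illustrative sketch, not upstream code. Because a
+// missing key is compared against the zero mvccpb.KeyValue, the common
+// "create only if absent" guard compares CreateRevision with 0:
+//
+//	cmp := &pb.Compare{
+//		Key:         []byte("k"),
+//		Target:      pb.Compare_CREATE,
+//		Result:      pb.Compare_EQUAL,
+//		TargetUnion: &pb.Compare_CreateRevision{CreateRevision: 0},
+//	}
+//	// applyCompare(rv, cmp) is true iff key "k" does not exist.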
+
+func compareKV(c *pb.Compare, ckv mvccpb.KeyValue) bool {
+ var result int
+ rev := int64(0)
+ switch c.Target {
+ case pb.Compare_VALUE:
+ var v []byte
+ if tv, _ := c.TargetUnion.(*pb.Compare_Value); tv != nil {
+ v = tv.Value
+ }
+ result = bytes.Compare(ckv.Value, v)
+ case pb.Compare_CREATE:
+ if tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision); tv != nil {
+ rev = tv.CreateRevision
+ }
+ result = compareInt64(ckv.CreateRevision, rev)
+ case pb.Compare_MOD:
+ if tv, _ := c.TargetUnion.(*pb.Compare_ModRevision); tv != nil {
+ rev = tv.ModRevision
+ }
+ result = compareInt64(ckv.ModRevision, rev)
+ case pb.Compare_VERSION:
+ if tv, _ := c.TargetUnion.(*pb.Compare_Version); tv != nil {
+ rev = tv.Version
+ }
+ result = compareInt64(ckv.Version, rev)
+ case pb.Compare_LEASE:
+ if tv, _ := c.TargetUnion.(*pb.Compare_Lease); tv != nil {
+ rev = tv.Lease
+ }
+ result = compareInt64(ckv.Lease, rev)
+ }
+ switch c.Result {
+ case pb.Compare_EQUAL:
+ return result == 0
+ case pb.Compare_NOT_EQUAL:
+ return result != 0
+ case pb.Compare_GREATER:
+ return result > 0
+ case pb.Compare_LESS:
+ return result < 0
+ }
+ return true
+}
+
+func IsTxnSerializable(r *pb.TxnRequest) bool {
+ for _, u := range r.Success {
+ if r := u.GetRequestRange(); r == nil || !r.Serializable {
+ return false
+ }
+ }
+ for _, u := range r.Failure {
+ if r := u.GetRequestRange(); r == nil || !r.Serializable {
+ return false
+ }
+ }
+ return true
+}
+
+func IsTxnReadonly(r *pb.TxnRequest) bool {
+ for _, u := range r.Success {
+ if r := u.GetRequestRange(); r == nil {
+ return false
+ }
+ }
+ for _, u := range r.Failure {
+ if r := u.GetRequestRange(); r == nil {
+ return false
+ }
+ }
+ return true
+}
+
+func CheckTxnAuth(as auth.AuthStore, ai *auth.AuthInfo, rt *pb.TxnRequest) error {
+ for _, c := range rt.Compare {
+ if err := as.IsRangePermitted(ai, c.Key, c.RangeEnd); err != nil {
+ return err
+ }
+ }
+ if err := checkTxnReqsPermission(as, ai, rt.Success); err != nil {
+ return err
+ }
+ return checkTxnReqsPermission(as, ai, rt.Failure)
+}
+
+func checkTxnReqsPermission(as auth.AuthStore, ai *auth.AuthInfo, reqs []*pb.RequestOp) error {
+ for _, requ := range reqs {
+ switch tv := requ.Request.(type) {
+ case *pb.RequestOp_RequestRange:
+ if tv.RequestRange == nil {
+ continue
+ }
+
+ if err := as.IsRangePermitted(ai, tv.RequestRange.Key, tv.RequestRange.RangeEnd); err != nil {
+ return err
+ }
+
+ case *pb.RequestOp_RequestPut:
+ if tv.RequestPut == nil {
+ continue
+ }
+
+ if err := as.IsPutPermitted(ai, tv.RequestPut.Key); err != nil {
+ return err
+ }
+
+ case *pb.RequestOp_RequestDeleteRange:
+ if tv.RequestDeleteRange == nil {
+ continue
+ }
+
+ if tv.RequestDeleteRange.PrevKv {
+ err := as.IsRangePermitted(ai, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd)
+ if err != nil {
+ return err
+ }
+ }
+
+ err := as.IsDeleteRangePermitted(ai, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd)
+ if err != nil {
+ return err
+ }
+ }
+ }
+
+ return nil
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/util.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/util.go
new file mode 100644
index 0000000..f9987c6
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/txn/util.go
@@ -0,0 +1,107 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package txn
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+ "time"
+
+ "github.com/golang/protobuf/proto"
+ "go.uber.org/zap"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+)
+
+func WarnOfExpensiveRequest(lg *zap.Logger, warningApplyDuration time.Duration, now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) {
+ if time.Since(now) <= warningApplyDuration {
+ return
+ }
+ var resp string
+ if !isNil(respMsg) {
+ resp = fmt.Sprintf("size:%d", proto.Size(respMsg))
+ }
+ warnOfExpensiveGenericRequest(lg, warningApplyDuration, now, reqStringer, "", resp, err)
+}
+
+func WarnOfFailedRequest(lg *zap.Logger, now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) {
+ var resp string
+ if !isNil(respMsg) {
+ resp = fmt.Sprintf("size:%d", proto.Size(respMsg))
+ }
+ d := time.Since(now)
+ lg.Warn(
+ "failed to apply request",
+ zap.Duration("took", d),
+ zap.String("request", reqStringer.String()),
+ zap.String("response", resp),
+ zap.Error(err),
+ )
+}
+
+func WarnOfExpensiveReadOnlyTxnRequest(lg *zap.Logger, warningApplyDuration time.Duration, now time.Time, r *pb.TxnRequest, txnResponse *pb.TxnResponse, err error) {
+ if time.Since(now) <= warningApplyDuration {
+ return
+ }
+ reqStringer := pb.NewLoggableTxnRequest(r)
+ var resp string
+ if !isNil(txnResponse) {
+ var resps []string
+ for _, r := range txnResponse.Responses {
+ switch r.Response.(type) {
+ case *pb.ResponseOp_ResponseRange:
+ if op := r.GetResponseRange(); op != nil {
+ resps = append(resps, fmt.Sprintf("range_response_count:%d", len(op.GetKvs())))
+ } else {
+ resps = append(resps, "range_response:nil")
+ }
+ default:
+ // only range responses should appear in a read-only txn request
+ }
+ }
+ resp = fmt.Sprintf("responses:<%s> size:%d", strings.Join(resps, " "), txnResponse.Size())
+ }
+ warnOfExpensiveGenericRequest(lg, warningApplyDuration, now, reqStringer, "read-only txn ", resp, err)
+}
+
+func WarnOfExpensiveReadOnlyRangeRequest(lg *zap.Logger, warningApplyDuration time.Duration, now time.Time, reqStringer fmt.Stringer, rangeResponse *pb.RangeResponse, err error) {
+ if time.Since(now) <= warningApplyDuration {
+ return
+ }
+ var resp string
+ if !isNil(rangeResponse) {
+ resp = fmt.Sprintf("range_response_count:%d size:%d", len(rangeResponse.Kvs), rangeResponse.Size())
+ }
+ warnOfExpensiveGenericRequest(lg, warningApplyDuration, now, reqStringer, "read-only range ", resp, err)
+}
+
+// Callers must make sure that more than warningApplyDuration has passed.
+func warnOfExpensiveGenericRequest(lg *zap.Logger, warningApplyDuration time.Duration, now time.Time, reqStringer fmt.Stringer, prefix string, resp string, err error) {
+ lg.Warn(
+ "apply request took too long",
+ zap.Duration("took", time.Since(now)),
+ zap.Duration("expected-duration", warningApplyDuration),
+ zap.String("prefix", prefix),
+ zap.String("request", reqStringer.String()),
+ zap.String("response", resp),
+ zap.Error(err),
+ )
+ slowApplies.Inc()
+}
+
+func isNil(msg proto.Message) bool {
+ return msg == nil || reflect.ValueOf(msg).IsNil()
+}
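+
+// Editor's note: an illustrative sketch of the typed-nil pitfall that
+// isNil guards against, not upstream code:
+//
+//	var resp *pb.TxnResponse     // nil pointer
+//	var msg proto.Message = resp // non-nil interface holding a nil pointer
+//	_ = msg == nil               // false: the interface itself is not nil
+//	_ = isNil(msg)               // true: reflect sees the nil pointer inside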
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/util.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/util.go
new file mode 100644
index 0000000..fbba549
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/util.go
@@ -0,0 +1,116 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "fmt"
+ "time"
+
+ "go.etcd.io/etcd/client/pkg/v3/types"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp"
+)
+
+// isConnectedToQuorumSince checks whether the local member is connected to the
+// quorum of the cluster since the given time.
+func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
+ return numConnectedSince(transport, since, self, members) >= (len(members)/2)+1
+}
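+
+// Editor's note: an illustrative sketch of the quorum bound, not upstream
+// code. The local member always counts as connected, so for a 5-member
+// cluster:
+//
+//	quorum := (5 / 2) + 1 // 3: self plus at least two reachable peers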
+
+// isConnectedSince checks whether the local member is connected to the
+// remote member since the given time.
+func isConnectedSince(transport rafthttp.Transporter, since time.Time, remote types.ID) bool {
+ t := transport.ActiveSince(remote)
+ return !t.IsZero() && t.Before(since)
+}
+
+// isConnectedFullySince checks whether the local member is connected to all
+// members in the cluster since the given time.
+func isConnectedFullySince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
+ return numConnectedSince(transport, since, self, members) == len(members)
+}
+
+// numConnectedSince counts how many members are connected to the local member
+// since the given time.
+func numConnectedSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) int {
+ connectedNum := 0
+ for _, m := range members {
+ if m.ID == self || isConnectedSince(transport, since, m.ID) {
+ connectedNum++
+ }
+ }
+ return connectedNum
+}
+
+// longestConnected chooses the member with the longest active-since time.
+// It returns false if no member is active.
+func longestConnected(tp rafthttp.Transporter, membs []types.ID) (types.ID, bool) {
+ var longest types.ID
+ var oldest time.Time
+ for _, id := range membs {
+ tm := tp.ActiveSince(id)
+ if tm.IsZero() { // inactive
+ continue
+ }
+
+ if oldest.IsZero() { // first longest candidate
+ oldest = tm
+ longest = id
+ }
+
+ if tm.Before(oldest) {
+ oldest = tm
+ longest = id
+ }
+ }
+ if uint64(longest) == 0 {
+ return longest, false
+ }
+ return longest, true
+}
+
+type notifier struct {
+ c chan struct{}
+ err error
+}
+
+func newNotifier() *notifier {
+ return &notifier{
+ c: make(chan struct{}),
+ }
+}
+
+func (nc *notifier) notify(err error) {
+ nc.err = err
+ close(nc.c)
+}
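+
+// Editor's note: an illustrative sketch, not upstream code. Closing nc.c
+// broadcasts to every waiter at once, so one notifier can release any
+// number of blocked linearizable reads; notify must be called exactly once:
+//
+//	nc := newNotifier()
+//	go func() { <-nc.c; _ = nc.err }() // any number of these waiters
+//	nc.notify(nil)                     // unblocks them all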
+
+// panicAlternativeStringer wraps a fmt.Stringer, and if calling String() panics, calls the alternative instead.
+// This is needed to ensure logging slow v2 requests does not panic, which occurs when running integration tests
+// with the embedded server with github.com/golang/protobuf v1.4.0+. See https://github.com/etcd-io/etcd/issues/12197.
+type panicAlternativeStringer struct {
+ stringer fmt.Stringer
+ alternative func() string
+}
+
+func (n panicAlternativeStringer) String() (s string) {
+ defer func() {
+ if err := recover(); err != nil {
+ s = n.alternative()
+ }
+ }()
+ s = n.stringer.String()
+ return s
+}
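+
+// Editor's note: an illustrative usage sketch, not upstream code; rawReq is
+// a hypothetical fmt.Stringer named only for illustration:
+//
+//	stringer := panicAlternativeStringer{
+//		stringer:    rawReq,
+//		alternative: func() string { return "malformed request" },
+//	}
+//	lg.Warn("slow request", zap.Stringer("request", stringer))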
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/v2_server.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/v2_server.go
new file mode 100644
index 0000000..8636204
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/v2_server.go
@@ -0,0 +1,26 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+)
+
+type RequestV2 pb.Request
+
+func (r *RequestV2) String() string {
+ rpb := pb.Request(*r)
+ return rpb.String()
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/v3_server.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/v3_server.go
new file mode 100644
index 0000000..c695360
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/v3_server.go
@@ -0,0 +1,1054 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "bytes"
+ "context"
+ "encoding/base64"
+ "encoding/binary"
+ errorspkg "errors"
+ "strconv"
+ "time"
+
+ "github.com/gogo/protobuf/proto"
+ "go.uber.org/zap"
+ "golang.org/x/crypto/bcrypt"
+
+ pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+ "go.etcd.io/etcd/api/v3/version"
+ "go.etcd.io/etcd/pkg/v3/traceutil"
+ "go.etcd.io/etcd/server/v3/auth"
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
+ apply2 "go.etcd.io/etcd/server/v3/etcdserver/apply"
+ "go.etcd.io/etcd/server/v3/etcdserver/errors"
+ "go.etcd.io/etcd/server/v3/etcdserver/txn"
+ "go.etcd.io/etcd/server/v3/features"
+ "go.etcd.io/etcd/server/v3/lease"
+ "go.etcd.io/etcd/server/v3/lease/leasehttp"
+ "go.etcd.io/etcd/server/v3/storage/mvcc"
+ "go.etcd.io/raft/v3"
+)
+
+const (
+ // In the healthy case, there might be a small gap (tens of entries) between
+ // the applied index and the committed index.
+ // However, if the committed entries are very heavy to apply, the gap might grow.
+ // We should stop accepting new proposals if the gap grows beyond a certain point.
+ maxGapBetweenApplyAndCommitIndex = 5000
+ traceThreshold = 100 * time.Millisecond
+ readIndexRetryTime = 500 * time.Millisecond
+
+ // applyTimeout is the timeout for the node to catch up its applied index; it is
+ // used in lease-related operations, such as LeaseRenew and LeaseTimeToLive.
+ applyTimeout = time.Second
+)
+
+type RaftKV interface {
+ Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
+ Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
+ DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
+ Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
+ Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
+}
+
+type Lessor interface {
+ // LeaseGrant sends a LeaseGrant request to raft and applies it once committed.
+ LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
+ // LeaseRevoke sends a LeaseRevoke request to raft and applies it once committed.
+ LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
+
+ // LeaseRenew renews the lease with the given ID and returns the renewed
+ // TTL, or an error.
+ LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error)
+
+ // LeaseTimeToLive retrieves lease information.
+ LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error)
+
+ // LeaseLeases lists all leases.
+ LeaseLeases(ctx context.Context, r *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error)
+}
+
+type Authenticator interface {
+ AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error)
+ AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error)
+ AuthStatus(ctx context.Context, r *pb.AuthStatusRequest) (*pb.AuthStatusResponse, error)
+ Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error)
+ UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
+ UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
+ UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
+ UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
+ UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
+ UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
+ RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
+ RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
+ RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
+ RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
+ RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
+ UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
+ RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
+}
+
+func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
+ trace := traceutil.New("range",
+ s.Logger(),
+ traceutil.Field{Key: "range_begin", Value: string(r.Key)},
+ traceutil.Field{Key: "range_end", Value: string(r.RangeEnd)},
+ )
+ ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
+
+ var resp *pb.RangeResponse
+ var err error
+ defer func(start time.Time) {
+ txn.WarnOfExpensiveReadOnlyRangeRequest(s.Logger(), s.Cfg.WarningApplyDuration, start, r, resp, err)
+ if resp != nil {
+ trace.AddField(
+ traceutil.Field{Key: "response_count", Value: len(resp.Kvs)},
+ traceutil.Field{Key: "response_revision", Value: resp.Header.Revision},
+ )
+ }
+ trace.LogIfLong(traceThreshold)
+ }(time.Now())
+
+ if !r.Serializable {
+ err = s.linearizableReadNotify(ctx)
+ trace.Step("agreement among raft nodes before linearized reading")
+ if err != nil {
+ return nil, err
+ }
+ }
+ chk := func(ai *auth.AuthInfo) error {
+ return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd)
+ }
+
+ get := func() { resp, _, err = txn.Range(ctx, s.Logger(), s.KV(), r) }
+ if serr := s.doSerialize(ctx, chk, get); serr != nil {
+ err = serr
+ return nil, err
+ }
+ return resp, err
+}
+
+func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
+ ctx = context.WithValue(ctx, traceutil.StartTimeKey{}, time.Now())
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{Put: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.PutResponse), nil
+}
+
+func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.DeleteRangeResponse), nil
+}
+
+func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
+ if txn.IsTxnReadonly(r) {
+ trace := traceutil.New("transaction",
+ s.Logger(),
+ traceutil.Field{Key: "read_only", Value: true},
+ )
+ ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
+ if !txn.IsTxnSerializable(r) {
+ err := s.linearizableReadNotify(ctx)
+ trace.Step("agreement among raft nodes before linearized reading")
+ if err != nil {
+ return nil, err
+ }
+ }
+ var resp *pb.TxnResponse
+ var err error
+ chk := func(ai *auth.AuthInfo) error {
+ return txn.CheckTxnAuth(s.authStore, ai, r)
+ }
+
+ defer func(start time.Time) {
+ txn.WarnOfExpensiveReadOnlyTxnRequest(s.Logger(), s.Cfg.WarningApplyDuration, start, r, resp, err)
+ trace.LogIfLong(traceThreshold)
+ }(time.Now())
+
+ get := func() {
+ resp, _, err = txn.Txn(ctx, s.Logger(), r, s.Cfg.ServerFeatureGate.Enabled(features.TxnModeWriteWithSharedBuffer), s.KV(), s.lessor)
+ }
+ if serr := s.doSerialize(ctx, chk, get); serr != nil {
+ return nil, serr
+ }
+ return resp, err
+ }
+
+ ctx = context.WithValue(ctx, traceutil.StartTimeKey{}, time.Now())
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{Txn: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.TxnResponse), nil
+}
+
+func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
+ startTime := time.Now()
+ result, err := s.processInternalRaftRequestOnce(ctx, pb.InternalRaftRequest{Compaction: r})
+ trace := traceutil.TODO()
+ if result != nil && result.Trace != nil {
+ trace = result.Trace
+ defer func() {
+ trace.LogIfLong(traceThreshold)
+ }()
+ applyStart := result.Trace.GetStartTime()
+ result.Trace.SetStartTime(startTime)
+ trace.InsertStep(0, applyStart, "process raft request")
+ }
+ if r.Physical && result != nil && result.Physc != nil {
+ <-result.Physc
+ // The compaction is done deleting keys; the hash is now settled
+ // but the data is not necessarily committed. If there's a crash,
+ // the hash may revert to a hash prior to compaction completing
+ // if the compaction resumes. Force the finished compaction to
+ // commit so it won't resume following a crash.
+ //
+ // `applySnapshot` sets a new backend instance, so we need to acquire the bemu lock.
+ s.bemu.RLock()
+ s.be.ForceCommit()
+ s.bemu.RUnlock()
+ trace.Step("physically toApply compaction")
+ }
+ if err != nil {
+ return nil, err
+ }
+ if result.Err != nil {
+ return nil, result.Err
+ }
+ resp := result.Resp.(*pb.CompactionResponse)
+ if resp == nil {
+ resp = &pb.CompactionResponse{}
+ }
+ if resp.Header == nil {
+ resp.Header = &pb.ResponseHeader{}
+ }
+ resp.Header.Revision = s.kv.Rev()
+ trace.AddField(traceutil.Field{Key: "response_revision", Value: resp.Header.Revision})
+ return resp, nil
+}
+
+func (s *EtcdServer) LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
+ // no id given? choose one
+ for r.ID == int64(lease.NoLease) {
+ // only use positive int64 id's
+ r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
+ }
+ resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseGrant: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.LeaseGrantResponse), nil
+}
+
+func (s *EtcdServer) waitAppliedIndex() error {
+ select {
+ case <-s.ApplyWait():
+ case <-s.stopping:
+ return errors.ErrStopped
+ case <-time.After(applyTimeout):
+ return errors.ErrTimeoutWaitAppliedIndex
+ }
+
+ return nil
+}
+
+func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
+ resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.LeaseRevokeResponse), nil
+}
+
+func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error) {
+ if s.isLeader() {
+ // If s.isLeader() returns true, but we fail to ensure the current
+ // member's leadership, there are a couple of possibilities:
+ //   1. the current member gets stuck writing WAL entries;
+ //   2. the current member is network-isolated;
+ //   3. the current member is no longer the leader (possibly due to #1 above).
+ // In such cases, we just return an error to the client, so that the client
+ // can switch to another member to continue the lease keep-alive operation.
+ if !s.ensureLeadership() {
+ return -1, lease.ErrNotPrimary
+ }
+ if err := s.waitAppliedIndex(); err != nil {
+ return 0, err
+ }
+
+ ttl, err := s.lessor.Renew(id)
+ if err == nil { // already requested to primary lessor(leader)
+ return ttl, nil
+ }
+ if !errorspkg.Is(err, lease.ErrNotPrimary) {
+ return -1, err
+ }
+ }
+
+ cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
+ defer cancel()
+
+ // renewals don't go through raft; forward to leader manually
+ for cctx.Err() == nil {
+ leader, lerr := s.waitLeader(cctx)
+ if lerr != nil {
+ return -1, lerr
+ }
+ for _, url := range leader.PeerURLs {
+ lurl := url + leasehttp.LeasePrefix
+ ttl, err := leasehttp.RenewHTTP(cctx, id, lurl, s.peerRt)
+ if err == nil || errorspkg.Is(err, lease.ErrLeaseNotFound) {
+ return ttl, err
+ }
+ }
+ // Throttle in case of e.g. connection problems.
+ time.Sleep(50 * time.Millisecond)
+ }
+
+ if errorspkg.Is(cctx.Err(), context.DeadlineExceeded) {
+ return -1, errors.ErrTimeout
+ }
+ return -1, errors.ErrCanceled
+}
+
+func (s *EtcdServer) checkLeaseTimeToLive(ctx context.Context, leaseID lease.LeaseID) (uint64, error) {
+ rev := s.AuthStore().Revision()
+ if !s.AuthStore().IsAuthEnabled() {
+ return rev, nil
+ }
+ authInfo, err := s.AuthInfoFromCtx(ctx)
+ if err != nil {
+ return rev, err
+ }
+ if authInfo == nil {
+ return rev, auth.ErrUserEmpty
+ }
+
+ l := s.lessor.Lookup(leaseID)
+ if l != nil {
+ for _, key := range l.Keys() {
+ if err := s.AuthStore().IsRangePermitted(authInfo, []byte(key), []byte{}); err != nil {
+ return 0, err
+ }
+ }
+ }
+
+ return rev, nil
+}
+
+func (s *EtcdServer) leaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
+ if s.isLeader() {
+ if err := s.waitAppliedIndex(); err != nil {
+ return nil, err
+ }
+
+ // gofail: var beforeLookupWhenLeaseTimeToLive struct{}
+
+ // primary; timetolive directly from leader
+ le := s.lessor.Lookup(lease.LeaseID(r.ID))
+ if le == nil {
+ return nil, lease.ErrLeaseNotFound
+ }
+ // TODO: fill out ResponseHeader
+ resp := &pb.LeaseTimeToLiveResponse{Header: &pb.ResponseHeader{}, ID: r.ID, TTL: int64(le.Remaining().Seconds()), GrantedTTL: le.TTL()}
+ if r.Keys {
+ ks := le.Keys()
+ kbs := make([][]byte, len(ks))
+ for i := range ks {
+ kbs[i] = []byte(ks[i])
+ }
+ resp.Keys = kbs
+ }
+
+ // The lessor could be demoted if the leader changed during the lookup.
+ // We should return an error to force a retry instead of returning an
+ // incorrect remaining TTL.
+ if le.Demoted() {
+ // NOTE: lease.ErrNotPrimary is not a retryable error for the
+ // client. Use ErrLeaderChanged instead.
+ return nil, errors.ErrLeaderChanged
+ }
+ return resp, nil
+ }
+
+ cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
+ defer cancel()
+
+ // forward to leader
+ for cctx.Err() == nil {
+ leader, err := s.waitLeader(cctx)
+ if err != nil {
+ return nil, err
+ }
+ for _, url := range leader.PeerURLs {
+ lurl := url + leasehttp.LeaseInternalPrefix
+ resp, err := leasehttp.TimeToLiveHTTP(cctx, lease.LeaseID(r.ID), r.Keys, lurl, s.peerRt)
+ if err == nil {
+ return resp.LeaseTimeToLiveResponse, nil
+ }
+ if errorspkg.Is(err, lease.ErrLeaseNotFound) {
+ return nil, err
+ }
+ }
+ }
+
+ if errorspkg.Is(cctx.Err(), context.DeadlineExceeded) {
+ return nil, errors.ErrTimeout
+ }
+ return nil, errors.ErrCanceled
+}
+
+func (s *EtcdServer) LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
+ var rev uint64
+ var err error
+ if r.Keys {
+ // check RBAC permission only if Keys is true
+ rev, err = s.checkLeaseTimeToLive(ctx, lease.LeaseID(r.ID))
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ resp, err := s.leaseTimeToLive(ctx, r)
+ if err != nil {
+ return nil, err
+ }
+
+ if r.Keys {
+ if s.AuthStore().IsAuthEnabled() && rev != s.AuthStore().Revision() {
+ return nil, auth.ErrAuthOldRevision
+ }
+ }
+ return resp, nil
+}
+
+func (s *EtcdServer) newHeader() *pb.ResponseHeader {
+ return &pb.ResponseHeader{
+ ClusterId: uint64(s.cluster.ID()),
+ MemberId: uint64(s.MemberID()),
+ Revision: s.KV().Rev(),
+ RaftTerm: s.Term(),
+ }
+}
+
+// LeaseLeases lists all leases; despite its name, it is effectively a ListLeases call.
+func (s *EtcdServer) LeaseLeases(_ context.Context, _ *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error) {
+ ls := s.lessor.Leases()
+ lss := make([]*pb.LeaseStatus, len(ls))
+ for i := range ls {
+ lss[i] = &pb.LeaseStatus{ID: int64(ls[i].ID)}
+ }
+ return &pb.LeaseLeasesResponse{Header: s.newHeader(), Leases: lss}, nil
+}
+
+func (s *EtcdServer) waitLeader(ctx context.Context) (*membership.Member, error) {
+ leader := s.cluster.Member(s.Leader())
+ for leader == nil {
+ // wait an election
+ dur := time.Duration(s.Cfg.ElectionTicks) * time.Duration(s.Cfg.TickMs) * time.Millisecond
+ select {
+ case <-time.After(dur):
+ leader = s.cluster.Member(s.Leader())
+ case <-s.stopping:
+ return nil, errors.ErrStopped
+ case <-ctx.Done():
+ return nil, errors.ErrNoLeader
+ }
+ }
+ if len(leader.PeerURLs) == 0 {
+ return nil, errors.ErrNoLeader
+ }
+ return leader, nil
+}
+
+func (s *EtcdServer) Alarm(ctx context.Context, r *pb.AlarmRequest) (*pb.AlarmResponse, error) {
+ resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{Alarm: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AlarmResponse), nil
+}
+
+func (s *EtcdServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
+ resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{AuthEnable: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthEnableResponse), nil
+}
+
+func (s *EtcdServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthDisable: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthDisableResponse), nil
+}
+
+func (s *EtcdServer) AuthStatus(ctx context.Context, r *pb.AuthStatusRequest) (*pb.AuthStatusResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthStatus: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthStatusResponse), nil
+}
+
+func (s *EtcdServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
+ if err := s.linearizableReadNotify(ctx); err != nil {
+ return nil, err
+ }
+
+ lg := s.Logger()
+
+ // fix https://nvd.nist.gov/vuln/detail/CVE-2021-28235
+ defer func() {
+ if r != nil {
+ r.Password = ""
+ }
+ }()
+
+ var resp proto.Message
+ for {
+ checkedRevision, err := s.AuthStore().CheckPassword(r.Name, r.Password)
+ if err != nil {
+ if !errorspkg.Is(err, auth.ErrAuthNotEnabled) {
+ lg.Warn(
+ "invalid authentication was requested",
+ zap.String("user", r.Name),
+ zap.Error(err),
+ )
+ }
+ return nil, err
+ }
+
+ st, err := s.AuthStore().GenTokenPrefix()
+ if err != nil {
+ return nil, err
+ }
+
+ // internalReq doesn't need the Password field because s.AuthStore().CheckPassword()
+ // above has already verified it. This also keeps the WAL entry from recording the
+ // password in plain text.
+ internalReq := &pb.InternalAuthenticateRequest{
+ Name: r.Name,
+ SimpleToken: st,
+ }
+
+ resp, err = s.raftRequestOnce(ctx, pb.InternalRaftRequest{Authenticate: internalReq})
+ if err != nil {
+ return nil, err
+ }
+ if checkedRevision == s.AuthStore().Revision() {
+ break
+ }
+
+ lg.Info("revision when password checked became stale; retrying")
+ }
+
+ return resp.(*pb.AuthenticateResponse), nil
+}
+
+func (s *EtcdServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
+ if r.Options == nil || !r.Options.NoPassword {
+ hashedPassword, err := bcrypt.GenerateFromPassword([]byte(r.Password), s.authStore.BcryptCost())
+ if err != nil {
+ return nil, err
+ }
+ r.HashedPassword = base64.StdEncoding.EncodeToString(hashedPassword)
+ r.Password = ""
+ }
+
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserAdd: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthUserAddResponse), nil
+}
+
+func (s *EtcdServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserDelete: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthUserDeleteResponse), nil
+}
+
+func (s *EtcdServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
+ if r.Password != "" {
+ hashedPassword, err := bcrypt.GenerateFromPassword([]byte(r.Password), s.authStore.BcryptCost())
+ if err != nil {
+ return nil, err
+ }
+ r.HashedPassword = base64.StdEncoding.EncodeToString(hashedPassword)
+ r.Password = ""
+ }
+
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserChangePassword: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthUserChangePasswordResponse), nil
+}
+
+func (s *EtcdServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserGrantRole: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthUserGrantRoleResponse), nil
+}
+
+func (s *EtcdServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserGet: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthUserGetResponse), nil
+}
+
+func (s *EtcdServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserList: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthUserListResponse), nil
+}
+
+func (s *EtcdServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserRevokeRole: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthUserRevokeRoleResponse), nil
+}
+
+func (s *EtcdServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleAdd: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthRoleAddResponse), nil
+}
+
+func (s *EtcdServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleGrantPermission: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthRoleGrantPermissionResponse), nil
+}
+
+func (s *EtcdServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleGet: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthRoleGetResponse), nil
+}
+
+func (s *EtcdServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleList: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthRoleListResponse), nil
+}
+
+func (s *EtcdServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleRevokePermission: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthRoleRevokePermissionResponse), nil
+}
+
+func (s *EtcdServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
+ resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleDelete: r})
+ if err != nil {
+ return nil, err
+ }
+ return resp.(*pb.AuthRoleDeleteResponse), nil
+}
+
+func (s *EtcdServer) raftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) {
+ result, err := s.processInternalRaftRequestOnce(ctx, r)
+ if err != nil {
+ return nil, err
+ }
+ if result.Err != nil {
+ return nil, result.Err
+ }
+ if startTime, ok := ctx.Value(traceutil.StartTimeKey{}).(time.Time); ok && result.Trace != nil {
+ applyStart := result.Trace.GetStartTime()
+ // The trace object is created in the apply phase. Reset its start time to the
+ // request start time so that the trace records the raft request duration as the
+ // difference between the request start time and the apply start time.
+ result.Trace.SetStartTime(startTime)
+ result.Trace.InsertStep(0, applyStart, "process raft request")
+ result.Trace.LogIfLong(traceThreshold)
+ }
+ return result.Resp, nil
+}
+
+func (s *EtcdServer) raftRequest(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) {
+ return s.raftRequestOnce(ctx, r)
+}
+
+// doSerialize handles the auth logic, with permissions checked by "chk", for a serialized request "get". Returns a non-nil error on authentication failure.
+func (s *EtcdServer) doSerialize(ctx context.Context, chk func(*auth.AuthInfo) error, get func()) error {
+ trace := traceutil.Get(ctx)
+ ai, err := s.AuthInfoFromCtx(ctx)
+ if err != nil {
+ return err
+ }
+ if ai == nil {
+ // chk expects non-nil AuthInfo; use empty credentials
+ ai = &auth.AuthInfo{}
+ }
+ if err = chk(ai); err != nil {
+ return err
+ }
+ trace.Step("get authentication metadata")
+ // fetch response for serialized request
+ get()
+ // check for a stale token revision in case the auth store was updated while
+ // the request was being handled.
+ if ai.Revision != 0 && ai.Revision != s.authStore.Revision() {
+ return auth.ErrAuthOldRevision
+ }
+ return nil
+}
+
+func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (*apply2.Result, error) {
+ ai := s.getAppliedIndex()
+ ci := s.getCommittedIndex()
+ if ci > ai+maxGapBetweenApplyAndCommitIndex {
+ return nil, errors.ErrTooManyRequests
+ }
+
+ r.Header = &pb.RequestHeader{
+ ID: s.reqIDGen.Next(),
+ }
+
+ // check authinfo if it is not InternalAuthenticateRequest
+ if r.Authenticate == nil {
+ authInfo, err := s.AuthInfoFromCtx(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if authInfo != nil {
+ r.Header.Username = authInfo.Username
+ r.Header.AuthRevision = authInfo.Revision
+ }
+ }
+
+ data, err := r.Marshal()
+ if err != nil {
+ return nil, err
+ }
+
+ if len(data) > int(s.Cfg.MaxRequestBytes) {
+ return nil, errors.ErrRequestTooLarge
+ }
+
+ id := r.ID
+ if id == 0 {
+ id = r.Header.ID
+ }
+ ch := s.w.Register(id)
+
+ cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
+ defer cancel()
+
+ start := time.Now()
+ err = s.r.Propose(cctx, data)
+ if err != nil {
+ proposalsFailed.Inc()
+ s.w.Trigger(id, nil) // GC wait
+ return nil, err
+ }
+ proposalsPending.Inc()
+ defer proposalsPending.Dec()
+
+ select {
+ case x := <-ch:
+ return x.(*apply2.Result), nil
+ case <-cctx.Done():
+ proposalsFailed.Inc()
+ s.w.Trigger(id, nil) // GC wait
+ return nil, s.parseProposeCtxErr(cctx.Err(), start)
+ case <-s.done:
+ return nil, errors.ErrStopped
+ }
+}
+
+// Watchable returns a watchable interface attached to the etcdserver.
+func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() }
+
+func (s *EtcdServer) linearizableReadLoop() {
+ for {
+ requestID := s.reqIDGen.Next()
+ leaderChangedNotifier := s.leaderChanged.Receive()
+ select {
+ case <-leaderChangedNotifier:
+ continue
+ case <-s.readwaitc:
+ case <-s.stopping:
+ return
+ }
+
+ // As a single loop iteration can unblock multiple reads, it is not very
+ // useful to propagate the trace from Txn or Range.
+ trace := traceutil.New("linearizableReadLoop", s.Logger())
+
+ nextnr := newNotifier()
+ s.readMu.Lock()
+ nr := s.readNotifier
+ s.readNotifier = nextnr
+ s.readMu.Unlock()
+
+ confirmedIndex, err := s.requestCurrentIndex(leaderChangedNotifier, requestID)
+ if isStopped(err) {
+ return
+ }
+ if err != nil {
+ nr.notify(err)
+ continue
+ }
+
+ trace.Step("read index received")
+
+ trace.AddField(traceutil.Field{Key: "readStateIndex", Value: confirmedIndex})
+
+ appliedIndex := s.getAppliedIndex()
+ trace.AddField(traceutil.Field{Key: "appliedIndex", Value: strconv.FormatUint(appliedIndex, 10)})
+
+ if appliedIndex < confirmedIndex {
+ select {
+ case <-s.applyWait.Wait(confirmedIndex):
+ case <-s.stopping:
+ return
+ }
+ }
+ // unblock all l-reads requested at indices before confirmedIndex
+ nr.notify(nil)
+ trace.Step("applied index is now lower than readState.Index")
+
+ trace.LogAllStepsIfLong(traceThreshold)
+ }
+}
+
+func isStopped(err error) bool {
+ return errorspkg.Is(err, raft.ErrStopped) || errorspkg.Is(err, errors.ErrStopped)
+}
+
+func (s *EtcdServer) requestCurrentIndex(leaderChangedNotifier <-chan struct{}, requestID uint64) (uint64, error) {
+ err := s.sendReadIndex(requestID)
+ if err != nil {
+ return 0, err
+ }
+
+ lg := s.Logger()
+ errorTimer := time.NewTimer(s.Cfg.ReqTimeout())
+ defer errorTimer.Stop()
+ retryTimer := time.NewTimer(readIndexRetryTime)
+ defer retryTimer.Stop()
+
+ firstCommitInTermNotifier := s.firstCommitInTerm.Receive()
+
+ for {
+ select {
+ case rs := <-s.r.readStateC:
+ requestIDBytes := uint64ToBigEndianBytes(requestID)
+ gotOwnResponse := bytes.Equal(rs.RequestCtx, requestIDBytes)
+ if !gotOwnResponse {
+ // A previous request might have timed out; ignore its response and
+ // continue waiting for the response to the current request.
+ responseID := uint64(0)
+ if len(rs.RequestCtx) == 8 {
+ responseID = binary.BigEndian.Uint64(rs.RequestCtx)
+ }
+ lg.Warn(
+ "ignored out-of-date read index response; local node read indexes queueing up and waiting to be in sync with leader",
+ zap.Uint64("sent-request-id", requestID),
+ zap.Uint64("received-request-id", responseID),
+ )
+ slowReadIndex.Inc()
+ continue
+ }
+ return rs.Index, nil
+ case <-leaderChangedNotifier:
+ readIndexFailed.Inc()
+ // return a retryable error.
+ return 0, errors.ErrLeaderChanged
+ case <-firstCommitInTermNotifier:
+ firstCommitInTermNotifier = s.firstCommitInTerm.Receive()
+ lg.Info("first commit in current term: resending ReadIndex request")
+ err := s.sendReadIndex(requestID)
+ if err != nil {
+ return 0, err
+ }
+ retryTimer.Reset(readIndexRetryTime)
+ continue
+ case <-retryTimer.C:
+ lg.Warn(
+ "waiting for ReadIndex response took too long, retrying",
+ zap.Uint64("sent-request-id", requestID),
+ zap.Duration("retry-timeout", readIndexRetryTime),
+ )
+ err := s.sendReadIndex(requestID)
+ if err != nil {
+ return 0, err
+ }
+ retryTimer.Reset(readIndexRetryTime)
+ continue
+ case <-errorTimer.C:
+ lg.Warn(
+ "timed out waiting for read index response (local node might have slow network)",
+ zap.Duration("timeout", s.Cfg.ReqTimeout()),
+ )
+ slowReadIndex.Inc()
+ return 0, errors.ErrTimeout
+ case <-s.stopping:
+ return 0, errors.ErrStopped
+ }
+ }
+}
+
+func uint64ToBigEndianBytes(number uint64) []byte {
+ byteResult := make([]byte, 8)
+ binary.BigEndian.PutUint64(byteResult, number)
+ return byteResult
+}
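+
+// Editor's note: an illustrative sketch, not upstream code. The request ID
+// round-trips through the 8-byte ReadIndex context, which is how
+// requestCurrentIndex matches a readState to its own request:
+//
+//	rctx := uint64ToBigEndianBytes(42)                // [0 0 0 0 0 0 0 42]
+//	id := binary.BigEndian.Uint64(rctx)               // 42 again
+//	_ = bytes.Equal(rctx, uint64ToBigEndianBytes(id)) // true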
+
+func (s *EtcdServer) sendReadIndex(requestIndex uint64) error {
+ ctxToSend := uint64ToBigEndianBytes(requestIndex)
+
+ cctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
+ err := s.r.ReadIndex(cctx, ctxToSend)
+ cancel()
+ if errorspkg.Is(err, raft.ErrStopped) {
+ return err
+ }
+ if err != nil {
+ lg := s.Logger()
+ lg.Warn("failed to get read index from Raft", zap.Error(err))
+ readIndexFailed.Inc()
+ return err
+ }
+ return nil
+}
+
+func (s *EtcdServer) LinearizableReadNotify(ctx context.Context) error {
+ return s.linearizableReadNotify(ctx)
+}
+
+func (s *EtcdServer) linearizableReadNotify(ctx context.Context) error {
+ s.readMu.RLock()
+ nc := s.readNotifier
+ s.readMu.RUnlock()
+
+ // signal the linearizable read loop for the current notifier if it hasn't been signaled already
+ select {
+ case s.readwaitc <- struct{}{}:
+ default:
+ }
+
+ // wait for read state notification
+ select {
+ case <-nc.c:
+ return nc.err
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-s.done:
+ return errors.ErrStopped
+ }
+}
+
+func (s *EtcdServer) AuthInfoFromCtx(ctx context.Context) (*auth.AuthInfo, error) {
+ authInfo, err := s.AuthStore().AuthInfoFromCtx(ctx)
+ if authInfo != nil || err != nil {
+ return authInfo, err
+ }
+ if !s.Cfg.ClientCertAuthEnabled {
+ return nil, nil
+ }
+ authInfo = s.AuthStore().AuthInfoFromTLS(ctx)
+ return authInfo, nil
+}
+
+func (s *EtcdServer) Downgrade(ctx context.Context, r *pb.DowngradeRequest) (*pb.DowngradeResponse, error) {
+ switch r.Action {
+ case pb.DowngradeRequest_VALIDATE:
+ return s.downgradeValidate(ctx, r.Version)
+ case pb.DowngradeRequest_ENABLE:
+ return s.downgradeEnable(ctx, r)
+ case pb.DowngradeRequest_CANCEL:
+ return s.downgradeCancel(ctx)
+ default:
+ return nil, errors.ErrUnknownMethod
+ }
+}
+
+func (s *EtcdServer) downgradeValidate(ctx context.Context, v string) (*pb.DowngradeResponse, error) {
+ resp := &pb.DowngradeResponse{}
+
+ targetVersion, err := convertToClusterVersion(v)
+ if err != nil {
+ return nil, err
+ }
+
+ cv := s.ClusterVersion()
+ if cv == nil {
+ return nil, errors.ErrClusterVersionUnavailable
+ }
+ resp.Version = version.Cluster(cv.String())
+ err = s.Version().DowngradeValidate(ctx, targetVersion)
+ if err != nil {
+ return nil, err
+ }
+
+ return resp, nil
+}
+
+func (s *EtcdServer) downgradeEnable(ctx context.Context, r *pb.DowngradeRequest) (*pb.DowngradeResponse, error) {
+ lg := s.Logger()
+ targetVersion, err := convertToClusterVersion(r.Version)
+ if err != nil {
+ lg.Warn("reject downgrade request", zap.Error(err))
+ return nil, err
+ }
+ err = s.Version().DowngradeEnable(ctx, targetVersion)
+ if err != nil {
+ lg.Warn("reject downgrade request", zap.Error(err))
+ return nil, err
+ }
+ resp := pb.DowngradeResponse{Version: version.Cluster(s.ClusterVersion().String())}
+ return &resp, nil
+}
+
+func (s *EtcdServer) downgradeCancel(ctx context.Context) (*pb.DowngradeResponse, error) {
+ err := s.Version().DowngradeCancel(ctx)
+ if err != nil {
+ s.lg.Warn("failed to cancel downgrade", zap.Error(err))
+ }
+ resp := pb.DowngradeResponse{Version: version.Cluster(s.ClusterVersion().String())}
+ return &resp, nil
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/doc.go
new file mode 100644
index 0000000..c34f905
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/doc.go
@@ -0,0 +1,16 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package version provides functions for getting/saving storage version.
+package version
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/downgrade.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/downgrade.go
new file mode 100644
index 0000000..f2c6e11
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/downgrade.go
@@ -0,0 +1,76 @@
+// Copyright 2020 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package version
+
+import (
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/version"
+)
+
+type DowngradeInfo struct {
+ // TargetVersion is the target downgrade version. If the cluster is not
+ // downgrading, TargetVersion is an empty string.
+ TargetVersion string `json:"target-version"`
+ // Enabled indicates whether downgrade is enabled for the cluster.
+ Enabled bool `json:"enabled"`
+}
+
+func (d *DowngradeInfo) GetTargetVersion() *semver.Version {
+ return semver.Must(semver.NewVersion(d.TargetVersion))
+}
+
+// isValidDowngrade verifies whether the cluster can be downgraded from verFrom to verTo
+func isValidDowngrade(verFrom *semver.Version, verTo *semver.Version) bool {
+ return verTo.Equal(*allowedDowngradeVersion(verFrom))
+}
+
+// MustDetectDowngrade panics if the local server is joining a cluster that does not support its version.
+func MustDetectDowngrade(lg *zap.Logger, sv, cv *semver.Version) {
+ // only keep major.minor version for comparison against cluster version
+ sv = &semver.Version{Major: sv.Major, Minor: sv.Minor}
+
+ // If the cluster disables downgrade, check the local version against the
+ // determined cluster version; the validation passes when the local version
+ // is not less than the cluster version.
+ if cv != nil && sv.LessThan(*cv) {
+ lg.Panic(
+ "invalid downgrade; server version is lower than determined cluster version",
+ zap.String("current-server-version", sv.String()),
+ zap.String("determined-cluster-version", version.Cluster(cv.String())),
+ )
+ }
+}
+
+func allowedDowngradeVersion(ver *semver.Version) *semver.Version {
+ // TODO: handle downgrading from a higher major version (e.g. from v4.0 to v3.x).
+ return &semver.Version{Major: ver.Major, Minor: ver.Minor - 1}
+}
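+
+// Editor's note: an illustrative sketch, not upstream code. Only one minor
+// version of downgrade headroom is allowed; callers trim versions to
+// major.minor first:
+//
+//	allowedDowngradeVersion(semver.New("3.6.0")) // -> 3.5.0
+//	// so a 3.6 -> 3.5 downgrade validates, while 3.6 -> 3.4 does not.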
+
+// IsValidClusterVersionChange checks the two scenarios in which a version
+// change is valid:
+//  1. Downgrade: the cluster version is one minor version higher than the
+//     local version; the cluster version should change.
+//  2. Cluster start: when not all member versions are available, the cluster
+//     version is set to MinVersion (3.0); once all members are at a higher
+//     version and the cluster version is lower than the minimal server
+//     version, the cluster version should change.
+func IsValidClusterVersionChange(verFrom *semver.Version, verTo *semver.Version) bool {
+ verFrom = &semver.Version{Major: verFrom.Major, Minor: verFrom.Minor}
+ verTo = &semver.Version{Major: verTo.Major, Minor: verTo.Minor}
+
+ return isValidDowngrade(verFrom, verTo) || (verFrom.Major == verTo.Major && verFrom.LessThan(*verTo))
+}
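+
+// Editor's note: an illustrative truth table for the two accepted
+// transitions, not upstream code; versions are already trimmed to
+// major.minor:
+//
+//	IsValidClusterVersionChange(semver.New("3.6.0"), semver.New("3.5.0")) // true: downgrade by one minor version
+//	IsValidClusterVersionChange(semver.New("3.0.0"), semver.New("3.6.0")) // true: upgrade within the same major version
+//	IsValidClusterVersionChange(semver.New("3.6.0"), semver.New("3.4.0")) // false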
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/errors.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/errors.go
new file mode 100644
index 0000000..906aa9f
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/errors.go
@@ -0,0 +1,23 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package version
+
+import "errors"
+
+var (
+ ErrInvalidDowngradeTargetVersion = errors.New("etcdserver: invalid downgrade target version")
+ ErrDowngradeInProcess = errors.New("etcdserver: cluster has a downgrade job in progress")
+ ErrNoInflightDowngrade = errors.New("etcdserver: no inflight downgrade job")
+)
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/monitor.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/monitor.go
new file mode 100644
index 0000000..b3e7f58
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/monitor.go
@@ -0,0 +1,221 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package version
+
+import (
+ "context"
+ "errors"
+
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+
+ "go.etcd.io/etcd/api/v3/version"
+)
+
+// Monitor contains the logic used by the cluster leader to monitor version changes and decide on the cluster version or downgrade progress.
+type Monitor struct {
+ lg *zap.Logger
+ s Server
+}
+
+// Server lists EtcdServer methods needed by Monitor
+type Server interface {
+ GetClusterVersion() *semver.Version
+ GetDowngradeInfo() *DowngradeInfo
+ GetMembersVersions() map[string]*version.Versions
+ UpdateClusterVersion(string)
+ LinearizableReadNotify(ctx context.Context) error
+ DowngradeEnable(ctx context.Context, targetVersion *semver.Version) error
+ DowngradeCancel(ctx context.Context) error
+
+ GetStorageVersion() *semver.Version
+ UpdateStorageVersion(semver.Version) error
+}
+
+func NewMonitor(lg *zap.Logger, storage Server) *Monitor {
+ return &Monitor{
+ lg: lg,
+ s: storage,
+ }
+}
+
+// UpdateClusterVersionIfNeeded updates the cluster version if a new one has been decided on.
+func (m *Monitor) UpdateClusterVersionIfNeeded() error {
+ newClusterVersion, err := m.decideClusterVersion()
+ if newClusterVersion != nil {
+ newClusterVersion = &semver.Version{Major: newClusterVersion.Major, Minor: newClusterVersion.Minor}
+ m.s.UpdateClusterVersion(newClusterVersion.String())
+ }
+ return err
+}
+
+// decideClusterVersion decides whether to change the cluster version and, if so, its next value.
+// The new cluster version is based on the members' server versions and on whether the cluster is downgrading.
+// Returns nil if the cluster version should be left unchanged.
+func (m *Monitor) decideClusterVersion() (*semver.Version, error) {
+ clusterVersion := m.s.GetClusterVersion()
+ minimalServerVersion := m.membersMinimalServerVersion()
+ if clusterVersion == nil {
+ if minimalServerVersion != nil {
+ return minimalServerVersion, nil
+ }
+ return semver.New(version.MinClusterVersion), nil
+ }
+ if minimalServerVersion == nil {
+ return nil, nil
+ }
+ downgrade := m.s.GetDowngradeInfo()
+ if downgrade != nil && downgrade.Enabled {
+ if downgrade.GetTargetVersion().Equal(*clusterVersion) {
+ return nil, nil
+ }
+ if !isValidDowngrade(clusterVersion, downgrade.GetTargetVersion()) {
+ m.lg.Error("Cannot downgrade from cluster-version to downgrade-target",
+ zap.String("downgrade-target", downgrade.TargetVersion),
+ zap.String("cluster-version", clusterVersion.String()),
+ )
+ return nil, errors.New("invalid downgrade target")
+ }
+ if !isValidDowngrade(minimalServerVersion, downgrade.GetTargetVersion()) {
+ m.lg.Error("Cannot downgrade from minimal-server-version to downgrade-target",
+ zap.String("downgrade-target", downgrade.TargetVersion),
+ zap.String("minimal-server-version", minimalServerVersion.String()),
+ )
+ return nil, errors.New("invalid downgrade target")
+ }
+ return downgrade.GetTargetVersion(), nil
+ }
+ if clusterVersion.LessThan(*minimalServerVersion) && IsValidClusterVersionChange(clusterVersion, minimalServerVersion) {
+ return minimalServerVersion, nil
+ }
+ return nil, nil
+}
+
+// UpdateStorageVersionIfNeeded updates the storage version if it differs from the cluster version.
+func (m *Monitor) UpdateStorageVersionIfNeeded() {
+ cv := m.s.GetClusterVersion()
+ if cv == nil || cv.String() == version.MinClusterVersion {
+ return
+ }
+ sv := m.s.GetStorageVersion()
+
+ if sv == nil || sv.Major != cv.Major || sv.Minor != cv.Minor {
+ if sv != nil {
+ m.lg.Info("cluster version differs from storage version.", zap.String("cluster-version", cv.String()), zap.String("storage-version", sv.String()))
+ }
+ err := m.s.UpdateStorageVersion(semver.Version{Major: cv.Major, Minor: cv.Minor})
+ if err != nil {
+ m.lg.Error("failed to update storage version", zap.String("cluster-version", cv.String()), zap.Error(err))
+ return
+ }
+ d := m.s.GetDowngradeInfo()
+ if d != nil && d.Enabled {
+ m.lg.Info(
+ "The server is ready to downgrade",
+ zap.String("target-version", d.TargetVersion),
+ zap.String("server-version", version.Version),
+ )
+ }
+ }
+}
+
+func (m *Monitor) CancelDowngradeIfNeeded() {
+ d := m.s.GetDowngradeInfo()
+ if d == nil || !d.Enabled {
+ return
+ }
+
+ targetVersion := d.TargetVersion
+ v := semver.Must(semver.NewVersion(targetVersion))
+ if m.versionsMatchTarget(v) {
+ m.lg.Info("the cluster has been downgraded", zap.String("cluster-version", targetVersion))
+ err := m.s.DowngradeCancel(context.Background())
+ if err != nil {
+ m.lg.Warn("failed to cancel downgrade", zap.Error(err))
+ }
+ }
+}
+
+// membersMinimalServerVersion returns the minimal server version among the members,
+// or nil if the minimal version is unknown.
+// It logs a warning if there is a member with a higher version than the
+// local version.
+func (m *Monitor) membersMinimalServerVersion() *semver.Version {
+ vers := m.s.GetMembersVersions()
+ var minV *semver.Version
+ lv := semver.Must(semver.NewVersion(version.Version))
+
+ for mid, ver := range vers {
+ if ver == nil {
+ return nil
+ }
+ v, err := semver.NewVersion(ver.Server)
+ if err != nil {
+ m.lg.Warn(
+ "failed to parse server version of remote member",
+ zap.String("remote-member-id", mid),
+ zap.String("remote-member-version", ver.Server),
+ zap.Error(err),
+ )
+ return nil
+ }
+ if lv.LessThan(*v) {
+ m.lg.Warn(
+ "leader found higher-versioned member",
+ zap.String("local-member-version", lv.String()),
+ zap.String("remote-member-id", mid),
+ zap.String("remote-member-version", ver.Server),
+ )
+ }
+ if minV == nil {
+ minV = v
+ } else if v.LessThan(*minV) {
+ minV = v
+ }
+ }
+ return minV
+}
+
+// versionsMatchTarget returns true if all members' server versions equal the target version, and false otherwise.
+// It can be used to decide whether the cluster has finished downgrading to the target version.
+func (m *Monitor) versionsMatchTarget(targetVersion *semver.Version) bool {
+ vers := m.s.GetMembersVersions()
+ targetVersion = &semver.Version{Major: targetVersion.Major, Minor: targetVersion.Minor}
+ for mid, ver := range vers {
+ if ver == nil {
+ return false
+ }
+ v, err := semver.NewVersion(ver.Server)
+ if err != nil {
+ m.lg.Warn(
+ "failed to parse server version of remote member",
+ zap.String("remote-member-id", mid),
+ zap.String("remote-member-version", ver.Server),
+ zap.Error(err),
+ )
+ return false
+ }
+ v = &semver.Version{Major: v.Major, Minor: v.Minor}
+ if !targetVersion.Equal(*v) {
+ m.lg.Warn("remotes server has mismatching etcd version",
+ zap.String("remote-member-id", mid),
+ zap.String("current-server-version", v.String()),
+ zap.String("target-version", targetVersion.String()),
+ )
+ return false
+ }
+ }
+ return true
+}
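
A minimal sketch (not part of this patch) of how the Monitor above can be driven. fakeServer is a hypothetical stub for the Server interface; in etcd the real implementation is the serverVersionAdapter wrapping *EtcdServer. With a 3.5 cluster version and all members reporting 3.6 servers, UpdateClusterVersionIfNeeded asks the server to raise the cluster version to 3.6.

    package main

    import (
    	"context"
    	"fmt"

    	"github.com/coreos/go-semver/semver"
    	"go.uber.org/zap"

    	"go.etcd.io/etcd/api/v3/version"
    	serverversion "go.etcd.io/etcd/server/v3/etcdserver/version"
    )

    // fakeServer is a hypothetical stub satisfying serverversion.Server.
    type fakeServer struct {
    	clusterVersion *semver.Version
    	members        map[string]*version.Versions
    }

    func (f *fakeServer) GetClusterVersion() *semver.Version { return f.clusterVersion }
    func (f *fakeServer) GetDowngradeInfo() *serverversion.DowngradeInfo {
    	// Mirror the real adapter, which returns a non-nil, disabled info by default.
    	return &serverversion.DowngradeInfo{Enabled: false}
    }
    func (f *fakeServer) GetMembersVersions() map[string]*version.Versions { return f.members }
    func (f *fakeServer) UpdateClusterVersion(v string) {
    	fmt.Println("updating cluster version to", v)
    }
    func (f *fakeServer) LinearizableReadNotify(ctx context.Context) error             { return nil }
    func (f *fakeServer) DowngradeEnable(ctx context.Context, t *semver.Version) error { return nil }
    func (f *fakeServer) DowngradeCancel(ctx context.Context) error                    { return nil }
    func (f *fakeServer) GetStorageVersion() *semver.Version                           { return nil }
    func (f *fakeServer) UpdateStorageVersion(v semver.Version) error                  { return nil }

    func main() {
    	s := &fakeServer{
    		clusterVersion: semver.New("3.5.0"),
    		members: map[string]*version.Versions{
    			"member-a": {Server: "3.6.0"},
    			"member-b": {Server: "3.6.1"},
    		},
    	}
    	m := serverversion.NewMonitor(zap.NewExample(), s)
    	// The minimal member server version (3.6.0) is ahead of the cluster
    	// version (3.5.0), so the monitor calls UpdateClusterVersion("3.6.0").
    	if err := m.UpdateClusterVersionIfNeeded(); err != nil {
    		fmt.Println("error:", err)
    	}
    }
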
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/version.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/version.go
new file mode 100644
index 0000000..0a2f99a
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/version/version.go
@@ -0,0 +1,81 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package version
+
+import (
+ "context"
+
+ "github.com/coreos/go-semver/semver"
+ "go.uber.org/zap"
+)
+
+// Manager contains logic to manage etcd cluster version downgrade process.
+type Manager struct {
+ lg *zap.Logger
+ s Server
+}
+
+// NewManager returns a new manager instance
+func NewManager(lg *zap.Logger, s Server) *Manager {
+ return &Manager{
+ lg: lg,
+ s: s,
+ }
+}
+
+// DowngradeValidate validates whether the cluster is downgradable to the provided target version and returns an error if not.
+func (m *Manager) DowngradeValidate(ctx context.Context, targetVersion *semver.Version) error {
+ // gets the leader's commit index and waits for the local store to finish applying that index,
+ // to avoid using stale downgrade information
+ err := m.s.LinearizableReadNotify(ctx)
+ if err != nil {
+ return err
+ }
+ cv := m.s.GetClusterVersion()
+ allowedTargetVersion := allowedDowngradeVersion(cv)
+ if !targetVersion.Equal(*allowedTargetVersion) {
+ return ErrInvalidDowngradeTargetVersion
+ }
+
+ downgradeInfo := m.s.GetDowngradeInfo()
+ if downgradeInfo != nil && downgradeInfo.Enabled {
+ // TODO: return the downgrade status along with the error message
+ return ErrDowngradeInProcess
+ }
+ return nil
+}
+
+// DowngradeEnable initiates etcd cluster version downgrade process.
+func (m *Manager) DowngradeEnable(ctx context.Context, targetVersion *semver.Version) error {
+ // validate downgrade capability before starting downgrade
+ err := m.DowngradeValidate(ctx, targetVersion)
+ if err != nil {
+ return err
+ }
+ return m.s.DowngradeEnable(ctx, targetVersion)
+}
+
+// DowngradeCancel cancels ongoing downgrade process.
+func (m *Manager) DowngradeCancel(ctx context.Context) error {
+ err := m.s.LinearizableReadNotify(ctx)
+ if err != nil {
+ return err
+ }
+ downgradeInfo := m.s.GetDowngradeInfo()
+ if !downgradeInfo.Enabled {
+ return ErrNoInflightDowngrade
+ }
+ return m.s.DowngradeCancel(ctx)
+}
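
A hedged sketch of the Manager flow, reusing the hypothetical fakeServer stub from the previous sketch. allowedDowngradeVersion (defined elsewhere in this package) accepts a target exactly one minor version below the current cluster version, so the fake's 3.5.0 cluster can only validate a 3.4 target, and cancelling with no downgrade in flight returns ErrNoInflightDowngrade.

    mgr := serverversion.NewManager(zap.NewExample(), s)
    ctx := context.Background()

    // 3.4.0 is the only valid target for a 3.5.0 cluster; any other target
    // fails DowngradeValidate with ErrInvalidDowngradeTargetVersion.
    if err := mgr.DowngradeEnable(ctx, semver.New("3.4.0")); err != nil {
    	fmt.Println("enable failed:", err)
    }

    // The fake reports Enabled=false, so cancellation is rejected.
    if err := mgr.DowngradeCancel(ctx); err != nil {
    	fmt.Println("cancel failed:", err) // etcdserver: no inflight downgrade job
    }
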
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdserver/zap_raft.go b/vendor/go.etcd.io/etcd/server/v3/etcdserver/zap_raft.go
new file mode 100644
index 0000000..7672bdf
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdserver/zap_raft.go
@@ -0,0 +1,103 @@
+// Copyright 2018 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdserver
+
+import (
+ "errors"
+
+ "go.uber.org/zap"
+ "go.uber.org/zap/zapcore"
+
+ "go.etcd.io/raft/v3"
+)
+
+// NewRaftLogger builds "raft.Logger" from "*zap.Config".
+func NewRaftLogger(lcfg *zap.Config) (raft.Logger, error) {
+ if lcfg == nil {
+ return nil, errors.New("nil zap.Config")
+ }
+ lg, err := lcfg.Build(zap.AddCallerSkip(1)) // to annotate caller outside of "logutil"
+ if err != nil {
+ return nil, err
+ }
+ return &zapRaftLogger{lg: lg, sugar: lg.Sugar()}, nil
+}
+
+// NewRaftLoggerZap converts "*zap.Logger" to "raft.Logger".
+func NewRaftLoggerZap(lg *zap.Logger) raft.Logger {
+ skipCallerLg := lg.WithOptions(zap.AddCallerSkip(1))
+ return &zapRaftLogger{lg: skipCallerLg, sugar: skipCallerLg.Sugar()}
+}
+
+// NewRaftLoggerFromZapCore creates "raft.Logger" from "zap.Core"
+// and "zapcore.WriteSyncer".
+func NewRaftLoggerFromZapCore(cr zapcore.Core, syncer zapcore.WriteSyncer) raft.Logger {
+ // "AddCallerSkip" to annotate caller outside of "logutil"
+ lg := zap.New(cr, zap.AddCaller(), zap.AddCallerSkip(1), zap.ErrorOutput(syncer))
+ return &zapRaftLogger{lg: lg, sugar: lg.Sugar()}
+}
+
+type zapRaftLogger struct {
+ lg *zap.Logger
+ sugar *zap.SugaredLogger
+}
+
+func (zl *zapRaftLogger) Debug(args ...any) {
+ zl.sugar.Debug(args...)
+}
+
+func (zl *zapRaftLogger) Debugf(format string, args ...any) {
+ zl.sugar.Debugf(format, args...)
+}
+
+func (zl *zapRaftLogger) Error(args ...any) {
+ zl.sugar.Error(args...)
+}
+
+func (zl *zapRaftLogger) Errorf(format string, args ...any) {
+ zl.sugar.Errorf(format, args...)
+}
+
+func (zl *zapRaftLogger) Info(args ...any) {
+ zl.sugar.Info(args...)
+}
+
+func (zl *zapRaftLogger) Infof(format string, args ...any) {
+ zl.sugar.Infof(format, args...)
+}
+
+func (zl *zapRaftLogger) Warning(args ...any) {
+ zl.sugar.Warn(args...)
+}
+
+func (zl *zapRaftLogger) Warningf(format string, args ...any) {
+ zl.sugar.Warnf(format, args...)
+}
+
+func (zl *zapRaftLogger) Fatal(args ...any) {
+ zl.sugar.Fatal(args...)
+}
+
+func (zl *zapRaftLogger) Fatalf(format string, args ...any) {
+ zl.sugar.Fatalf(format, args...)
+}
+
+func (zl *zapRaftLogger) Panic(args ...any) {
+ zl.sugar.Panic(args...)
+}
+
+func (zl *zapRaftLogger) Panicf(format string, args ...any) {
+ zl.sugar.Panicf(format, args...)
+}
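
A small standalone usage sketch (not part of this patch): wiring the adapter above into the raft library with raft.SetLogger, so raft's output flows through a production zap logger. The AddCallerSkip applied inside NewRaftLoggerZap makes log entries point at the raft call site rather than at the adapter methods.

    package main

    import (
    	"go.uber.org/zap"

    	"go.etcd.io/etcd/server/v3/etcdserver"
    	"go.etcd.io/raft/v3"
    )

    func main() {
    	lg, err := zap.NewProduction()
    	if err != nil {
    		panic(err)
    	}
    	defer lg.Sync()

    	// Route all raft package logging through the zap-backed adapter.
    	raft.SetLogger(etcdserver.NewRaftLoggerZap(lg))
    }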