[VOL-5486] Fix deprecated versions
Change-Id: I3e03ea246020547ae75fa92ce8cf5cbba7e8f3bb
Signed-off-by: Abhay Kumar <abhay.kumar@radisys.com>
diff --git a/vendor/go.etcd.io/raft/v3/rawnode.go b/vendor/go.etcd.io/raft/v3/rawnode.go
new file mode 100644
index 0000000..a4da2ae
--- /dev/null
+++ b/vendor/go.etcd.io/raft/v3/rawnode.go
@@ -0,0 +1,562 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package raft
+
+import (
+ "errors"
+
+ pb "go.etcd.io/raft/v3/raftpb"
+ "go.etcd.io/raft/v3/tracker"
+)
+
+// ErrStepLocalMsg is returned when try to step a local raft message
+var ErrStepLocalMsg = errors.New("raft: cannot step raft local message")
+
+// ErrStepPeerNotFound is returned when try to step a response message
+// but there is no peer found in raft.trk for that node.
+var ErrStepPeerNotFound = errors.New("raft: cannot step as peer not found")
+
+// RawNode is a thread-unsafe Node.
+// The methods of this struct correspond to the methods of Node and are described
+// more fully there.
+type RawNode struct {
+ raft *raft
+ asyncStorageWrites bool
+
+ // Mutable fields.
+ prevSoftSt *SoftState
+ prevHardSt pb.HardState
+ stepsOnAdvance []pb.Message
+}
+
+// NewRawNode instantiates a RawNode from the given configuration.
+//
+// See Bootstrap() for bootstrapping an initial state; this replaces the former
+// 'peers' argument to this method (with identical behavior). However, It is
+// recommended that instead of calling Bootstrap, applications bootstrap their
+// state manually by setting up a Storage that has a first index > 1 and which
+// stores the desired ConfState as its InitialState.
+func NewRawNode(config *Config) (*RawNode, error) {
+ r := newRaft(config)
+ rn := &RawNode{
+ raft: r,
+ }
+ rn.asyncStorageWrites = config.AsyncStorageWrites
+ ss := r.softState()
+ rn.prevSoftSt = &ss
+ rn.prevHardSt = r.hardState()
+ return rn, nil
+}
+
+// Tick advances the internal logical clock by a single tick.
+func (rn *RawNode) Tick() {
+ rn.raft.tick()
+}
+
+// TickQuiesced advances the internal logical clock by a single tick without
+// performing any other state machine processing. It allows the caller to avoid
+// periodic heartbeats and elections when all of the peers in a Raft group are
+// known to be at the same state. Expected usage is to periodically invoke Tick
+// or TickQuiesced depending on whether the group is "active" or "quiesced".
+//
+// WARNING: Be very careful about using this method as it subverts the Raft
+// state machine. You should probably be using Tick instead.
+//
+// DEPRECATED: This method will be removed in a future release.
+func (rn *RawNode) TickQuiesced() {
+ rn.raft.electionElapsed++
+}
+
+// Campaign causes this RawNode to transition to candidate state.
+func (rn *RawNode) Campaign() error {
+ return rn.raft.Step(pb.Message{
+ Type: pb.MsgHup,
+ })
+}
+
+// Propose proposes data be appended to the raft log.
+func (rn *RawNode) Propose(data []byte) error {
+ return rn.raft.Step(pb.Message{
+ Type: pb.MsgProp,
+ From: rn.raft.id,
+ Entries: []pb.Entry{
+ {Data: data},
+ }})
+}
+
+// ProposeConfChange proposes a config change. See (Node).ProposeConfChange for
+// details.
+func (rn *RawNode) ProposeConfChange(cc pb.ConfChangeI) error {
+ m, err := confChangeToMsg(cc)
+ if err != nil {
+ return err
+ }
+ return rn.raft.Step(m)
+}
+
+// ApplyConfChange applies a config change to the local node. The app must call
+// this when it applies a configuration change, except when it decides to reject
+// the configuration change, in which case no call must take place.
+func (rn *RawNode) ApplyConfChange(cc pb.ConfChangeI) *pb.ConfState {
+ cs := rn.raft.applyConfChange(cc.AsV2())
+ return &cs
+}
+
+// Step advances the state machine using the given message.
+func (rn *RawNode) Step(m pb.Message) error {
+ // Ignore unexpected local messages receiving over network.
+ if IsLocalMsg(m.Type) && !IsLocalMsgTarget(m.From) {
+ return ErrStepLocalMsg
+ }
+ if IsResponseMsg(m.Type) && !IsLocalMsgTarget(m.From) && rn.raft.trk.Progress[m.From] == nil {
+ return ErrStepPeerNotFound
+ }
+ return rn.raft.Step(m)
+}
+
+// Ready returns the outstanding work that the application needs to handle. This
+// includes appending and applying entries or a snapshot, updating the HardState,
+// and sending messages. The returned Ready() *must* be handled and subsequently
+// passed back via Advance().
+func (rn *RawNode) Ready() Ready {
+ rd := rn.readyWithoutAccept()
+ rn.acceptReady(rd)
+ return rd
+}
+
+// readyWithoutAccept returns a Ready. This is a read-only operation, i.e. there
+// is no obligation that the Ready must be handled.
+func (rn *RawNode) readyWithoutAccept() Ready {
+ r := rn.raft
+
+ rd := Ready{
+ Entries: r.raftLog.nextUnstableEnts(),
+ CommittedEntries: r.raftLog.nextCommittedEnts(rn.applyUnstableEntries()),
+ Messages: r.msgs,
+ }
+ if softSt := r.softState(); !softSt.equal(rn.prevSoftSt) {
+ // Allocate only when SoftState changes.
+ escapingSoftSt := softSt
+ rd.SoftState = &escapingSoftSt
+ }
+ if hardSt := r.hardState(); !isHardStateEqual(hardSt, rn.prevHardSt) {
+ rd.HardState = hardSt
+ }
+ if r.raftLog.hasNextUnstableSnapshot() {
+ rd.Snapshot = *r.raftLog.nextUnstableSnapshot()
+ }
+ if len(r.readStates) != 0 {
+ rd.ReadStates = r.readStates
+ }
+ rd.MustSync = MustSync(r.hardState(), rn.prevHardSt, len(rd.Entries))
+
+ if rn.asyncStorageWrites {
+ // If async storage writes are enabled, enqueue messages to
+ // local storage threads, where applicable.
+ if needStorageAppendMsg(r, rd) {
+ m := newStorageAppendMsg(r, rd)
+ rd.Messages = append(rd.Messages, m)
+ }
+ if needStorageApplyMsg(rd) {
+ m := newStorageApplyMsg(r, rd)
+ rd.Messages = append(rd.Messages, m)
+ }
+ } else {
+ // If async storage writes are disabled, immediately enqueue
+ // msgsAfterAppend to be sent out. The Ready struct contract
+ // mandates that Messages cannot be sent until after Entries
+ // are written to stable storage.
+ for _, m := range r.msgsAfterAppend {
+ if m.To != r.id {
+ rd.Messages = append(rd.Messages, m)
+ }
+ }
+ }
+
+ return rd
+}
+
+// MustSync returns true if the hard state and count of Raft entries indicate
+// that a synchronous write to persistent storage is required.
+func MustSync(st, prevst pb.HardState, entsnum int) bool {
+ // Persistent state on all servers:
+ // (Updated on stable storage before responding to RPCs)
+ // currentTerm
+ // votedFor
+ // log entries[]
+ return entsnum != 0 || st.Vote != prevst.Vote || st.Term != prevst.Term
+}
+
+func needStorageAppendMsg(r *raft, rd Ready) bool {
+ // Return true if log entries, hard state, or a snapshot need to be written
+ // to stable storage. Also return true if any messages are contingent on all
+ // prior MsgStorageAppend being processed.
+ return len(rd.Entries) > 0 ||
+ !IsEmptyHardState(rd.HardState) ||
+ !IsEmptySnap(rd.Snapshot) ||
+ len(r.msgsAfterAppend) > 0
+}
+
+func needStorageAppendRespMsg(r *raft, rd Ready) bool {
+ // Return true if raft needs to hear about stabilized entries or an applied
+ // snapshot. See the comment in newStorageAppendRespMsg, which explains why
+ // we check hasNextOrInProgressUnstableEnts instead of len(rd.Entries) > 0.
+ return r.raftLog.hasNextOrInProgressUnstableEnts() ||
+ !IsEmptySnap(rd.Snapshot)
+}
+
+// newStorageAppendMsg creates the message that should be sent to the local
+// append thread to instruct it to append log entries, write an updated hard
+// state, and apply a snapshot. The message also carries a set of responses
+// that should be delivered after the rest of the message is processed. Used
+// with AsyncStorageWrites.
+func newStorageAppendMsg(r *raft, rd Ready) pb.Message {
+ m := pb.Message{
+ Type: pb.MsgStorageAppend,
+ To: LocalAppendThread,
+ From: r.id,
+ Entries: rd.Entries,
+ }
+ if !IsEmptyHardState(rd.HardState) {
+ // If the Ready includes a HardState update, assign each of its fields
+ // to the corresponding fields in the Message. This allows clients to
+ // reconstruct the HardState and save it to stable storage.
+ //
+ // If the Ready does not include a HardState update, make sure to not
+ // assign a value to any of the fields so that a HardState reconstructed
+ // from them will be empty (return true from raft.IsEmptyHardState).
+ m.Term = rd.Term
+ m.Vote = rd.Vote
+ m.Commit = rd.Commit
+ }
+ if !IsEmptySnap(rd.Snapshot) {
+ snap := rd.Snapshot
+ m.Snapshot = &snap
+ }
+ // Attach all messages in msgsAfterAppend as responses to be delivered after
+ // the message is processed, along with a self-directed MsgStorageAppendResp
+ // to acknowledge the entry stability.
+ //
+ // NB: it is important for performance that MsgStorageAppendResp message be
+ // handled after self-directed MsgAppResp messages on the leader (which will
+ // be contained in msgsAfterAppend). This ordering allows the MsgAppResp
+ // handling to use a fast-path in r.raftLog.term() before the newly appended
+ // entries are removed from the unstable log.
+ m.Responses = r.msgsAfterAppend
+ if needStorageAppendRespMsg(r, rd) {
+ m.Responses = append(m.Responses, newStorageAppendRespMsg(r, rd))
+ }
+ return m
+}
+
+// newStorageAppendRespMsg creates the message that should be returned to node
+// after the unstable log entries, hard state, and snapshot in the current Ready
+// (along with those in all prior Ready structs) have been saved to stable
+// storage.
+func newStorageAppendRespMsg(r *raft, rd Ready) pb.Message {
+ m := pb.Message{
+ Type: pb.MsgStorageAppendResp,
+ To: r.id,
+ From: LocalAppendThread,
+ // Dropped after term change, see below.
+ Term: r.Term,
+ }
+ if r.raftLog.hasNextOrInProgressUnstableEnts() {
+ // If the raft log has unstable entries, attach the last index and term of the
+ // append to the response message. This (index, term) tuple will be handed back
+ // and consulted when the stability of those log entries is signaled to the
+ // unstable. If the (index, term) match the unstable log by the time the
+ // response is received (unstable.stableTo), the unstable log can be truncated.
+ //
+ // However, with just this logic, there would be an ABA problem[^1] that could
+ // lead to the unstable log and the stable log getting out of sync temporarily
+ // and leading to an inconsistent view. Consider the following example with 5
+ // nodes, A B C D E:
+ //
+ // 1. A is the leader.
+ // 2. A proposes some log entries but only B receives these entries.
+ // 3. B gets the Ready and the entries are appended asynchronously.
+ // 4. A crashes and C becomes leader after getting a vote from D and E.
+ // 5. C proposes some log entries and B receives these entries, overwriting the
+ // previous unstable log entries that are in the process of being appended.
+ // The entries have a larger term than the previous entries but the same
+ // indexes. It begins appending these new entries asynchronously.
+ // 6. C crashes and A restarts and becomes leader again after getting the vote
+ // from D and E.
+ // 7. B receives the entries from A which are the same as the ones from step 2,
+ // overwriting the previous unstable log entries that are in the process of
+ // being appended from step 5. The entries have the original terms and
+ // indexes from step 2. Recall that log entries retain their original term
+ // numbers when a leader replicates entries from previous terms. It begins
+ // appending these new entries asynchronously.
+ // 8. The asynchronous log appends from the first Ready complete and stableTo
+ // is called.
+ // 9. However, the log entries from the second Ready are still in the
+ // asynchronous append pipeline and will overwrite (in stable storage) the
+ // entries from the first Ready at some future point. We can't truncate the
+ // unstable log yet or a future read from Storage might see the entries from
+ // step 5 before they have been replaced by the entries from step 7.
+ // Instead, we must wait until we are sure that the entries are stable and
+ // that no in-progress appends might overwrite them before removing entries
+ // from the unstable log.
+ //
+ // To prevent these kinds of problems, we also attach the current term to the
+ // MsgStorageAppendResp (above). If the term has changed by the time the
+ // MsgStorageAppendResp if returned, the response is ignored and the unstable
+ // log is not truncated. The unstable log is only truncated when the term has
+ // remained unchanged from the time that the MsgStorageAppend was sent to the
+ // time that the MsgStorageAppendResp is received, indicating that no-one else
+ // is in the process of truncating the stable log.
+ //
+ // However, this replaces a correctness problem with a liveness problem. If we
+ // only attempted to truncate the unstable log when appending new entries but
+ // also occasionally dropped these responses, then quiescence of new log entries
+ // could lead to the unstable log never being truncated.
+ //
+ // To combat this, we attempt to truncate the log on all MsgStorageAppendResp
+ // messages where the unstable log is not empty, not just those associated with
+ // entry appends. This includes MsgStorageAppendResp messages associated with an
+ // updated HardState, which occur after a term change.
+ //
+ // In other words, we set Index and LogTerm in a block that looks like:
+ //
+ // if r.raftLog.hasNextOrInProgressUnstableEnts() { ... }
+ //
+ // not like:
+ //
+ // if len(rd.Entries) > 0 { ... }
+ //
+ // To do so, we attach r.raftLog.lastIndex() and r.raftLog.lastTerm(), not the
+ // (index, term) of the last entry in rd.Entries. If rd.Entries is not empty,
+ // these will be the same. However, if rd.Entries is empty, we still want to
+ // attest that this (index, term) is correct at the current term, in case the
+ // MsgStorageAppend that contained the last entry in the unstable slice carried
+ // an earlier term and was dropped.
+ //
+ // A MsgStorageAppend with a new term is emitted on each term change. This is
+ // the same condition that causes MsgStorageAppendResp messages with earlier
+ // terms to be ignored. As a result, we are guaranteed that, assuming a bounded
+ // number of term changes, there will eventually be a MsgStorageAppendResp
+ // message that is not ignored. This means that entries in the unstable log
+ // which have been appended to stable storage will eventually be truncated and
+ // dropped from memory.
+ //
+ // [^1]: https://en.wikipedia.org/wiki/ABA_problem
+ last := r.raftLog.lastEntryID()
+ m.Index = last.index
+ m.LogTerm = last.term
+ }
+ if !IsEmptySnap(rd.Snapshot) {
+ snap := rd.Snapshot
+ m.Snapshot = &snap
+ }
+ return m
+}
+
+func needStorageApplyMsg(rd Ready) bool { return len(rd.CommittedEntries) > 0 }
+func needStorageApplyRespMsg(rd Ready) bool { return needStorageApplyMsg(rd) }
+
+// newStorageApplyMsg creates the message that should be sent to the local
+// apply thread to instruct it to apply committed log entries. The message
+// also carries a response that should be delivered after the rest of the
+// message is processed. Used with AsyncStorageWrites.
+func newStorageApplyMsg(r *raft, rd Ready) pb.Message {
+ ents := rd.CommittedEntries
+ return pb.Message{
+ Type: pb.MsgStorageApply,
+ To: LocalApplyThread,
+ From: r.id,
+ Term: 0, // committed entries don't apply under a specific term
+ Entries: ents,
+ Responses: []pb.Message{
+ newStorageApplyRespMsg(r, ents),
+ },
+ }
+}
+
+// newStorageApplyRespMsg creates the message that should be returned to node
+// after the committed entries in the current Ready (along with those in all
+// prior Ready structs) have been applied to the local state machine.
+func newStorageApplyRespMsg(r *raft, ents []pb.Entry) pb.Message {
+ return pb.Message{
+ Type: pb.MsgStorageApplyResp,
+ To: r.id,
+ From: LocalApplyThread,
+ Term: 0, // committed entries don't apply under a specific term
+ Entries: ents,
+ }
+}
+
+// acceptReady is called when the consumer of the RawNode has decided to go
+// ahead and handle a Ready. Nothing must alter the state of the RawNode between
+// this call and the prior call to Ready().
+func (rn *RawNode) acceptReady(rd Ready) {
+ if rd.SoftState != nil {
+ rn.prevSoftSt = rd.SoftState
+ }
+ if !IsEmptyHardState(rd.HardState) {
+ rn.prevHardSt = rd.HardState
+ }
+ if len(rd.ReadStates) != 0 {
+ rn.raft.readStates = nil
+ }
+ if !rn.asyncStorageWrites {
+ if len(rn.stepsOnAdvance) != 0 {
+ rn.raft.logger.Panicf("two accepted Ready structs without call to Advance")
+ }
+ for _, m := range rn.raft.msgsAfterAppend {
+ if m.To == rn.raft.id {
+ rn.stepsOnAdvance = append(rn.stepsOnAdvance, m)
+ }
+ }
+ if needStorageAppendRespMsg(rn.raft, rd) {
+ m := newStorageAppendRespMsg(rn.raft, rd)
+ rn.stepsOnAdvance = append(rn.stepsOnAdvance, m)
+ }
+ if needStorageApplyRespMsg(rd) {
+ m := newStorageApplyRespMsg(rn.raft, rd.CommittedEntries)
+ rn.stepsOnAdvance = append(rn.stepsOnAdvance, m)
+ }
+ }
+ rn.raft.msgs = nil
+ rn.raft.msgsAfterAppend = nil
+ rn.raft.raftLog.acceptUnstable()
+ if len(rd.CommittedEntries) > 0 {
+ ents := rd.CommittedEntries
+ index := ents[len(ents)-1].Index
+ rn.raft.raftLog.acceptApplying(index, entsSize(ents), rn.applyUnstableEntries())
+ }
+
+ traceReady(rn.raft)
+}
+
+// applyUnstableEntries returns whether entries are allowed to be applied once
+// they are known to be committed but before they have been written locally to
+// stable storage.
+func (rn *RawNode) applyUnstableEntries() bool {
+ return !rn.asyncStorageWrites
+}
+
+// HasReady called when RawNode user need to check if any Ready pending.
+func (rn *RawNode) HasReady() bool {
+ // TODO(nvanbenschoten): order these cases in terms of cost and frequency.
+ r := rn.raft
+ if softSt := r.softState(); !softSt.equal(rn.prevSoftSt) {
+ return true
+ }
+ if hardSt := r.hardState(); !IsEmptyHardState(hardSt) && !isHardStateEqual(hardSt, rn.prevHardSt) {
+ return true
+ }
+ if r.raftLog.hasNextUnstableSnapshot() {
+ return true
+ }
+ if len(r.msgs) > 0 || len(r.msgsAfterAppend) > 0 {
+ return true
+ }
+ if r.raftLog.hasNextUnstableEnts() || r.raftLog.hasNextCommittedEnts(rn.applyUnstableEntries()) {
+ return true
+ }
+ if len(r.readStates) != 0 {
+ return true
+ }
+ return false
+}
+
+// Advance notifies the RawNode that the application has applied and saved progress in the
+// last Ready results.
+//
+// NOTE: Advance must not be called when using AsyncStorageWrites. Response messages from
+// the local append and apply threads take its place.
+func (rn *RawNode) Advance(_ Ready) {
+ // The actions performed by this function are encoded into stepsOnAdvance in
+ // acceptReady. In earlier versions of this library, they were computed from
+ // the provided Ready struct. Retain the unused parameter for compatibility.
+ if rn.asyncStorageWrites {
+ rn.raft.logger.Panicf("Advance must not be called when using AsyncStorageWrites")
+ }
+ for i, m := range rn.stepsOnAdvance {
+ _ = rn.raft.Step(m)
+ rn.stepsOnAdvance[i] = pb.Message{}
+ }
+ rn.stepsOnAdvance = rn.stepsOnAdvance[:0]
+}
+
+// Status returns the current status of the given group. This allocates, see
+// BasicStatus and WithProgress for allocation-friendlier choices.
+func (rn *RawNode) Status() Status {
+ status := getStatus(rn.raft)
+ return status
+}
+
+// BasicStatus returns a BasicStatus. Notably this does not contain the
+// Progress map; see WithProgress for an allocation-free way to inspect it.
+func (rn *RawNode) BasicStatus() BasicStatus {
+ return getBasicStatus(rn.raft)
+}
+
+// ProgressType indicates the type of replica a Progress corresponds to.
+type ProgressType byte
+
+const (
+ // ProgressTypePeer accompanies a Progress for a regular peer replica.
+ ProgressTypePeer ProgressType = iota
+ // ProgressTypeLearner accompanies a Progress for a learner replica.
+ ProgressTypeLearner
+)
+
+// WithProgress is a helper to introspect the Progress for this node and its
+// peers.
+func (rn *RawNode) WithProgress(visitor func(id uint64, typ ProgressType, pr tracker.Progress)) {
+ rn.raft.trk.Visit(func(id uint64, pr *tracker.Progress) {
+ typ := ProgressTypePeer
+ if pr.IsLearner {
+ typ = ProgressTypeLearner
+ }
+ p := *pr
+ p.Inflights = nil
+ visitor(id, typ, p)
+ })
+}
+
+// ReportUnreachable reports the given node is not reachable for the last send.
+func (rn *RawNode) ReportUnreachable(id uint64) {
+ _ = rn.raft.Step(pb.Message{Type: pb.MsgUnreachable, From: id})
+}
+
+// ReportSnapshot reports the status of the sent snapshot.
+func (rn *RawNode) ReportSnapshot(id uint64, status SnapshotStatus) {
+ rej := status == SnapshotFailure
+
+ _ = rn.raft.Step(pb.Message{Type: pb.MsgSnapStatus, From: id, Reject: rej})
+}
+
+// TransferLeader tries to transfer leadership to the given transferee.
+func (rn *RawNode) TransferLeader(transferee uint64) {
+ _ = rn.raft.Step(pb.Message{Type: pb.MsgTransferLeader, From: transferee})
+}
+
+// ForgetLeader forgets a follower's current leader, changing it to None.
+// See (Node).ForgetLeader for details.
+func (rn *RawNode) ForgetLeader() error {
+ return rn.raft.Step(pb.Message{Type: pb.MsgForgetLeader})
+}
+
+// ReadIndex requests a read state. The read state will be set in ready.
+// Read State has a read index. Once the application advances further than the read
+// index, any linearizable read requests issued before the read request can be
+// processed safely. The read state will have the same rctx attached.
+func (rn *RawNode) ReadIndex(rctx []byte) {
+ _ = rn.raft.Step(pb.Message{Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: rctx}}})
+}