[VOL-5548] Handle ONU SW download and upgrade during context cancellation or timeout
Change-Id: I3a901833e2e69c52911f9ffc13e7dd5dc1abcea6
Signed-off-by: bseeniva <balaji.seenivasan@radisys.com>
diff --git a/VERSION b/VERSION
index 3053a1c..de4e782 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.13.17
+2.13.18
diff --git a/internal/pkg/config/config.go b/internal/pkg/config/config.go
index 704aa77..e23e077 100644
--- a/internal/pkg/config/config.go
+++ b/internal/pkg/config/config.go
@@ -68,6 +68,7 @@
MinBackoffRetryDelay time.Duration
MaxBackoffRetryDelay time.Duration
RPCTimeout time.Duration
+ ONUSwUpgradeTimeout time.Duration
MaxConcurrentFlowsPerUni int
PerRPCRetryTimeout time.Duration
MaxRetries uint
@@ -282,6 +283,11 @@
10*time.Second,
"The default timeout when making an RPC request")
+ fs.DurationVar(&(so.ONUSwUpgradeTimeout),
+ "onu_sw_upgrade_timeout",
+ 3*time.Hour,
+ "The default timeout for ONU software upgrade")
+
fs.DurationVar(&(so.MinBackoffRetryDelay),
"min_retry_delay",
500*time.Millisecond,
diff --git a/internal/pkg/core/device_handler.go b/internal/pkg/core/device_handler.go
index e981fd8..2c5a58f 100755
--- a/internal/pkg/core/device_handler.go
+++ b/internal/pkg/core/device_handler.go
@@ -1706,7 +1706,7 @@
// onuSwUpgradeAfterDownload initiates the SW download transfer to the ONU with activate and commit options
// after the OnuImage has been downloaded to the adapter, called in background
func (dh *deviceHandler) onuSwUpgradeAfterDownload(ctx context.Context, apImageRequest *voltha.DeviceImageDownloadRequest,
- apDownloadManager *swupg.FileDownloadManager, aImageIdentifier string) {
+ apDownloadManager *swupg.FileDownloadManager, aImageIdentifier string, aCancel context.CancelFunc) {
var err error
pDevEntry := dh.GetOnuDeviceEntry(ctx, false)
@@ -1767,6 +1767,32 @@
logger.Errorw(ctx, "onu upgrade fsm could not be created", log.Fields{
"device-id": dh.DeviceID, "error": err})
}
+ go func() {
+ defer aCancel() // release the upgrade context on every exit path, not only on completion
+ dh.lockUpgradeFsm.Lock()
+ pUpgradeFsm := dh.pOnuUpradeFsm // read under the lock; may be nil, as the check below assumes
+ dh.lockUpgradeFsm.Unlock()
+ if pUpgradeFsm == nil {
+ logger.Errorw(ctx, "onu upgrade fsm not available - SW upgrade supervision skipped", log.Fields{"device-id": dh.DeviceID})
+ return
+ }
+ abortMsg := "device deleted aborting ONU SW upgrade"
+ select {
+ case <-ctx.Done():
+ abortMsg = "context Deadline Exceeded aborting ONU SW upgrade"
+ case <-dh.deviceDeleteCommChan: // keeps the default abortMsg
+ case success := <-pUpgradeFsm.GetOnuDLChannel():
+ logger.Infow(ctx, "onu SW upgrade download completed", log.Fields{"isSuccess": success, "device-id": dh.DeviceID})
+ return
+ }
+ // Abort path (context done or device deleted): cancel the FSM if it still exists.
+ logger.Errorw(ctx, abortMsg, log.Fields{"device-id": dh.DeviceID, "err": ctx.Err()})
+ dh.lockUpgradeFsm.Lock()
+ defer dh.lockUpgradeFsm.Unlock()
+ if dh.pOnuUpradeFsm != nil {
+ dh.pOnuUpradeFsm.CancelProcessing(ctx, true, voltha.ImageState_CANCELLED_ON_REQUEST)
+ }
+ }()
return
}
logger.Errorw(ctx, "start Onu SW upgrade rejected: no inactive image", log.Fields{
@@ -3405,7 +3431,8 @@
logger.Errorw(ctx, "no valid OnuDevice or omciCC - abort", log.Fields{"device-id": dh.DeviceID})
return fmt.Errorf("no valid omciCC - abort for device-id: %s", dh.device.Id)
}
- dh.pOnuUpradeFsm = swupg.NewOnuUpgradeFsm(ctx, dh, apDevEntry, apDevEntry.GetOnuDB(), aDevEvent,
+ fsmCtx := log.WithSpanFromContext(context.Background(), ctx)
+ dh.pOnuUpradeFsm = swupg.NewOnuUpgradeFsm(fsmCtx, dh, apDevEntry, apDevEntry.GetOnuDB(), aDevEvent,
sFsmName, chUpgradeFsm)
if dh.pOnuUpradeFsm != nil {
pUpgradeStatemachine := dh.pOnuUpradeFsm.PAdaptFsm.PFsm
diff --git a/internal/pkg/core/openonu.go b/internal/pkg/core/openonu.go
index 19afb48..5437aed 100755
--- a/internal/pkg/core/openonu.go
+++ b/internal/pkg/core/openonu.go
@@ -22,6 +22,7 @@
"errors"
"fmt"
"hash/fnv"
+ "strconv"
"strings"
"sync"
"time"
@@ -37,6 +38,7 @@
"github.com/opencord/voltha-protos/v5/go/health"
"github.com/opencord/voltha-protos/v5/go/olt_inter_adapter_service"
"google.golang.org/grpc"
+ "google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
"github.com/golang/protobuf/ptypes/empty"
@@ -637,7 +639,28 @@
//onu upgrade handling called in background without immediate error evaluation here
// as the processing can be done for multiple ONU's and an error on one ONU should not stop processing for others
// state/progress/success of the request has to be verified using the Get_onu_image_status() API
- go handler.onuSwUpgradeAfterDownload(ctx, request, oo.pFileManager, imageIdentifier)
+ go func() {
+ // Start from the configured default timeout; a parsable "deadline" metadata entry overrides it.
+ // Only one context is ever created, so no CancelFunc is overwritten without being called.
+ deadline := time.Now().Add(oo.config.ONUSwUpgradeTimeout)
+ if md, ok := metadata.FromIncomingContext(ctx); !ok {
+ logger.Warnw(ctx, "Failed to retrieve metadata, using default timeout",
+ log.Fields{"device-id": loDeviceID, "image-id": imageIdentifier})
+ } else if deadlineStrs := md.Get("deadline"); len(deadlineStrs) == 0 {
+ logger.Warnw(ctx, "No deadline metadata found, using default timeout",
+ log.Fields{"device-id": loDeviceID, "image-id": imageIdentifier})
+ } else if deadlineUnixNano, err := strconv.ParseInt(deadlineStrs[0], 10, 64); err != nil {
+ logger.Warnw(ctx, "Failed to parse deadline metadata, using default timeout",
+ log.Fields{"device-id": loDeviceID, "image-id": imageIdentifier, "err": err})
+ } else {
+ // The metadata value is interpreted as Unix time in nanoseconds.
+ deadline = time.Unix(0, deadlineUnixNano)
+ }
+
+ // cancel is handed to onuSwUpgradeAfterDownload together with the context it releases.
+ onuswctx, cancel := context.WithDeadline(context.Background(), deadline)
+ handler.onuSwUpgradeAfterDownload(onuswctx, request, oo.pFileManager, imageIdentifier, cancel)
+ }()
loDeviceImageState.ImageState.DownloadState = voltha.ImageState_DOWNLOAD_STARTED
loDeviceImageState.ImageState.Reason = voltha.ImageState_NO_ERROR
loDeviceImageState.ImageState.ImageState = voltha.ImageState_IMAGE_UNKNOWN
diff --git a/internal/pkg/swupg/omci_onu_upgrade.go b/internal/pkg/swupg/omci_onu_upgrade.go
index fff4b51..2dce008 100755
--- a/internal/pkg/swupg/omci_onu_upgrade.go
+++ b/internal/pkg/swupg/omci_onu_upgrade.go
@@ -2050,3 +2050,9 @@
oFsm.abortOnOmciError(ctx, false)
}
}
+// GetOnuDLChannel returns the FSM's chOnuDlReady channel; the field is read while holding mutexUpgradeParams.
+func (oFsm *OnuUpgradeFsm) GetOnuDLChannel() chan bool {
+ oFsm.mutexUpgradeParams.Lock()
+ defer oFsm.mutexUpgradeParams.Unlock()
+ return oFsm.chOnuDlReady
+}