[VOL-5548] Handle ONU SW download and upgrade during context cancellation or timeout

Change-Id: I3a901833e2e69c52911f9ffc13e7dd5dc1abcea6
Signed-off-by: bseeniva <balaji.seenivasan@radisys.com>
diff --git a/VERSION b/VERSION
index 3053a1c..de4e782 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.13.17
+2.13.18
diff --git a/internal/pkg/config/config.go b/internal/pkg/config/config.go
index 704aa77..e23e077 100644
--- a/internal/pkg/config/config.go
+++ b/internal/pkg/config/config.go
@@ -68,6 +68,7 @@
 	MinBackoffRetryDelay        time.Duration
 	MaxBackoffRetryDelay        time.Duration
 	RPCTimeout                  time.Duration
+	ONUSwUpgradeTimeout         time.Duration
 	MaxConcurrentFlowsPerUni    int
 	PerRPCRetryTimeout          time.Duration
 	MaxRetries                  uint
@@ -282,6 +283,11 @@
 		10*time.Second,
 		"The default timeout when making an RPC request")
 
+	fs.DurationVar(&(so.ONUSwUpgradeTimeout),
+		"onu_sw_upgrade_timeout",
+		3*time.Hour,
+		"The default timeout for ONU software upgrade")
+
 	fs.DurationVar(&(so.MinBackoffRetryDelay),
 		"min_retry_delay",
 		500*time.Millisecond,
diff --git a/internal/pkg/core/device_handler.go b/internal/pkg/core/device_handler.go
index e981fd8..2c5a58f 100755
--- a/internal/pkg/core/device_handler.go
+++ b/internal/pkg/core/device_handler.go
@@ -1706,7 +1706,7 @@
 // onuSwUpgradeAfterDownload initiates the SW download transfer to the ONU with activate and commit options
 // after the OnuImage has been downloaded to the adapter, called in background
 func (dh *deviceHandler) onuSwUpgradeAfterDownload(ctx context.Context, apImageRequest *voltha.DeviceImageDownloadRequest,
-	apDownloadManager *swupg.FileDownloadManager, aImageIdentifier string) {
+	apDownloadManager *swupg.FileDownloadManager, aImageIdentifier string, aCancel context.CancelFunc) {
 
 	var err error
 	pDevEntry := dh.GetOnuDeviceEntry(ctx, false)
@@ -1767,6 +1767,32 @@
 			logger.Errorw(ctx, "onu upgrade fsm could not be created", log.Fields{
 				"device-id": dh.DeviceID, "error": err})
 		}
+		// Supervise the upgrade in the background until it completes, the upgrade
+		// context expires, or the device is deleted.
+		go func() {
+			// Release the upgrade context on every exit path, not only on success.
+			defer aCancel()
+			pUpgradeFsm := dh.pOnuUpradeFsm
+			if pUpgradeFsm == nil {
+				// No FSM to supervise (e.g. creation failed, logged above).
+				return
+			}
+			select {
+			case success := <-pUpgradeFsm.GetOnuDLChannel():
+				logger.Infow(ctx, "onu SW upgrade download completed", log.Fields{"isSuccess": success, "device-id": dh.DeviceID})
+				return
+			case <-ctx.Done():
+				logger.Errorw(ctx, "context Deadline Exceeded aborting ONU SW upgrade", log.Fields{"device-id": dh.DeviceID, "err": ctx.Err()})
+			case <-dh.deviceDeleteCommChan:
+				logger.Errorw(ctx, "device deleted aborting ONU SW upgrade", log.Fields{"device-id": dh.DeviceID, "err": ctx.Err()})
+			}
+			// Timeout and device deletion are aborted the same way.
+			dh.lockUpgradeFsm.Lock()
+			defer dh.lockUpgradeFsm.Unlock()
+			if dh.pOnuUpradeFsm != nil {
+				dh.pOnuUpradeFsm.CancelProcessing(ctx, true, voltha.ImageState_CANCELLED_ON_REQUEST)
+			}
+		}()
 		return
 	}
 	logger.Errorw(ctx, "start Onu SW upgrade rejected: no inactive image", log.Fields{
@@ -3405,7 +3431,8 @@
 		logger.Errorw(ctx, "no valid OnuDevice or omciCC - abort", log.Fields{"device-id": dh.DeviceID})
 		return fmt.Errorf("no valid omciCC - abort for device-id: %s", dh.device.Id)
 	}
-	dh.pOnuUpradeFsm = swupg.NewOnuUpgradeFsm(ctx, dh, apDevEntry, apDevEntry.GetOnuDB(), aDevEvent,
+	fsmCtx := log.WithSpanFromContext(context.Background(), ctx)
+	dh.pOnuUpradeFsm = swupg.NewOnuUpgradeFsm(fsmCtx, dh, apDevEntry, apDevEntry.GetOnuDB(), aDevEvent,
 		sFsmName, chUpgradeFsm)
 	if dh.pOnuUpradeFsm != nil {
 		pUpgradeStatemachine := dh.pOnuUpradeFsm.PAdaptFsm.PFsm
diff --git a/internal/pkg/core/openonu.go b/internal/pkg/core/openonu.go
index 19afb48..5437aed 100755
--- a/internal/pkg/core/openonu.go
+++ b/internal/pkg/core/openonu.go
@@ -22,6 +22,7 @@
 	"errors"
 	"fmt"
 	"hash/fnv"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -37,6 +38,7 @@
 	"github.com/opencord/voltha-protos/v5/go/health"
 	"github.com/opencord/voltha-protos/v5/go/olt_inter_adapter_service"
 	"google.golang.org/grpc"
+	"google.golang.org/grpc/metadata"
 	"google.golang.org/grpc/status"
 
 	"github.com/golang/protobuf/ptypes/empty"
@@ -637,7 +639,28 @@
 					//onu upgrade handling called in background without immediate error evaluation here
 					//  as the processing can be done for multiple ONU's and an error on one ONU should not stop processing for others
 					//  state/progress/success of the request has to be verified using the Get_onu_image_status() API
-					go handler.onuSwUpgradeAfterDownload(ctx, request, oo.pFileManager, imageIdentifier)
+					go func() {
+						// Resolve the timeout first so exactly one context (and one timer) is created.
+						timeout := oo.config.ONUSwUpgradeTimeout
+						if md, ok := metadata.FromIncomingContext(ctx); ok {
+							if deadlineStrs := md.Get("deadline"); len(deadlineStrs) > 0 {
+								if deadlineUnixNano, err := strconv.ParseInt(deadlineStrs[0], 10, 64); err == nil {
+									timeout = time.Until(time.Unix(0, deadlineUnixNano))
+								} else {
+									logger.Warnw(ctx, "Failed to parse deadline metadata, using default timeout",
+										log.Fields{"device-id": loDeviceID, "image-id": imageIdentifier, "err": err})
+								}
+							} else {
+								logger.Warnw(ctx, "No deadline metadata found, using default timeout",
+									log.Fields{"device-id": loDeviceID, "image-id": imageIdentifier})
+							}
+						} else {
+							logger.Warnw(ctx, "Failed to retrieve metadata, using default timeout",
+								log.Fields{"device-id": loDeviceID, "image-id": imageIdentifier})
+						}
+						onuswctx, cancel := context.WithTimeout(context.Background(), timeout)
+						handler.onuSwUpgradeAfterDownload(onuswctx, request, oo.pFileManager, imageIdentifier, cancel)
+					}()
 					loDeviceImageState.ImageState.DownloadState = voltha.ImageState_DOWNLOAD_STARTED
 					loDeviceImageState.ImageState.Reason = voltha.ImageState_NO_ERROR
 					loDeviceImageState.ImageState.ImageState = voltha.ImageState_IMAGE_UNKNOWN
diff --git a/internal/pkg/swupg/omci_onu_upgrade.go b/internal/pkg/swupg/omci_onu_upgrade.go
index fff4b51..2dce008 100755
--- a/internal/pkg/swupg/omci_onu_upgrade.go
+++ b/internal/pkg/swupg/omci_onu_upgrade.go
@@ -2050,3 +2050,9 @@
 		oFsm.abortOnOmciError(ctx, false)
 	}
 }
+// GetOnuDLChannel returns the FSM's chOnuDlReady channel; the field is read while holding mutexUpgradeParams.
+func (oFsm *OnuUpgradeFsm) GetOnuDLChannel() chan bool {
+	oFsm.mutexUpgradeParams.Lock()
+	defer oFsm.mutexUpgradeParams.Unlock()
+	return oFsm.chOnuDlReady
+}