[VOL-5485] Update transient state to NONE if delete-device fails, and return the error for ChildDeviceLost
Change-Id: I4e4b247f1592a18def550b81c9e86bc7ac73d3dc
Signed-off-by: Akash Reddy Kankanala <akash.kankanala@radisys.com>
diff --git a/VERSION b/VERSION
index 4cc5902..2d6744e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.6.22
+3.6.23
diff --git a/rw_core/core/device/agent.go b/rw_core/core/device/agent.go
index 74c2475..38197e6 100755
--- a/rw_core/core/device/agent.go
+++ b/rw_core/core/device/agent.go
@@ -305,19 +305,27 @@
// onDeleteSuccess is a common callback for scenarios where we receive a nil response following a delete request
// to an adapter.
-func (agent *Agent) onDeleteSuccess(ctx context.Context, prevState, currState *common.AdminState_Types) {
+func (agent *Agent) onDeleteSuccess(ctx context.Context, prevState, currState *common.AdminState_Types) error {
if err := agent.requestQueue.WaitForGreenLight(ctx); err != nil {
logger.Errorw(ctx, "delete-device-failure", log.Fields{"device-id": agent.deviceID, "error": err})
+ return err
}
previousDeviceTransientState := agent.getTransientState()
newDevice := agent.cloneDeviceWithoutLock()
if err := agent.updateDeviceWithTransientStateAndReleaseLock(ctx, newDevice,
core.DeviceTransientState_DELETING_POST_ADAPTER_RESPONSE, previousDeviceTransientState); err != nil {
+ ctx1, cancel1 := context.WithTimeout(context.Background(), agent.rpcTimeout) // incase of ctx cancellation, updatetranscientstate will fail , so creating a new context for updating
+ if err1 := agent.updateTransientState(ctx1, core.DeviceTransientState_NONE); err1 != nil { // reset the device transient state if the transition handlers fail, so the next retry can go through
+ logger.Errorf(ctx, "failed-to-reset-transient-state-to-none: %s", err1)
+ }
+ cancel1()
logger.Errorw(ctx, "delete-device-failure", log.Fields{"device-id": agent.deviceID, "error": err})
+ return err
}
requestStatus := &common.OperationResp{Code: common.OperationResp_OPERATION_SUCCESS}
desc := "adapter-response"
agent.logDeviceUpdate(ctx, prevState, currState, requestStatus, nil, desc)
+ return nil
}
// onDeleteFailure is a common callback for scenarios where we receive an error response following a delete request
@@ -754,6 +762,12 @@
// Update device and release lock
if err = agent.updateDeviceWithTransientStateAndReleaseLock(ctx, device,
currentDeviceTransientState, previousDeviceTransientState); err != nil {
+ ctx1, cancel1 := context.WithTimeout(context.Background(), agent.rpcTimeout) // incase of ctx cancellation, updatetranscientstate will fail , so creating a new context for updating
+ if err1 := agent.updateTransientState(ctx1, core.DeviceTransientState_NONE); err1 != nil { // reset the device transient state if the transition handlers fail, so the next retry can go through
+ logger.Errorf(ctx, "failed-to-reset-transient-state-to-none: %s", err1)
+ }
+ cancel1()
+
desc = err.Error()
return err
}
@@ -774,10 +788,12 @@
}
subCtx, cancel := context.WithTimeout(coreutils.WithAllMetadataFromContext(ctx), agent.rpcTimeout)
requestStatus.Code = common.OperationResp_OPERATION_IN_PROGRESS
- if _, err = client.DeleteDevice(subCtx, device); err != nil {
- agent.onDeleteFailure(subCtx, err, &previousAdminState, &agent.device.AdminState)
+ _, err = client.DeleteDevice(subCtx, device)
+ if (err == nil) || (status.Code(err) == codes.NotFound) {
+ err = agent.onDeleteSuccess(subCtx, &previousAdminState, &agent.device.AdminState) // return error is the device transition update fails , so that northbound can retry
+
} else {
- agent.onDeleteSuccess(subCtx, &previousAdminState, &agent.device.AdminState)
+ agent.onDeleteFailure(subCtx, err, &previousAdminState, &agent.device.AdminState)
}
cancel()
}
@@ -1084,6 +1100,7 @@
rpce := agent.deviceMgr.NewRPCEvent(ctx, agent.deviceID, err.Error(), nil)
go agent.deviceMgr.SendRPCEvent(ctx, "RPC_ERROR_RAISE_EVENT", rpce, voltha.EventCategory_COMMUNICATION,
nil, time.Now().Unix())
+ return err
}
return nil
@@ -1137,7 +1154,7 @@
}
if err = agent.deviceMgr.canAdapterRequestProceed(ctx, agent.deviceID); err != nil {
logger.Errorw(ctx, "adapter-request-cannot-proceed", log.Fields{"device-id": agent.deviceID, "error": err})
- return err
+ return nil // as we are returning the err for childdevice lost call , and canAdapterRequestProceed will fail for forceDeleteDevice of OLT , so returning nil here
}
// send request to adapter
client, err := agent.adapterMgr.GetAdapterClient(ctx, agent.adapterEndpoint)
diff --git a/rw_core/core/device/logical_manager.go b/rw_core/core/device/logical_manager.go
index 0d89e8e..6e009f2 100644
--- a/rw_core/core/device/logical_manager.go
+++ b/rw_core/core/device/logical_manager.go
@@ -284,10 +284,12 @@
// retrieve parent device using child device ID
// TODO: return (string, have) instead of *string
// also: If not root device, just return device.parentID instead of loading the parent device.
- if parentDevice := ldMgr.deviceMgr.getParentDevice(ctx, device); parentDevice != nil {
+ parentDevice, err := ldMgr.deviceMgr.getParentDevice(ctx, device)
+ if parentDevice != nil && err == nil {
+
return &parentDevice.ParentId, nil
}
- return nil, status.Errorf(codes.NotFound, "%s", device.Id)
+ return nil, status.Errorf(codes.NotFound, "LogicalDeviceId for %s NotFound with Error %v", device.Id, err)
}
func (ldMgr *LogicalManager) getLogicalDeviceIDFromDeviceID(ctx context.Context, deviceID string) (*string, error) {
diff --git a/rw_core/core/device/manager.go b/rw_core/core/device/manager.go
index e27ea43..0b5df56 100755
--- a/rw_core/core/device/manager.go
+++ b/rw_core/core/device/manager.go
@@ -679,14 +679,18 @@
return status.Errorf(codes.NotFound, "%s", device.Id)
}
-func (dMgr *Manager) getParentDevice(ctx context.Context, childDevice *voltha.Device) *voltha.Device {
+func (dMgr *Manager) getParentDevice(ctx context.Context, childDevice *voltha.Device) (*voltha.Device, error) {
// Sanity check
if childDevice.Root {
// childDevice is the parent device
- return childDevice
+ return childDevice, nil
}
- parentDevice, _ := dMgr.getDeviceReadOnly(ctx, childDevice.ParentId)
- return parentDevice
+ parentDevice, err := dMgr.getDeviceReadOnly(ctx, childDevice.ParentId)
+ if err != nil {
+ return nil, err
+ }
+ return parentDevice, nil
+
}
/*
diff --git a/rw_core/core/device/manager_state_callback.go b/rw_core/core/device/manager_state_callback.go
index 62f79e6..1cb69f2 100644
--- a/rw_core/core/device/manager_state_callback.go
+++ b/rw_core/core/device/manager_state_callback.go
@@ -191,6 +191,7 @@
if err := parentAgent.ChildDeviceLost(ctx, curr); err != nil {
// Just log the message and let the remaining pipeline proceed.
logger.Warnw(ctx, "childDeviceLost", log.Fields{"child-device-id": curr.Id, "parent-device-id": curr.ParentId, "error": err})
+ return err
}
}
// Do not return an error as parent device may also have been deleted. Let the remaining pipeline proceed.