package postgresflexalpha

import (
	"context"
	"crypto/rand"
	"errors"
	"fmt"
	"math"
	"math/big"
	"net/http"
	"time"

	"github.com/hashicorp/terraform-plugin-log/tflog"
	"github.com/stackitcloud/stackit-sdk-go/core/oapierror"
	"github.com/stackitcloud/stackit-sdk-go/core/wait"
	"github.com/stackitcloud/stackit-sdk-go/services/postgresflex/v3alpha1api"
)

// Instance states the wait handlers in this file compare against the Status
// field of a GetInstanceResponse.
//
// Originally noted API values: "READY" "PENDING" "PROGRESSING" "FAILURE"
// "UNKNOWN" "TERMINATING". InstanceStateDeleted is not in that list; it is
// only checked by DeleteInstanceWaitHandler.
const (
	InstanceStateEmpty       = ""
	InstanceStateProgressing = "PROGRESSING"
	InstanceStateSuccess     = "READY"
	InstanceStateFailed      = "FAILURE"
	InstanceStateTerminating = "TERMINATING"
	InstanceStateUnknown     = "UNKNOWN"
	InstanceStatePending     = "PENDING"
	InstanceStateDeleted     = "DELETED"
)

const (
	// failureRetryLimit is how many times a handler re-polls after the API
	// reported FAILURE before it gives up.
	failureRetryLimit = 3
	// failureRetryDelayUnit is the base unit of the randomized pause between
	// such re-polls.
	failureRetryDelayUnit = 30 * time.Second
	// failureRetryMaxFactor is the largest multiplier applied to
	// failureRetryDelayUnit (the multiplier is drawn from 1..failureRetryMaxFactor).
	failureRetryMaxFactor = 7
)

// APIClientInstanceInterface is the subset of the API client used by the
// instance wait handlers. It exists so tests can supply a fake client.
type APIClientInstanceInterface interface {
	GetInstanceRequest(ctx context.Context, projectID, region, instanceID string) v3alpha1api.ApiGetInstanceRequestRequest

	ListUsersRequest(
		ctx context.Context,
		projectID string,
		region string,
		instanceID string,
	) v3alpha1api.ApiListUsersRequestRequest
}

// APIClientUserInterface is the subset of the API client used by
// GetUserByIdWaitHandler. It exists so tests can supply a fake client.
type APIClientUserInterface interface {
	GetUserRequest(ctx context.Context, projectID, region, instanceID string, userID int32) v3alpha1api.ApiGetUserRequestRequest
}

// APIClientDatabaseInterface is the subset of the API client used by
// GetDatabaseByIdWaitHandler. It exists so tests can supply a fake client.
type APIClientDatabaseInterface interface {
	GetDatabaseRequest(ctx context.Context, projectID string, region string, instanceID string, databaseID int32) v3alpha1api.ApiGetDatabaseRequestRequest
}

// waitBeforeFailureRetry logs the retry and then pauses for a random multiple
// (1 to failureRetryMaxFactor) of failureRetryDelayUnit before the caller
// polls again.
//
// The pause ends early when ctx is done; in that case ctx.Err() is returned so
// a cancelled operation does not block for minutes. A nil return means the
// full pause elapsed.
func waitBeforeFailureRetry(ctx context.Context, failedCount int) error {
	tflog.Warn(
		ctx, "got failed status from API retry", map[string]any{
			"failedCount": failedCount,
		},
	)

	// rand.Int yields a value in [0, failureRetryMaxFactor). If the random
	// source fails, fall back to the shortest pause rather than aborting.
	var factor int64 = 1
	if n, randErr := rand.Int(rand.Reader, big.NewInt(failureRetryMaxFactor)); randErr == nil {
		factor = n.Int64() + 1
	}

	timer := time.NewTimer(time.Duration(factor) * failureRetryDelayUnit)
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}

// CreateInstanceWaitHandler returns a handler that waits for instance creation.
//
// The check runs in two phases:
//  1. Poll the instance until it reports InstanceStateSuccess. When the access
//     scope is "SNA", the instance and router addresses must also be set.
//     A PROGRESSING status is tolerated for 45 minutes, extended up to three
//     times by 5 minutes each. A FAILURE status is retried up to
//     failureRetryLimit times with a randomized pause, because the API has
//     been observed to answer FAILURE briefly before switching to READY.
//  2. Once the instance is ready, call the list-users endpoint until it
//     succeeds, because user operations are not available right away.
//     A response with a status code below 500 ends the wait with an error;
//     5xx responses are polled again.
//
// On success the handler yields the instance response captured in phase 1.
func CreateInstanceWaitHandler(
	ctx context.Context, a APIClientInstanceInterface, projectID, region,
	instanceID string,
) *wait.AsyncActionHandler[v3alpha1api.GetInstanceResponse] {
	var (
		instanceCreated     bool
		instanceGetResponse *v3alpha1api.GetInstanceResponse
		extendedTimeout     int
		failedCount         int
	)
	maxWait := 45 * time.Minute
	startTime := time.Now()

	handler := wait.New(
		func() (waitFinished bool, response *v3alpha1api.GetInstanceResponse, err error) {
			if !instanceCreated {
				s, getErr := a.GetInstanceRequest(ctx, projectID, region, instanceID).Execute()
				if getErr != nil {
					return false, nil, getErr
				}
				if s == nil || s.Id != instanceID {
					return false, nil, nil
				}
				tflog.Debug(
					ctx, "waiting for instance ready", map[string]any{
						"status": s.Status,
					},
				)
				switch s.Status {
				case InstanceStateEmpty, InstanceStatePending, InstanceStateUnknown:
					return false, nil, nil
				case InstanceStateProgressing:
					if time.Since(startTime) < maxWait {
						return false, nil, nil
					}
					tflog.Warn(
						ctx,
						fmt.Sprintf(
							"Wait handler still got status %s after %v for instance: %s",
							InstanceStateProgressing,
							maxWait,
							instanceID,
						),
					)
					// Grant a limited number of extensions before giving up.
					if extendedTimeout < 3 {
						maxWait += time.Minute * 5
						extendedTimeout++
						return false, nil, nil
					}
					return false, nil, fmt.Errorf("instance after max timeout still in state %s", InstanceStateProgressing)
				case InstanceStateSuccess:
					if s.Network.AccessScope != nil && *s.Network.AccessScope == "SNA" {
						if s.Network.InstanceAddress == nil {
							tflog.Warn(ctx, "Waiting for instance_address")
							return false, nil, nil
						}
						if s.Network.RouterAddress == nil {
							tflog.Warn(ctx, "Waiting for router_address")
							return false, nil, nil
						}
					}
					// Remember the result; later iterations skip straight to
					// the list-users probe below.
					instanceCreated = true
					instanceGetResponse = s
				case InstanceStateFailed:
					// API responds with FAILURE for some seconds and then the
					// instance goes to READY, so retry a few times first.
					if failedCount < failureRetryLimit {
						failedCount++
						if sleepErr := waitBeforeFailureRetry(ctx, failedCount); sleepErr != nil {
							return false, nil, sleepErr
						}
						return false, nil, nil
					}
					return true, s, fmt.Errorf(
						"create got status FAILURE for instance with id %s after %d retries",
						instanceID,
						failedCount,
					)
				default:
					return true, s, fmt.Errorf("instance with id %s has unexpected status %s", instanceID, s.Status)
				}
			}

			tflog.Info(ctx, "Waiting for instance (calling list users")
			// User operations aren't available right after an instance is
			// deemed successful. To check if they are, perform a users request.
			_, err = a.ListUsersRequest(ctx, projectID, region, instanceID).Execute()
			if err == nil {
				return true, instanceGetResponse, nil
			}
			var oapiErr *oapierror.GenericOpenAPIError
			if !errors.As(err, &oapiErr) {
				return false, nil, err
			}
			// TODO: refactor and cooperate with api guys to mitigate
			if oapiErr.StatusCode < 500 {
				return true, instanceGetResponse, fmt.Errorf(
					"users request after instance creation returned %d status code",
					oapiErr.StatusCode,
				)
			}
			return false, nil, nil
		},
	)
	return handler
}

// PartialUpdateInstanceWaitHandler returns a handler that waits for an
// instance update to finish.
//
// It polls the instance until it reports InstanceStateSuccess. Empty, PENDING,
// PROGRESSING, TERMINATING and UNKNOWN statuses keep the wait going. A FAILURE
// status is retried up to failureRetryLimit times with a randomized pause
// before the wait ends with an error. Any other status ends the wait with an
// error immediately.
func PartialUpdateInstanceWaitHandler(
	ctx context.Context, a APIClientInstanceInterface, projectID, region,
	instanceID string,
) *wait.AsyncActionHandler[v3alpha1api.GetInstanceResponse] {
	failedCount := 0

	handler := wait.New(
		func() (waitFinished bool, response *v3alpha1api.GetInstanceResponse, err error) {
			s, err := a.GetInstanceRequest(ctx, projectID, region, instanceID).Execute()
			if err != nil {
				return false, nil, err
			}
			if s == nil || s.Id != instanceID {
				return false, nil, nil
			}
			switch s.Status {
			case InstanceStateSuccess:
				return true, s, nil
			case InstanceStateEmpty,
				InstanceStatePending,
				InstanceStateProgressing,
				InstanceStateTerminating,
				InstanceStateUnknown:
				return false, nil, nil
			case InstanceStateFailed:
				if failedCount < failureRetryLimit {
					failedCount++
					if sleepErr := waitBeforeFailureRetry(ctx, failedCount); sleepErr != nil {
						return false, nil, sleepErr
					}
					return false, nil, nil
				}
				return true, s, fmt.Errorf(
					"update got status FAILURE for instance with id %s after %d retries",
					instanceID,
					failedCount,
				)
			default:
				return true, s, fmt.Errorf("instance with id %s has unexpected status %s", instanceID, s.Status)
			}
		},
	)
	return handler
}

// GetUserByIdWaitHandler returns a handler that waits until the user with the
// given ID can be fetched.
//
// Responses with status 404, 502, 503 or 504 are treated as "not there yet"
// and polled again. Any other error ends the wait. Note the parameter order
// (projectID, instanceID, region) differs from the instance handlers.
func GetUserByIdWaitHandler(
	ctx context.Context,
	a APIClientUserInterface,
	projectID, instanceID, region string,
	userID int64,
) *wait.AsyncActionHandler[v3alpha1api.GetUserResponse] {
	handler := wait.New(
		func() (waitFinished bool, response *v3alpha1api.GetUserResponse, err error) {
			if userID > math.MaxInt32 {
				return false, nil, fmt.Errorf("userID too large for int32")
			}
			userID32 := int32(userID) //nolint:gosec // checked above
			s, err := a.GetUserRequest(ctx, projectID, region, instanceID, userID32).Execute()
			if err != nil {
				var oapiErr *oapierror.GenericOpenAPIError
				if !errors.As(err, &oapiErr) {
					return false, nil, fmt.Errorf("could not convert error to oapierror.GenericOpenAPIError: %w", err)
				}
				switch oapiErr.StatusCode {
				// Go switch cases do not fall through, so the gateway errors
				// must share this case explicitly to be retried.
				case http.StatusBadGateway,
					http.StatusGatewayTimeout,
					http.StatusServiceUnavailable,
					http.StatusNotFound:
					tflog.Warn(
						ctx, "api responded with status", map[string]any{
							"status": oapiErr.StatusCode,
						},
					)
					return false, nil, nil
				default:
					return false, nil, err
				}
			}
			return true, s, nil
		},
	)
	return handler
}

// GetDatabaseByIdWaitHandler returns a handler that waits until the database
// with the given ID can be fetched.
//
// Responses with status 404, 502, 503 or 504 are treated as "not there yet"
// and polled again. Any other error ends the wait. Note the parameter order
// (projectID, instanceID, region) differs from the instance handlers.
func GetDatabaseByIdWaitHandler(
	ctx context.Context,
	a APIClientDatabaseInterface,
	projectID, instanceID, region string,
	databaseID int64,
) *wait.AsyncActionHandler[v3alpha1api.GetDatabaseResponse] {
	handler := wait.New(
		func() (waitFinished bool, response *v3alpha1api.GetDatabaseResponse, err error) {
			if databaseID > math.MaxInt32 {
				return false, nil, fmt.Errorf("databaseID too large for int32")
			}
			dbId32 := int32(databaseID) //nolint:gosec // is checked above
			s, err := a.GetDatabaseRequest(ctx, projectID, region, instanceID, dbId32).Execute()
			if err != nil {
				var oapiErr *oapierror.GenericOpenAPIError
				if !errors.As(err, &oapiErr) {
					return false, nil, fmt.Errorf("could not convert error to oapierror.GenericOpenAPIError: %w", err)
				}
				switch oapiErr.StatusCode {
				case http.StatusBadGateway,
					http.StatusGatewayTimeout,
					http.StatusServiceUnavailable,
					http.StatusNotFound:
					// Both kinds of status are retried the same way; only the
					// log text differs, and it is kept as it was per status.
					msg := "api responded with 50[2,3,4] status"
					if oapiErr.StatusCode == http.StatusNotFound {
						msg = "api responded with 404 status"
					}
					tflog.Warn(
						ctx, msg, map[string]any{
							"status": oapiErr.StatusCode,
						},
					)
					return false, nil, nil
				default:
					return false, nil, err
				}
			}
			return true, s, nil
		},
	)
	return handler
}

// DeleteInstanceWaitHandler blocks until the instance is gone or the wait
// fails, and returns the resulting error (nil on success).
//
// The instance counts as deleted when the API answers 404 or reports
// InstanceStateDeleted. Empty, PENDING, UNKNOWN, PROGRESSING, READY and
// TERMINATING statuses keep the wait going, as does an empty response. A
// FAILURE status is retried up to failureRetryLimit times with a randomized
// pause. timeout and sleepBeforeWait are passed to the underlying handler.
func DeleteInstanceWaitHandler(
	ctx context.Context,
	a APIClientInstanceInterface,
	projectID, region, instanceID string,
	timeout, sleepBeforeWait time.Duration,
) error {
	failedCount := 0

	handler := wait.New(
		func() (waitFinished bool, response *v3alpha1api.GetInstanceResponse, err error) {
			s, err := a.GetInstanceRequest(ctx, projectID, region, instanceID).Execute()
			if err != nil {
				var oapiErr *oapierror.GenericOpenAPIError
				if !errors.As(err, &oapiErr) {
					return false, nil, fmt.Errorf("received error is no oapierror: %w", err)
				}
				if oapiErr.StatusCode == http.StatusNotFound {
					return true, nil, nil
				}
				return false, nil, fmt.Errorf("api returned error: %w", err)
			}
			// Guard against an empty response before reading its status.
			if s == nil {
				return false, nil, nil
			}
			switch s.Status {
			case InstanceStateDeleted:
				return true, nil, nil
			case InstanceStateEmpty,
				InstanceStatePending,
				InstanceStateUnknown,
				InstanceStateProgressing,
				InstanceStateSuccess,
				// NOTE(review): TERMINATING is assumed to be the in-progress
				// state of a delete — confirm against the API.
				InstanceStateTerminating:
				return false, nil, nil
			case InstanceStateFailed:
				if failedCount < failureRetryLimit {
					failedCount++
					if sleepErr := waitBeforeFailureRetry(ctx, failedCount); sleepErr != nil {
						return false, nil, sleepErr
					}
					return false, nil, nil
				}
				return true, nil, fmt.Errorf("wait handler got status FAILURE for instance: %s", instanceID)
			default:
				return true, s, fmt.Errorf("instance with id %s has unexpected status %s", instanceID, s.Status)
			}
		},
	).
		SetTimeout(timeout).
		SetSleepBeforeWait(sleepBeforeWait)

	_, err := handler.WaitWithContext(ctx)
	return err
}