feat: more_tests (#85)

## Description

relates to #1234

## Checklist

- [ ] Issue was linked above
- [ ] Code format was applied: `make fmt`
- [ ] Examples were added / adjusted (see `examples/` directory)
- [x] Docs are up-to-date: `make generate-docs` (will be checked by CI)
- [ ] Unit tests got implemented or updated
- [ ] Acceptance tests got implemented or updated (see e.g. [here](f5f99d1709/stackit/internal/services/dns/dns_acc_test.go))
- [x] Unit tests are passing: `make test` (will be checked by CI)
- [x] No linter issues: `make lint` (will be checked by CI)

Co-authored-by: Marcel S. Henselin <marcel.henselin@stackit.cloud>
Reviewed-on: #85
2026-03-17 15:02:08 +00:00 · 2026-03-17 15:02:08 +00:00 · dd77da71dd
commit dd77da71dd
parent 3790894563
37 changed files with 2473 additions and 1742 deletions
--- a/stackit/internal/wait/postgresflexalpha/wait.go
+++ b/stackit/internal/wait/postgresflexalpha/wait.go
@@ -2,9 +2,11 @@ package postgresflexalpha

 import (
 	"context"
+	"crypto/rand"
 	"errors"
 	"fmt"
 	"math"
+	"math/big"
 	"net/http"
 	"time"

@@ -29,45 +31,47 @@ const (

 // APIClientInstanceInterface Interface needed for tests
 type APIClientInstanceInterface interface {
-	GetInstanceRequest(ctx context.Context, projectId, region, instanceId string) v3alpha1api.ApiGetInstanceRequestRequest
+	GetInstanceRequest(ctx context.Context, projectID, region, instanceID string) v3alpha1api.ApiGetInstanceRequestRequest

 	ListUsersRequest(
 		ctx context.Context,
-		projectId string,
+		projectID string,
 		region string,
-		instanceId string,
+		instanceID string,
 	) v3alpha1api.ApiListUsersRequestRequest
 }

 // APIClientUserInterface Interface needed for tests
 type APIClientUserInterface interface {
-	GetUserRequest(ctx context.Context, projectId, region, instanceId string, userId int32) v3alpha1api.ApiGetUserRequestRequest
+	GetUserRequest(ctx context.Context, projectID, region, instanceID string, userID int32) v3alpha1api.ApiGetUserRequestRequest
 }

 // APIClientDatabaseInterface Interface needed for tests
 type APIClientDatabaseInterface interface {
-	GetDatabaseRequest(ctx context.Context, projectId string, region string, instanceId string, databaseId int32) v3alpha1api.ApiGetDatabaseRequestRequest
+	GetDatabaseRequest(ctx context.Context, projectID string, region string, instanceID string, databaseID int32) v3alpha1api.ApiGetDatabaseRequestRequest
 }

 // CreateInstanceWaitHandler will wait for instance creation
 func CreateInstanceWaitHandler(
-	ctx context.Context, a APIClientInstanceInterface, projectId, region,
-	instanceId string,
+	ctx context.Context, a APIClientInstanceInterface, projectID, region,
+	instanceID string,
 ) *wait.AsyncActionHandler[v3alpha1api.GetInstanceResponse] {
 	instanceCreated := false
 	var instanceGetResponse *v3alpha1api.GetInstanceResponse
 	maxWait := time.Minute * 45
 	startTime := time.Now()
 	extendedTimeout := 0
+	maxFailedCount := 3
+	failedCount := 0

 	handler := wait.New(
 		func() (waitFinished bool, response *v3alpha1api.GetInstanceResponse, err error) {
 			if !instanceCreated {
-				s, err := a.GetInstanceRequest(ctx, projectId, region, instanceId).Execute()
-				if err != nil {
-					return false, nil, err
+				s, getErr := a.GetInstanceRequest(ctx, projectID, region, instanceID).Execute()
+				if getErr != nil {
+					return false, nil, getErr
 				}
-				if s == nil || s.Id != instanceId {
+				if s == nil || s.Id != instanceID {
 					return false, nil, nil
 				}
 				tflog.Debug(
@@ -77,7 +81,7 @@ func CreateInstanceWaitHandler(
 				)
 				switch s.Status {
 				default:
-					return true, s, fmt.Errorf("instance with id %s has unexpected status %s", instanceId, s.Status)
+					return true, s, fmt.Errorf("instance with id %s has unexpected status %s", instanceID, s.Status)
 				case InstanceStateEmpty:
 					return false, nil, nil
 				case InstanceStatePending:
@@ -94,30 +98,15 @@ func CreateInstanceWaitHandler(
 							"Wait handler still got status %s after %v for instance: %s",
 							InstanceStateProgressing,
 							maxWait,
-							instanceId,
+							instanceID,
 						),
 					)
 					if extendedTimeout < 3 {
 						maxWait += time.Minute * 5
 						extendedTimeout++
-						if *s.Network.AccessScope == "SNA" {
-							ready := true
-							if s.Network.InstanceAddress == nil {
-								tflog.Warn(ctx, "Waiting for instance_address")
-								ready = false
-							}
-							if s.Network.RouterAddress == nil {
-								tflog.Warn(ctx, "Waiting for router_address")
-								ready = false
-							}
-							if !ready {
-								return false, nil, nil
-							}
-						}
+						return false, nil, nil
 					}
-
-					instanceCreated = true
-					instanceGetResponse = s
+					return false, nil, fmt.Errorf("instance after max timeout still in state %s", InstanceStateProgressing)
 				case InstanceStateSuccess:
 					if s.Network.AccessScope != nil && *s.Network.AccessScope == "SNA" {
 						if s.Network.InstanceAddress == nil {
@@ -132,8 +121,27 @@ func CreateInstanceWaitHandler(
 					instanceCreated = true
 					instanceGetResponse = s
 				case InstanceStateFailed:
-					tflog.Warn(ctx, fmt.Sprintf("Wait handler got status FAILURE for instance: %s", instanceId))
-					return false, nil, nil
+					if failedCount < maxFailedCount {
+						failedCount++
+						tflog.Warn(
+							ctx, "got failed status from API retry", map[string]interface{}{
+								"failedCount": failedCount,
+							},
+						)
+						var waitCounter int64 = 1
+						maxWaitInt := big.NewInt(7)
+						n, randErr := rand.Int(rand.Reader, maxWaitInt)
+						if randErr == nil {
+							waitCounter = n.Int64() + 1
+						}
+						time.Sleep(time.Duration(waitCounter*30) * time.Second) //nolint:gosec // not that important and temporary
+						return false, nil, nil
+					}
+					return true, s, fmt.Errorf(
+						"update got status FAILURE for instance with id %s after %d retries",
+						instanceID,
+						failedCount,
+					)
 					// API responds with FAILURE for some seconds and then the instance goes to READY
 					// return true, s, fmt.Errorf("create failed for instance with id %s", instanceId)
 				}
@@ -142,7 +150,7 @@ func CreateInstanceWaitHandler(
 			tflog.Info(ctx, "Waiting for instance (calling list users")
 			// 	// User operations aren't available right after an instance is deemed successful
 			//			// To check if they are, perform a users request
-			_, err = a.ListUsersRequest(ctx, projectId, region, instanceId).Execute()
+			_, err = a.ListUsersRequest(ctx, projectID, region, instanceID).Execute()
 			if err == nil {
 				return true, instanceGetResponse, nil
 			}
@@ -150,7 +158,7 @@ func CreateInstanceWaitHandler(
 			if !ok {
 				return false, nil, err
 			}
-			// TODO: refactor and cooperate with api guys to mitigate
+			// TODO: refactor and cooperate with api guys to mitigate  // nolint: // reason upfront
 			if oapiErr.StatusCode < 500 {
 				return true, instanceGetResponse, fmt.Errorf(
 					"users request after instance creation returned %d status code",
@@ -160,8 +168,6 @@ func CreateInstanceWaitHandler(
 			return false, nil, nil
 		},
 	)
-	// Sleep before wait is set because sometimes API returns 404 right after creation request
-	handler.SetTimeout(90 * time.Minute).SetSleepBeforeWait(30 * time.Second)
 	return handler
 }

@@ -170,6 +176,8 @@ func PartialUpdateInstanceWaitHandler(
 	ctx context.Context, a APIClientInstanceInterface, projectID, region,
 	instanceID string,
 ) *wait.AsyncActionHandler[v3alpha1api.GetInstanceResponse] {
+	maxFailedCount := 3
+	failedCount := 0
 	handler := wait.New(
 		func() (waitFinished bool, response *v3alpha1api.GetInstanceResponse, err error) {
 			s, err := a.GetInstanceRequest(ctx, projectID, region, instanceID).Execute()
@@ -195,11 +203,30 @@ func PartialUpdateInstanceWaitHandler(
 			case InstanceStateUnknown:
 				return false, nil, nil
 			case InstanceStateFailed:
-				return true, s, fmt.Errorf("update got status FAILURE for instance with id %s", instanceID)
+				if failedCount < maxFailedCount {
+					failedCount++
+					tflog.Warn(
+						ctx, "got failed status from API retry", map[string]interface{}{
+							"failedCount": failedCount,
+						},
+					)
+					var waitCounter int64 = 1
+					maxWait := big.NewInt(7)
+					n, err := rand.Int(rand.Reader, maxWait)
+					if err == nil {
+						waitCounter = n.Int64() + 1
+					}
+					time.Sleep(time.Duration(waitCounter*30) * time.Second) //nolint:gosec // not that important and temporary
+					return false, nil, nil
+				}
+				return true, s, fmt.Errorf(
+					"update got status FAILURE for instance with id %s after %d retries",
+					instanceID,
+					failedCount,
+				)
 			}
 		},
 	)
-	handler.SetTimeout(45 * time.Minute).SetSleepBeforeWait(30 * time.Second)
 	return handler
 }

@@ -295,6 +322,8 @@ func DeleteInstanceWaitHandler(
 	instanceID string,
 	timeout, sleepBeforeWait time.Duration,
 ) error {
+	maxFailedCount := 3
+	failedCount := 0
 	handler := wait.New(
 		func() (waitFinished bool, response *v3alpha1api.GetInstanceResponse, err error) {
 			s, err := a.GetInstanceRequest(ctx, projectID, region, instanceID).Execute()
@@ -314,6 +343,22 @@ func DeleteInstanceWaitHandler(
 			case InstanceStateEmpty, InstanceStatePending, InstanceStateUnknown, InstanceStateProgressing, InstanceStateSuccess:
 				return false, nil, nil
 			case InstanceStateFailed:
+				if failedCount < maxFailedCount {
+					failedCount++
+					tflog.Warn(
+						ctx, "got failed status from API retry", map[string]interface{}{
+							"failedCount": failedCount,
+						},
+					)
+					var waitCounter int64 = 1
+					maxWait := big.NewInt(7)
+					n, err := rand.Int(rand.Reader, maxWait)
+					if err == nil {
+						waitCounter = n.Int64() + 1
+					}
+					time.Sleep(time.Duration(waitCounter*30) * time.Second) //nolint:gosec // not that important and temporary
+					return false, nil, nil
+				}
 				return true, nil, fmt.Errorf("wait handler got status FAILURE for instance: %s", instanceID)
 			default:
 				return true, s, fmt.Errorf("instance with id %s has unexpected status %s", instanceID, s.Status)