terraform-provider-stackitp.../docs/guides/ske_log_alerts.md
Alexander Dahmen 093fb0de2a
Update network_area and ske documentation and examples (#929)
Signed-off-by: Alexander Dahmen <alexander.dahmen@inovex.de>
2025-07-28 12:11:20 +02:00

6.1 KiB

---
page_title: "SKE Log Alerts with STACKIT Observability"
---

# SKE Log Alerts with STACKIT Observability

## Overview

This guide walks you through setting up log-based alerting in STACKIT Observability for workloads running on a STACKIT Kubernetes Engine (SKE) cluster, using Promtail to ship the Kubernetes logs.

  1. Set Up Providers

    Begin by configuring the STACKIT provider to connect to the STACKIT services, and the Kubernetes and Helm providers to connect to the SKE cluster created in the next step.

    # STACKIT provider. `default_region` sets the region used by regional
    # services; it replaces the deprecated `region` argument.
    provider "stackit" {
      default_region = "eu01"
    }
    
    # Decode the generated kubeconfig once and share the relevant parts
    # between the Kubernetes and Helm providers below.
    locals {
      kubeconfig         = yamldecode(stackit_ske_kubeconfig.example.kube_config)
      kubeconfig_cluster = local.kubeconfig.clusters[0].cluster
      kubeconfig_user    = local.kubeconfig.users[0].user
    }

    # Kubernetes provider, authenticated with the client certificate from the
    # SKE kubeconfig. The certificate fields are base64-encoded in the
    # kubeconfig, so they are decoded before being handed to the provider.
    provider "kubernetes" {
      host                   = local.kubeconfig_cluster.server
      client_certificate     = base64decode(local.kubeconfig_user["client-certificate-data"])
      client_key             = base64decode(local.kubeconfig_user["client-key-data"])
      cluster_ca_certificate = base64decode(local.kubeconfig_cluster["certificate-authority-data"])
    }

    # Helm provider, pointed at the same cluster with the same credentials.
    # NOTE(review): the nested `kubernetes { ... }` block is the Helm provider
    # v2 syntax; v3 appears to expect `kubernetes = { ... }` instead - confirm
    # against the provider version you pin.
    provider "helm" {
      kubernetes {
        host                   = local.kubeconfig_cluster.server
        client_certificate     = base64decode(local.kubeconfig_user["client-certificate-data"])
        client_key             = base64decode(local.kubeconfig_user["client-key-data"])
        cluster_ca_certificate = base64decode(local.kubeconfig_cluster["certificate-authority-data"])
      }
    }
    
  2. Create SKE Cluster and Kubeconfig Resource

    Set up a STACKIT SKE Cluster and generate the associated kubeconfig resource.

    # SKE cluster with a single autoscaling node pool spread over three
    # availability zones and a nightly one-hour maintenance window.
    resource "stackit_ske_cluster" "example" {
      project_id             = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
      name                   = "example"
      kubernetes_version_min = "1.31"
      node_pools = [
        {
          name         = "standard"
          machine_type = "c1.4"
          # Node counts are written as numbers, consistent with volume_size.
          minimum            = 3
          maximum            = 9
          max_surge          = 3
          availability_zones = ["eu01-1", "eu01-2", "eu01-3"]
          os_version_min     = "4081.2.1"
          os_name            = "flatcar"
          volume_size        = 32
          volume_type        = "storage_premium_perf6"
        }
      ]
      maintenance = {
        enable_kubernetes_version_updates    = true
        enable_machine_image_version_updates = true
        start                                = "01:00:00Z"
        end                                  = "02:00:00Z"
      }
    }
    
    # Kubeconfig for the cluster above; its `kube_config` output feeds the
    # Kubernetes and Helm provider configuration.
    resource "stackit_ske_kubeconfig" "example" {
      cluster_name = stackit_ske_cluster.example.name
      project_id   = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
      refresh      = true
    }
    
  3. Create Observability Instance and Credentials

    Create a STACKIT Observability instance with an alert configuration, along with the credentials that Promtail will use to push logs to it.

    # Alert routing used by the Observability instance: every alert goes to a
    # single e-mail receiver.
    locals {
      alert_config = {
        # The receiver named here must match an entry in `receivers` below.
        route = {
          receiver        = "EmailStackit"
          repeat_interval = "1m"
          continue        = true
        }

        receivers = [
          {
            name = "EmailStackit"
            email_configs = [
              {
                # Replace with the address that should receive the alerts.
                to = "<email>"
              }
            ]
          }
        ]
      }
    }
    
    # Observability instance configured with the alert routing from
    # `local.alert_config`.
    resource "stackit_observability_instance" "example" {
      project_id   = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
      name         = "example"
      plan_name    = "Observability-Large-EU01"
      alert_config = local.alert_config
    }
    
    # Credentials for the Observability instance; the username and password
    # are embedded in the Loki push URL that Promtail uses.
    resource "stackit_observability_credential" "example" {
      instance_id = stackit_observability_instance.example.instance_id
      project_id  = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
    }
    
  4. Install Promtail

    Deploy Promtail via Helm to collect logs and forward them to the STACKIT Observability Loki endpoint.

    # Namespace that hosts the Promtail release. It has to be declared here
    # because the Helm release below references it.
    resource "kubernetes_namespace" "monitoring" {
      metadata {
        name = "monitoring"
      }
    }

    # Promtail, installed from the Grafana Helm repository and configured to
    # push logs to the Observability instance's Loki endpoint, authenticating
    # with the credential's username and password embedded in the URL.
    resource "helm_release" "promtail" {
      name       = "promtail"
      repository = "https://grafana.github.io/helm-charts"
      chart      = "promtail"
      namespace  = kubernetes_namespace.monitoring.metadata[0].name
      version    = "6.16.4"

      values = [
        <<-EOF
        config:
          clients:
          # To find the Loki push URL, navigate to the observability instance in the portal and select the API tab.
          - url: "https://${stackit_observability_credential.example.username}:${stackit_observability_credential.example.password}@<your-loki-push-url>/instances/${stackit_observability_instance.example.instance_id}/loki/api/v1/push"
        EOF
      ]
    }
    
  5. Create Alert Group

    Create a log alert that triggers when a specific pod logs an error message.

    # Log alert group with a single rule, evaluated every minute.
    resource "stackit_observability_logalertgroup" "example" {
      project_id  = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
      instance_id = stackit_observability_instance.example.instance_id
      name        = "TestLogAlertGroup"
      interval    = "1m"

      rules = [
        {
          alert = "SimplePodLogAlertCheck"
          # Fires when the "logger" pod in the "example" namespace has logged
          # at least one line containing "Simulated error message" within the
          # last minute.
          expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Simulated error message\" [1m])) > 0"
          for        = "60s"
          labels = {
            severity = "critical"
          }
          annotations = {
            summary     = "Test Log Alert is working"
            description = "Test Log Alert"
          }
        }
      ]
    }
    
  6. Deploy Test Pod

    Launch a pod that emits simulated error logs. This should trigger the alert if everything is set up correctly.

    # Namespace for the test pod; the name matches the `namespace="example"`
    # selector used in the log alert expression.
    resource "kubernetes_namespace" "example" {
      metadata {
        name = "example"
      }
    }
    
    # Test pod that writes a simulated error line to stderr every 1-3 seconds.
    # Its name and namespace match the selector in the log alert expression.
    resource "kubernetes_pod" "logger" {
      metadata {
        name      = "logger"
        namespace = kubernetes_namespace.example.metadata[0].name
        labels = {
          app = "logger"
        }
      }

      spec {
        container {
          name  = "logger"
          image = "bash"
          command = [
            "bash",
            "-c",
            # Indented heredoc (<<-) so the common leading whitespace is
            # stripped from the script. `$(...)` is shell command
            # substitution, not Terraform interpolation, and is passed
            # through unchanged.
            <<-EOF
            while true; do
              sleep $(shuf -i 1-3 -n 1)
              echo "ERROR: $(date) - Simulated error message $(shuf -i 1-100 -n 1)" 1>&2
            done
            EOF
          ]
        }
      }
    }