From f1292caf1f8f909265f9ea0aeb4ffcdec35ec9eb Mon Sep 17 00:00:00 2001
From: Oded David <oded@coralogix.com>
Date: Wed, 3 Dec 2025 12:57:32 +0200
Subject: [PATCH 1/2] feat(helm): add ServiceMonitor and PrometheusRule support

- Add serviceMonitor configuration to values.yaml with interval and scrapeTimeout options
- Add prometheusRule configuration with RDSControllerSyncErrors alert (severity: critical)
- Create service-monitor.yaml template for Prometheus Operator ServiceMonitor
- Create prometheus-rule.yaml template for Prometheus Operator PrometheusRule
- Update values.schema.json with schema definitions for new options
---
 helm/templates/prometheus-rule.yaml | 25 ++++++++++++++
 helm/templates/service-monitor.yaml | 36 ++++++++++++++++++++
 helm/values.schema.json             | 53 +++++++++++++++++++++++++++++
 helm/values.yaml                    | 26 ++++++++++++++
 4 files changed, 140 insertions(+)
 create mode 100644 helm/templates/prometheus-rule.yaml
 create mode 100644 helm/templates/service-monitor.yaml

diff --git a/helm/templates/prometheus-rule.yaml b/helm/templates/prometheus-rule.yaml
new file mode 100644
index 0000000..9fd6010
--- /dev/null
+++ b/helm/templates/prometheus-rule.yaml
@@ -0,0 +1,25 @@
+{{- /* Prometheus Operator PrometheusRule for the controller; rendered only when metrics.prometheusRule.enabled is true. */ -}}
+{{- if .Values.metrics.prometheusRule.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ .Chart.Name | trimSuffix "-chart" | trunc 44 }}-controller-rules
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    app.kubernetes.io/name: {{ include "ack-rds-controller.app.name" . }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+    k8s-app: {{ include "ack-rds-controller.app.name" . }}
+    helm.sh/chart: {{ include "ack-rds-controller.chart.name-version" . }}
+    {{- /* User-supplied labels are appended after the chart's own labels. */}}
+    {{- with .Values.metrics.prometheusRule.additionalLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  groups:
+    - name: {{ include "ack-rds-controller.app.name" . }}
+      rules:
+        {{- /* Fall back to an empty list so an emptied rules value renders as [] instead of null. */}}
+        {{- toYaml (.Values.metrics.prometheusRule.rules | default (list)) | nindent 8 }}
+{{- end }}
diff --git a/helm/templates/service-monitor.yaml b/helm/templates/service-monitor.yaml
new file mode 100644
index 0000000..4c82a5a
--- /dev/null
+++ b/helm/templates/service-monitor.yaml
@@ -0,0 +1,36 @@
+{{- /* Prometheus Operator ServiceMonitor scraping port "metricsport" of matching Services in the release namespace; rendered only when metrics.serviceMonitor.enabled is true. */ -}}
+{{- if .Values.metrics.serviceMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ .Chart.Name | trimSuffix "-chart" | trunc 44 }}-controller-metrics
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    app.kubernetes.io/name: {{ include "ack-rds-controller.app.name" . }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+    k8s-app: {{ include "ack-rds-controller.app.name" . }}
+    helm.sh/chart: {{ include "ack-rds-controller.chart.name-version" . }}
+    {{- with .Values.metrics.serviceMonitor.additionalLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: {{ include "ack-rds-controller.app.name" . }}
+      app.kubernetes.io/instance: {{ .Release.Name | quote }}
+      app.kubernetes.io/managed-by: Helm
+      k8s-app: {{ include "ack-rds-controller.app.name" . }}
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace | quote }}
+  endpoints:
+    - port: metricsport
+      {{- with .Values.metrics.serviceMonitor.interval }}
+      interval: {{ . | quote }}
+      {{- end }}
+      {{- with .Values.metrics.serviceMonitor.scrapeTimeout }}
+      scrapeTimeout: {{ . | quote }}
+      {{- end }}
+{{- end }}
diff --git a/helm/values.schema.json b/helm/values.schema.json
index c3f56a0..e186962 100644
--- a/helm/values.schema.json
+++ b/helm/values.schema.json
@@ -104,6 +104,59 @@
               "type"
           ],
           "type": "object"
+        },
+        "serviceMonitor": {
+          "description": "Prometheus ServiceMonitor settings",
+          "properties": {
+            "enabled": {
+              "type": "boolean"
+            },
+            "additionalLabels": {
+              "type": "object"
+            },
+            "interval": {
+              "type": "string"
+            },
+            "scrapeTimeout": {
+              "type": "string"
+            }
+          },
+          "type": "object"
+        },
+        "prometheusRule": {
+          "description": "Prometheus PrometheusRule settings",
+          "properties": {
+            "enabled": {
+              "type": "boolean"
+            },
+            "additionalLabels": {
+              "type": "object"
+            },
+            "rules": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "alert": {
+                    "type": "string"
+                  },
+                  "expr": {
+                    "type": "string"
+                  },
+                  "for": {
+                    "type": "string"
+                  },
+                  "labels": {
+                    "type": "object"
+                  },
+                  "annotations": {
+                    "type": "object"
+                  }
+                }
+              }
+            }
+          },
+          "type": "object"
         }
       },
       "required": [
diff --git a/helm/values.yaml b/helm/values.yaml
index 8f2aea3..994e73c 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -77,6 +77,32 @@ metrics:
     # Which Type to use for the Kubernetes Service?
     # See: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
     type: "ClusterIP"
+  serviceMonitor:
+    # Set to true to create a Prometheus Operator ServiceMonitor for the metrics endpoint
+    # Requires the Prometheus Operator CRDs to be installed
+    enabled: false
+    # Additional labels for the ServiceMonitor (e.g., for Prometheus selector)
+    additionalLabels: {}
+    # Scrape interval (string, e.g. 30s); when empty, the field is left out of the ServiceMonitor
+    interval: 30s
+    # Scrape timeout (string, e.g. 10s); when empty, the field is left out of the ServiceMonitor
+    scrapeTimeout: 10s
+  prometheusRule:
+    # Set to true to create a Prometheus Operator PrometheusRule from the rules below
+    # Requires the Prometheus Operator CRDs; the default alert's expr matches a fixed job label
+    enabled: false
+    # Additional labels for the PrometheusRule (e.g., for Prometheus selector)
+    additionalLabels: {}
+    # Alert rules (can be overridden or extended)
+    rules:
+      - alert: RDSControllerSyncErrors
+        expr: sum by (controller) (rate(controller_runtime_reconcile_errors_total{job="rds-controller-controller-metrics"}[10m])) > 0.5
+        for: 5m
+        labels:
+          severity: critical
+        annotations:
+          description: RDS controller having sync errors in the last 10 minutes for controller {{ $labels.controller }}
+          summary: RDS controller having sync errors with one or more objects
 
 resources:
   requests:

From 8010ab549495097b30abc62c9d16488a604fcae3 Mon Sep 17 00:00:00 2001
From: Oded David <oded@coralogix.com>
Date: Wed, 3 Dec 2025 12:59:22 +0200
Subject: [PATCH 2/2] fix(helm): correct job label in PrometheusRule alert
 expression

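The alert expression referenced job='rds-controller-controller-metrics',
but targets scraped through the ServiceMonitor carry
job='rds-controller-metrics': the ServiceMonitor sets no jobLabel, so the
job label defaults to the name of the scraped Service. Fixed the mismatch
so the alert matches the scraped metrics.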
---
 helm/values.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/helm/values.yaml b/helm/values.yaml
index 994e73c..fef14bc 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -96,7 +96,7 @@ metrics:
     # Alert rules (can be overridden or extended)
     rules:
       - alert: RDSControllerSyncErrors
-        expr: sum by (controller) (rate(controller_runtime_reconcile_errors_total{job="rds-controller-controller-metrics"}[10m])) > 0.5
+        expr: sum by (controller) (rate(controller_runtime_reconcile_errors_total{job="rds-controller-metrics"}[10m])) > 0.5
         for: 5m
         labels:
           severity: critical