diff --git a/apis/placement/v1beta1/metriccollector_types.go b/apis/placement/v1beta1/metriccollector_types.go new file mode 100644 index 000000000..f6aa0d3e9 --- /dev/null +++ b/apis/placement/v1beta1/metriccollector_types.go @@ -0,0 +1,151 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/kubefleet-dev/kubefleet/apis" +) + +// +genclient +// +genclient:nonNamespaced +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope="Cluster",shortName=mc,categories={fleet,fleet-metrics} +// +kubebuilder:subresource:status +// +kubebuilder:storageversion +// +kubebuilder:printcolumn:JSONPath=`.metadata.generation`,name="Gen",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="MetricCollectorReady")].status`,name="Ready",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.workloadsMonitored`,name="Workloads",type=integer +// +kubebuilder:printcolumn:JSONPath=`.status.lastCollectionTime`,name="Last-Collection",type=date +// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date + +// MetricCollector is used by member-agent to scrape and collect metrics from workloads +// running on the member cluster. It runs on each member cluster and collects metrics +// from Prometheus-compatible endpoints. 
+type MetricCollector struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // The desired state of MetricCollector. + // +required + Spec MetricCollectorSpec `json:"spec"` + + // The observed status of MetricCollector. + // +optional + Status MetricCollectorStatus `json:"status,omitempty"` +} + +// MetricCollectorSpec defines the desired state of MetricCollector. +type MetricCollectorSpec struct { + // PrometheusURL is the URL of the Prometheus server. + // Example: http://prometheus.test-ns.svc.cluster.local:9090 + // +required + // +kubebuilder:validation:Pattern=`^https?://.*$` + PrometheusURL string `json:"prometheusUrl"` + + // ReportNamespace is the namespace in the hub cluster where the MetricCollectorReport will be created. + // This should be the fleet-member-{clusterName} namespace. + // Example: fleet-member-cluster-1 + // +required + ReportNamespace string `json:"reportNamespace"` +} + +// MetricsEndpointSpec defines how to access the metrics endpoint.ctor. +type MetricCollectorStatus struct { + // Conditions is an array of current observed conditions. + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // ObservedGeneration is the generation most recently observed. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // WorkloadsMonitored is the count of workloads being monitored. + // +optional + WorkloadsMonitored int32 `json:"workloadsMonitored,omitempty"` + + // LastCollectionTime is when metrics were last collected. + // +optional + LastCollectionTime *metav1.Time `json:"lastCollectionTime,omitempty"` + + // CollectedMetrics contains the most recent metrics from each workload. + // +optional + CollectedMetrics []WorkloadMetrics `json:"collectedMetrics,omitempty"` +} + +// WorkloadMetrics represents metrics collected from a single workload pod. +type WorkloadMetrics struct { + // Namespace is the namespace of the pod. 
+ // +required + Namespace string `json:"namespace"` + + // ClusterName from the workload_health metric label. + // +required + ClusterName string `json:"clusterName"` + + // WorkloadName from the workload_health metric label (typically the deployment name). + // +required + WorkloadName string `json:"workloadName"` + + // Health indicates if the workload is healthy (true=healthy, false=unhealthy). + // +required + Health bool `json:"health"` +} + +const ( + // MetricCollectorConditionTypeReady indicates the collector is ready. + MetricCollectorConditionTypeReady string = "MetricCollectorReady" + + // MetricCollectorConditionTypeCollecting indicates metrics are being collected. + MetricCollectorConditionTypeCollecting string = "MetricsCollecting" + + // MetricCollectorConditionTypeReported indicates metrics were successfully reported to hub. + MetricCollectorConditionTypeReported string = "MetricsReported" +) + +// +kubebuilder:object:root=true + +// MetricCollectorList contains a list of MetricCollector. +type MetricCollectorList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []MetricCollector `json:"items"` +} + +// GetConditions returns the conditions of the MetricCollector. +func (m *MetricCollector) GetConditions() []metav1.Condition { + return m.Status.Conditions +} + +// SetConditions sets the conditions of the MetricCollector. +func (m *MetricCollector) SetConditions(conditions ...metav1.Condition) { + m.Status.Conditions = conditions +} + +// GetCondition returns the condition of the given MetricCollector. +func (m *MetricCollector) GetCondition(conditionType string) *metav1.Condition { + return meta.FindStatusCondition(m.Status.Conditions, conditionType) +} + +// Ensure MetricCollector implements the ConditionedObj interface. 
+var _ apis.ConditionedObj = &MetricCollector{} + +func init() { + SchemeBuilder.Register(&MetricCollector{}, &MetricCollectorList{}) +} diff --git a/apis/placement/v1beta1/metriccollectorreport_types.go b/apis/placement/v1beta1/metriccollectorreport_types.go new file mode 100644 index 000000000..209da838a --- /dev/null +++ b/apis/placement/v1beta1/metriccollectorreport_types.go @@ -0,0 +1,86 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope="Namespaced",shortName=mcr,categories={fleet,fleet-metrics} +// +kubebuilder:storageversion +// +kubebuilder:printcolumn:JSONPath=`.workloadsMonitored`,name="Workloads",type=integer +// +kubebuilder:printcolumn:JSONPath=`.lastCollectionTime`,name="Last-Collection",type=date +// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date + +// MetricCollectorReport is created by the MetricCollector controller on the hub cluster +// in the fleet-member-{clusterName} namespace to report collected metrics from a member cluster. +// The controller watches MetricCollector objects on the member cluster, collects metrics, +// and syncs the status to the hub as MetricCollectorReport objects. +// +// Controller workflow: +// 1. MetricCollector reconciles and collects metrics on member cluster +// 2. 
Metrics include clusterName from workload_health labels +// 3. Controller creates/updates MetricCollectorReport in fleet-member-{clusterName} namespace on hub +// 4. Report name matches MetricCollector name for easy lookup +// +// Namespace: fleet-member-{clusterName} (extracted from CollectedMetrics[0].ClusterName) +// Name: Same as MetricCollector name +// All metrics in CollectedMetrics are guaranteed to have the same ClusterName. +type MetricCollectorReport struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Conditions copied from the MetricCollector status. + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // ObservedGeneration is the generation most recently observed from the MetricCollector. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // WorkloadsMonitored is the count of workloads being monitored. + // +optional + WorkloadsMonitored int32 `json:"workloadsMonitored,omitempty"` + + // LastCollectionTime is when metrics were last collected on the member cluster. + // +optional + LastCollectionTime *metav1.Time `json:"lastCollectionTime,omitempty"` + + // CollectedMetrics contains the most recent metrics from each workload. + // All metrics are guaranteed to have the same ClusterName since they're collected from one member cluster. + // +optional + CollectedMetrics []WorkloadMetrics `json:"collectedMetrics,omitempty"` + + // LastReportTime is when this report was last synced to the hub. + // +optional + LastReportTime *metav1.Time `json:"lastReportTime,omitempty"` +} + +// +kubebuilder:object:root=true + +// MetricCollectorReportList contains a list of MetricCollectorReport. 
+type MetricCollectorReportList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []MetricCollectorReport `json:"items"` +} + +func init() { + SchemeBuilder.Register(&MetricCollectorReport{}, &MetricCollectorReportList{}) +} diff --git a/apis/placement/v1beta1/workloadtracker_types.go b/apis/placement/v1beta1/workloadtracker_types.go new file mode 100644 index 000000000..62437183b --- /dev/null +++ b/apis/placement/v1beta1/workloadtracker_types.go @@ -0,0 +1,63 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// WorkloadReference represents a workload to be tracked +type WorkloadReference struct { + // Name is the name of the workload + // +required + Name string `json:"name"` + + // Namespace is the namespace of the workload + // +required + Namespace string `json:"namespace"` +} + +// +genclient +// +genclient:nonNamespaced +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope="Cluster",categories={fleet,fleet-placement} +// +kubebuilder:storageversion +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// WorkloadTracker expresses user intent to track certain workloads +type WorkloadTracker struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Workloads is a list of workloads to track + // +optional + Workloads []WorkloadReference `json:"workloads,omitempty"` +} + +// +kubebuilder:object:root=true +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// WorkloadTrackerList contains a list of WorkloadTracker +type WorkloadTrackerList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []WorkloadTracker `json:"items"` +} + +func init() { + SchemeBuilder.Register(&WorkloadTracker{}, &WorkloadTrackerList{}) +} diff --git a/apis/placement/v1beta1/zz_generated.deepcopy.go b/apis/placement/v1beta1/zz_generated.deepcopy.go index b9ff2e710..2086a2189 100644 --- a/apis/placement/v1beta1/zz_generated.deepcopy.go +++ b/apis/placement/v1beta1/zz_generated.deepcopy.go @@ -21,7 +21,7 @@ limitations under the License. 
package v1beta1 import ( - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -1483,6 +1483,188 @@ func (in *ManifestCondition) DeepCopy() *ManifestCondition { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollector) DeepCopyInto(out *MetricCollector) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollector. +func (in *MetricCollector) DeepCopy() *MetricCollector { + if in == nil { + return nil + } + out := new(MetricCollector) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollector) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorList) DeepCopyInto(out *MetricCollectorList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MetricCollector, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorList. +func (in *MetricCollectorList) DeepCopy() *MetricCollectorList { + if in == nil { + return nil + } + out := new(MetricCollectorList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *MetricCollectorList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorReport) DeepCopyInto(out *MetricCollectorReport) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LastCollectionTime != nil { + in, out := &in.LastCollectionTime, &out.LastCollectionTime + *out = (*in).DeepCopy() + } + if in.CollectedMetrics != nil { + in, out := &in.CollectedMetrics, &out.CollectedMetrics + *out = make([]WorkloadMetrics, len(*in)) + copy(*out, *in) + } + if in.LastReportTime != nil { + in, out := &in.LastReportTime, &out.LastReportTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorReport. +func (in *MetricCollectorReport) DeepCopy() *MetricCollectorReport { + if in == nil { + return nil + } + out := new(MetricCollectorReport) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollectorReport) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetricCollectorReportList) DeepCopyInto(out *MetricCollectorReportList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MetricCollectorReport, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorReportList. +func (in *MetricCollectorReportList) DeepCopy() *MetricCollectorReportList { + if in == nil { + return nil + } + out := new(MetricCollectorReportList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollectorReportList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorSpec) DeepCopyInto(out *MetricCollectorSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorSpec. +func (in *MetricCollectorSpec) DeepCopy() *MetricCollectorSpec { + if in == nil { + return nil + } + out := new(MetricCollectorSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetricCollectorStatus) DeepCopyInto(out *MetricCollectorStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LastCollectionTime != nil { + in, out := &in.LastCollectionTime, &out.LastCollectionTime + *out = (*in).DeepCopy() + } + if in.CollectedMetrics != nil { + in, out := &in.CollectedMetrics, &out.CollectedMetrics + *out = make([]WorkloadMetrics, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorStatus. +func (in *MetricCollectorStatus) DeepCopy() *MetricCollectorStatus { + if in == nil { + return nil + } + out := new(MetricCollectorStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NamespacedName) DeepCopyInto(out *NamespacedName) { *out = *in @@ -3185,6 +3367,36 @@ func (in *WorkStatus) DeepCopy() *WorkStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkloadMetrics) DeepCopyInto(out *WorkloadMetrics) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadMetrics. +func (in *WorkloadMetrics) DeepCopy() *WorkloadMetrics { + if in == nil { + return nil + } + out := new(WorkloadMetrics) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkloadReference) DeepCopyInto(out *WorkloadReference) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadReference. 
+func (in *WorkloadReference) DeepCopy() *WorkloadReference { + if in == nil { + return nil + } + out := new(WorkloadReference) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WorkloadTemplate) DeepCopyInto(out *WorkloadTemplate) { *out = *in @@ -3206,3 +3418,65 @@ func (in *WorkloadTemplate) DeepCopy() *WorkloadTemplate { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkloadTracker) DeepCopyInto(out *WorkloadTracker) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Workloads != nil { + in, out := &in.Workloads, &out.Workloads + *out = make([]WorkloadReference, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadTracker. +func (in *WorkloadTracker) DeepCopy() *WorkloadTracker { + if in == nil { + return nil + } + out := new(WorkloadTracker) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *WorkloadTracker) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkloadTrackerList) DeepCopyInto(out *WorkloadTrackerList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]WorkloadTracker, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadTrackerList. 
+func (in *WorkloadTrackerList) DeepCopy() *WorkloadTrackerList { + if in == nil { + return nil + } + out := new(WorkloadTrackerList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *WorkloadTrackerList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} diff --git a/approval-request-controller/Makefile b/approval-request-controller/Makefile new file mode 100644 index 000000000..9bfa96195 --- /dev/null +++ b/approval-request-controller/Makefile @@ -0,0 +1,78 @@ +# Makefile for ApprovalRequest Controller + +# Image settings +IMAGE_NAME ?= approval-request-controller +IMAGE_TAG ?= latest +REGISTRY ?= + +# Build settings +GOARCH ?= amd64 +GOOS ?= linux + +# Tools +CONTROLLER_GEN_VERSION ?= v0.16.0 +CONTROLLER_GEN = go run sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_GEN_VERSION) + +.PHONY: help +help: ## Display this help + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +##@ Code Generation + +.PHONY: manifests +manifests: ## Generate CRD manifests + $(CONTROLLER_GEN) crd paths="./apis/..." output:crd:artifacts:config=config/crd/bases + +.PHONY: generate +generate: ## Generate DeepCopy code + $(CONTROLLER_GEN) object:headerFile="../hack/boilerplate.go.txt" paths="./apis/..." + +##@ Build + +.PHONY: docker-build +docker-build: ## Build docker image + docker buildx build \ + --file docker/Dockerfile \ + --output=type=docker \ + --platform=linux/$(GOARCH) \ + --build-arg GOARCH=$(GOARCH) \ + --tag $(IMAGE_NAME):$(IMAGE_TAG) \ + --build-context kubefleet=.. \ + .. 
+ +.PHONY: docker-push +docker-push: ## Push docker image + docker push $(REGISTRY)$(IMAGE_NAME):$(IMAGE_TAG) + +##@ Development + +.PHONY: run +run: ## Run controller locally + cd .. && go run ./approval-request-controller/cmd/approvalrequestcontroller/main.go + +##@ Deployment + +.PHONY: install +install: ## Install helm chart + helm install approval-request-controller ./charts/approval-request-controller \ + --namespace fleet-system \ + --create-namespace \ + --set image.repository=$(IMAGE_NAME) \ + --set image.tag=$(IMAGE_TAG) + +.PHONY: upgrade +upgrade: ## Upgrade helm chart + helm upgrade approval-request-controller ./charts/approval-request-controller \ + --namespace fleet-system \ + --set image.repository=$(IMAGE_NAME) \ + --set image.tag=$(IMAGE_TAG) + +.PHONY: uninstall +uninstall: ## Uninstall helm chart + helm uninstall approval-request-controller --namespace fleet-system + +##@ Kind + +.PHONY: kind-load +kind-load: docker-build ## Build and load image into kind cluster + kind load docker-image $(IMAGE_NAME):$(IMAGE_TAG) --name hub diff --git a/approval-request-controller/README.md b/approval-request-controller/README.md new file mode 100644 index 000000000..c192faead --- /dev/null +++ b/approval-request-controller/README.md @@ -0,0 +1,78 @@ +# ApprovalRequest Controller + +The ApprovalRequest Controller is a standalone controller that runs on the **hub cluster** to automate approval decisions for staged updates based on workload health metrics. + +## Overview + +This controller is designed to be a standalone component that can run independently from the main kubefleet repository. 
It: +- Uses kubefleet v0.1.2 as an external dependency +- Includes its own APIs for MetricCollectorReport and WorkloadTracker +- Watches `ApprovalRequest` and `ClusterApprovalRequest` resources (from kubefleet) +- Creates `MetricCollector` resources on member clusters via ClusterResourcePlacement +- Monitors workload health via `MetricCollectorReport` objects +- Automatically approves requests when all tracked workloads are healthy +- Runs every 15 seconds to check health status + +## Architecture + +The controller is designed to run on the hub cluster and: +1. Deploys MetricCollector instances to member clusters using CRP +2. Collects health metrics from MetricCollectorReports +3. Compares metrics against WorkloadTracker specifications +4. Approves ApprovalRequests when all workloads are healthy + +## Installation + +### Prerequisites + +The following CRDs must be installed on the hub cluster (installed by kubefleet hub-agent): +- `approvalrequests.placement.kubernetes-fleet.io` +- `clusterapprovalrequests.placement.kubernetes-fleet.io` +- `clusterresourceplacements.placement.kubernetes-fleet.io` +- `clusterresourceoverrides.placement.kubernetes-fleet.io` +- `clusterstagedupdateruns.placement.kubernetes-fleet.io` +- `stagedupdateruns.placement.kubernetes-fleet.io` + +The following CRDs are installed by this chart: +- `metriccollectors.placement.kubernetes-fleet.io` +- `metriccollectorreports.placement.kubernetes-fleet.io` +- `workloadtrackers.placement.kubernetes-fleet.io` + +### Install via Helm + +```bash +# Build the image +make docker-build IMAGE_NAME=approval-request-controller IMAGE_TAG=latest + +# Load into kind (if using kind) +kind load docker-image approval-request-controller:latest --name hub + +# Install the chart +helm install approval-request-controller ./charts/approval-request-controller \ + --namespace fleet-system \ + --create-namespace +``` + +## Configuration + +Key settings: +- `controller.logLevel`: Log verbosity (default: 2) +- 
`controller.resources`: Resource requests and limits +- `rbac.create`: Create RBAC resources (default: true) +- `crds.install`: Install MetricCollector, MetricCollectorReport, and WorkloadTracker CRDs (default: true) +- `rbac.create`: Create RBAC resources (default: true) +- `crds.install`: Install MetricCollector and MetricCollectorReport CRDs (default: true) + +## Development + +### Build + +```bash +make docker-build +``` + +### Test Locally + +```bash +go run ./cmd/approvalrequestcontroller/main.go +``` diff --git a/approval-request-controller/apis/placement/v1beta1/doc.go b/approval-request-controller/apis/placement/v1beta1/doc.go new file mode 100644 index 000000000..5acd18c11 --- /dev/null +++ b/approval-request-controller/apis/placement/v1beta1/doc.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1beta1 contains API Schema definitions for the placement v1beta1 API group +// +kubebuilder:object:generate=true +// +groupName=placement.kubernetes-fleet.io +package v1beta1 diff --git a/approval-request-controller/apis/placement/v1beta1/groupversion_info.go b/approval-request-controller/apis/placement/v1beta1/groupversion_info.go new file mode 100644 index 000000000..c488f5dca --- /dev/null +++ b/approval-request-controller/apis/placement/v1beta1/groupversion_info.go @@ -0,0 +1,35 @@ +/* +Copyright 2025 The KubeFleet Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +kubebuilder:object:generate=true +// +groupName=placement.kubernetes-fleet.io +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "placement.kubernetes-fleet.io", Version: "v1beta1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/approval-request-controller/apis/placement/v1beta1/metriccollector_types.go b/approval-request-controller/apis/placement/v1beta1/metriccollector_types.go new file mode 100644 index 000000000..f8f6abc1e --- /dev/null +++ b/approval-request-controller/apis/placement/v1beta1/metriccollector_types.go @@ -0,0 +1,146 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +genclient:nonNamespaced +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope="Cluster",shortName=mc,categories={fleet,fleet-metrics} +// +kubebuilder:subresource:status +// +kubebuilder:storageversion +// +kubebuilder:printcolumn:JSONPath=`.metadata.generation`,name="Gen",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="MetricCollectorReady")].status`,name="Ready",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.workloadsMonitored`,name="Workloads",type=integer +// +kubebuilder:printcolumn:JSONPath=`.status.lastCollectionTime`,name="Last-Collection",type=date +// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date + +// MetricCollector is used by member-agent to scrape and collect metrics from workloads +// running on the member cluster. It runs on each member cluster and collects metrics +// from Prometheus-compatible endpoints. +type MetricCollector struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // The desired state of MetricCollector. + // +required + Spec MetricCollectorSpec `json:"spec"` + + // The observed status of MetricCollector. + // +optional + Status MetricCollectorStatus `json:"status,omitempty"` +} + +// MetricCollectorSpec defines the desired state of MetricCollector. +type MetricCollectorSpec struct { + // PrometheusURL is the URL of the Prometheus server. + // Example: http://prometheus.test-ns.svc.cluster.local:9090 + // +required + // +kubebuilder:validation:Pattern=`^https?://.*$` + PrometheusURL string `json:"prometheusUrl"` + + // ReportNamespace is the namespace in the hub cluster where the MetricCollectorReport will be created. 
+ // This should be the fleet-member-{clusterName} namespace. + // Example: fleet-member-cluster-1 + // +required + ReportNamespace string `json:"reportNamespace"` +} + +// MetricCollectorStatus defines the observed state of MetricCollector. +type MetricCollectorStatus struct { + // Conditions is an array of current observed conditions. + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // ObservedGeneration is the generation most recently observed. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // WorkloadsMonitored is the count of workloads being monitored. + // +optional + WorkloadsMonitored int32 `json:"workloadsMonitored,omitempty"` + + // LastCollectionTime is when metrics were last collected. + // +optional + LastCollectionTime *metav1.Time `json:"lastCollectionTime,omitempty"` + + // CollectedMetrics contains the most recent metrics from each workload. + // +optional + CollectedMetrics []WorkloadMetrics `json:"collectedMetrics,omitempty"` +} + +// WorkloadMetrics represents metrics collected from a single workload pod. +type WorkloadMetrics struct { + // Namespace is the namespace of the pod. + // +required + Namespace string `json:"namespace"` + + // ClusterName from the workload_health metric label. + // +required + ClusterName string `json:"clusterName"` + + // WorkloadName from the workload_health metric label (typically the deployment name). + // +required + WorkloadName string `json:"workloadName"` + + // Health indicates if the workload is healthy (true=healthy, false=unhealthy). + // +required + Health bool `json:"health"` +} + +const ( + // MetricCollectorConditionTypeReady indicates the collector is ready. + MetricCollectorConditionTypeReady string = "MetricCollectorReady" + + // MetricCollectorConditionTypeCollecting indicates metrics are being collected. 
+ MetricCollectorConditionTypeCollecting string = "MetricsCollecting" + + // MetricCollectorConditionTypeReported indicates metrics were successfully reported to hub. + MetricCollectorConditionTypeReported string = "MetricsReported" +) + +// +kubebuilder:object:root=true + +// MetricCollectorList contains a list of MetricCollector. +type MetricCollectorList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []MetricCollector `json:"items"` +} + +// GetConditions returns the conditions of the MetricCollector. +func (m *MetricCollector) GetConditions() []metav1.Condition { + return m.Status.Conditions +} + +// SetConditions sets the conditions of the MetricCollector. +func (m *MetricCollector) SetConditions(conditions ...metav1.Condition) { + m.Status.Conditions = conditions +} + +// GetCondition returns the condition of the given MetricCollector. +func (m *MetricCollector) GetCondition(conditionType string) *metav1.Condition { + return meta.FindStatusCondition(m.Status.Conditions, conditionType) +} + +func init() { + SchemeBuilder.Register(&MetricCollector{}, &MetricCollectorList{}) +} diff --git a/approval-request-controller/apis/placement/v1beta1/metriccollectorreport_types.go b/approval-request-controller/apis/placement/v1beta1/metriccollectorreport_types.go new file mode 100644 index 000000000..209da838a --- /dev/null +++ b/approval-request-controller/apis/placement/v1beta1/metriccollectorreport_types.go @@ -0,0 +1,86 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope="Namespaced",shortName=mcr,categories={fleet,fleet-metrics} +// +kubebuilder:storageversion +// +kubebuilder:printcolumn:JSONPath=`.workloadsMonitored`,name="Workloads",type=integer +// +kubebuilder:printcolumn:JSONPath=`.lastCollectionTime`,name="Last-Collection",type=date +// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date + +// MetricCollectorReport is created by the MetricCollector controller on the hub cluster +// in the fleet-member-{clusterName} namespace to report collected metrics from a member cluster. +// The controller watches MetricCollector objects on the member cluster, collects metrics, +// and syncs the status to the hub as MetricCollectorReport objects. +// +// Controller workflow: +// 1. MetricCollector reconciles and collects metrics on member cluster +// 2. Metrics include clusterName from workload_health labels +// 3. Controller creates/updates MetricCollectorReport in fleet-member-{clusterName} namespace on hub +// 4. Report name matches MetricCollector name for easy lookup +// +// Namespace: fleet-member-{clusterName} (extracted from CollectedMetrics[0].ClusterName) +// Name: Same as MetricCollector name +// All metrics in CollectedMetrics are guaranteed to have the same ClusterName. +type MetricCollectorReport struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Conditions copied from the MetricCollector status. + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // ObservedGeneration is the generation most recently observed from the MetricCollector. 
+ // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // WorkloadsMonitored is the count of workloads being monitored. + // +optional + WorkloadsMonitored int32 `json:"workloadsMonitored,omitempty"` + + // LastCollectionTime is when metrics were last collected on the member cluster. + // +optional + LastCollectionTime *metav1.Time `json:"lastCollectionTime,omitempty"` + + // CollectedMetrics contains the most recent metrics from each workload. + // All metrics are guaranteed to have the same ClusterName since they're collected from one member cluster. + // +optional + CollectedMetrics []WorkloadMetrics `json:"collectedMetrics,omitempty"` + + // LastReportTime is when this report was last synced to the hub. + // +optional + LastReportTime *metav1.Time `json:"lastReportTime,omitempty"` +} + +// +kubebuilder:object:root=true + +// MetricCollectorReportList contains a list of MetricCollectorReport. +type MetricCollectorReportList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []MetricCollectorReport `json:"items"` +} + +func init() { + SchemeBuilder.Register(&MetricCollectorReport{}, &MetricCollectorReportList{}) +} diff --git a/approval-request-controller/apis/placement/v1beta1/workloadtracker_types.go b/approval-request-controller/apis/placement/v1beta1/workloadtracker_types.go new file mode 100644 index 000000000..62437183b --- /dev/null +++ b/approval-request-controller/apis/placement/v1beta1/workloadtracker_types.go @@ -0,0 +1,63 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// WorkloadReference represents a workload to be tracked +type WorkloadReference struct { + // Name is the name of the workload + // +required + Name string `json:"name"` + + // Namespace is the namespace of the workload + // +required + Namespace string `json:"namespace"` +} + +// +genclient +// +genclient:nonNamespaced +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope="Cluster",categories={fleet,fleet-placement} +// +kubebuilder:storageversion +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// WorkloadTracker expresses user intent to track certain workloads +type WorkloadTracker struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Workloads is a list of workloads to track + // +optional + Workloads []WorkloadReference `json:"workloads,omitempty"` +} + +// +kubebuilder:object:root=true +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// WorkloadTrackerList contains a list of WorkloadTracker +type WorkloadTrackerList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []WorkloadTracker `json:"items"` +} + +func init() { + SchemeBuilder.Register(&WorkloadTracker{}, &WorkloadTrackerList{}) +} diff --git a/approval-request-controller/apis/placement/v1beta1/zz_generated.deepcopy.go b/approval-request-controller/apis/placement/v1beta1/zz_generated.deepcopy.go new file mode 100644 index 000000000..298aad2bd --- /dev/null +++ 
b/approval-request-controller/apis/placement/v1beta1/zz_generated.deepcopy.go @@ -0,0 +1,300 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollector) DeepCopyInto(out *MetricCollector) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollector. +func (in *MetricCollector) DeepCopy() *MetricCollector { + if in == nil { + return nil + } + out := new(MetricCollector) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollector) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetricCollectorList) DeepCopyInto(out *MetricCollectorList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MetricCollector, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorList. +func (in *MetricCollectorList) DeepCopy() *MetricCollectorList { + if in == nil { + return nil + } + out := new(MetricCollectorList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollectorList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorReport) DeepCopyInto(out *MetricCollectorReport) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LastCollectionTime != nil { + in, out := &in.LastCollectionTime, &out.LastCollectionTime + *out = (*in).DeepCopy() + } + if in.CollectedMetrics != nil { + in, out := &in.CollectedMetrics, &out.CollectedMetrics + *out = make([]WorkloadMetrics, len(*in)) + copy(*out, *in) + } + if in.LastReportTime != nil { + in, out := &in.LastReportTime, &out.LastReportTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorReport. 
+func (in *MetricCollectorReport) DeepCopy() *MetricCollectorReport { + if in == nil { + return nil + } + out := new(MetricCollectorReport) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollectorReport) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorReportList) DeepCopyInto(out *MetricCollectorReportList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MetricCollectorReport, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorReportList. +func (in *MetricCollectorReportList) DeepCopy() *MetricCollectorReportList { + if in == nil { + return nil + } + out := new(MetricCollectorReportList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollectorReportList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorSpec) DeepCopyInto(out *MetricCollectorSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorSpec. 
+func (in *MetricCollectorSpec) DeepCopy() *MetricCollectorSpec { + if in == nil { + return nil + } + out := new(MetricCollectorSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorStatus) DeepCopyInto(out *MetricCollectorStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LastCollectionTime != nil { + in, out := &in.LastCollectionTime, &out.LastCollectionTime + *out = (*in).DeepCopy() + } + if in.CollectedMetrics != nil { + in, out := &in.CollectedMetrics, &out.CollectedMetrics + *out = make([]WorkloadMetrics, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorStatus. +func (in *MetricCollectorStatus) DeepCopy() *MetricCollectorStatus { + if in == nil { + return nil + } + out := new(MetricCollectorStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkloadMetrics) DeepCopyInto(out *WorkloadMetrics) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadMetrics. +func (in *WorkloadMetrics) DeepCopy() *WorkloadMetrics { + if in == nil { + return nil + } + out := new(WorkloadMetrics) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkloadReference) DeepCopyInto(out *WorkloadReference) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadReference. 
+func (in *WorkloadReference) DeepCopy() *WorkloadReference { + if in == nil { + return nil + } + out := new(WorkloadReference) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkloadTracker) DeepCopyInto(out *WorkloadTracker) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Workloads != nil { + in, out := &in.Workloads, &out.Workloads + *out = make([]WorkloadReference, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadTracker. +func (in *WorkloadTracker) DeepCopy() *WorkloadTracker { + if in == nil { + return nil + } + out := new(WorkloadTracker) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *WorkloadTracker) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkloadTrackerList) DeepCopyInto(out *WorkloadTrackerList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]WorkloadTracker, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadTrackerList. +func (in *WorkloadTrackerList) DeepCopy() *WorkloadTrackerList { + if in == nil { + return nil + } + out := new(WorkloadTrackerList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *WorkloadTrackerList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} diff --git a/approval-request-controller/charts/approval-request-controller/Chart.yaml b/approval-request-controller/charts/approval-request-controller/Chart.yaml new file mode 100644 index 000000000..f5e253c0a --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: approval-request-controller +description: A Helm chart for ApprovalRequest Controller on Hub Cluster +type: application +version: 0.1.0 +appVersion: "1.0" diff --git a/approval-request-controller/charts/approval-request-controller/templates/_helpers.tpl b/approval-request-controller/charts/approval-request-controller/templates/_helpers.tpl new file mode 100644 index 000000000..a603facd5 --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/templates/_helpers.tpl @@ -0,0 +1,60 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "approval-request-controller.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "approval-request-controller.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "approval-request-controller.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "approval-request-controller.labels" -}} +helm.sh/chart: {{ include "approval-request-controller.chart" . }} +{{ include "approval-request-controller.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "approval-request-controller.selectorLabels" -}} +app.kubernetes.io/name: {{ include "approval-request-controller.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "approval-request-controller.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "approval-request-controller.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/approval-request-controller/charts/approval-request-controller/templates/crds/metriccollectorreports.yaml b/approval-request-controller/charts/approval-request-controller/templates/crds/metriccollectorreports.yaml new file mode 100644 index 000000000..53a3420c5 --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/templates/crds/metriccollectorreports.yaml @@ -0,0 +1,176 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: metriccollectorreports.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-metrics + kind: MetricCollectorReport + listKind: MetricCollectorReportList + plural: metriccollectorreports + shortNames: + - mcr + singular: metriccollectorreport + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .workloadsMonitored + name: Workloads + type: integer + - jsonPath: .lastCollectionTime + name: Last-Collection + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + MetricCollectorReport is created by the MetricCollector controller on the hub cluster + in the fleet-member-{clusterName} namespace to report collected metrics from a member cluster. + The controller watches MetricCollector objects on the member cluster, collects metrics, + and syncs the status to the hub as MetricCollectorReport objects. + + Controller workflow: + 1. MetricCollector reconciles and collects metrics on member cluster + 2. Metrics include clusterName from workload_health labels + 3. Controller creates/updates MetricCollectorReport in fleet-member-{clusterName} namespace on hub + 4. 
Report name matches MetricCollector name for easy lookup + + Namespace: fleet-member-{clusterName} (extracted from CollectedMetrics[0].ClusterName) + Name: Same as MetricCollector name + All metrics in CollectedMetrics are guaranteed to have the same ClusterName. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + collectedMetrics: + description: |- + CollectedMetrics contains the most recent metrics from each workload. + All metrics are guaranteed to have the same ClusterName since they're collected from one member cluster. + items: + description: WorkloadMetrics represents metrics collected from a single + workload pod. + properties: + clusterName: + description: ClusterName from the workload_health metric label. + type: string + health: + description: Health indicates if the workload is healthy (true=healthy, + false=unhealthy). + type: boolean + namespace: + description: Namespace is the namespace of the pod. + type: string + workloadName: + description: WorkloadName from the workload_health metric label + (typically the deployment name). + type: string + required: + - clusterName + - health + - namespace + - workloadName + type: object + type: array + conditions: + description: Conditions copied from the MetricCollector status. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. 
+ format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + lastCollectionTime: + description: LastCollectionTime is when metrics were last collected on + the member cluster. 
+ format: date-time + type: string + lastReportTime: + description: LastReportTime is when this report was last synced to the + hub. + format: date-time + type: string + metadata: + type: object + observedGeneration: + description: ObservedGeneration is the generation most recently observed + from the MetricCollector. + format: int64 + type: integer + workloadsMonitored: + description: WorkloadsMonitored is the count of workloads being monitored. + format: int32 + type: integer + type: object + served: true + storage: true + subresources: {} diff --git a/approval-request-controller/charts/approval-request-controller/templates/crds/metriccollectors.yaml b/approval-request-controller/charts/approval-request-controller/templates/crds/metriccollectors.yaml new file mode 100644 index 000000000..47679a15f --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/templates/crds/metriccollectors.yaml @@ -0,0 +1,189 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: metriccollectors.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-metrics + kind: MetricCollector + listKind: MetricCollectorList + plural: metriccollectors + shortNames: + - mc + singular: metriccollector + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.generation + name: Gen + type: string + - jsonPath: .status.conditions[?(@.type=="MetricCollectorReady")].status + name: Ready + type: string + - jsonPath: .status.workloadsMonitored + name: Workloads + type: integer + - jsonPath: .status.lastCollectionTime + name: Last-Collection + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + MetricCollector is used by member-agent to scrape and collect metrics from workloads + running on the 
member cluster. It runs on each member cluster and collects metrics + from Prometheus-compatible endpoints. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: The desired state of MetricCollector. + properties: + prometheusUrl: + description: |- + PrometheusURL is the URL of the Prometheus server. + Example: http://prometheus.test-ns.svc.cluster.local:9090 + pattern: ^https?://.*$ + type: string + reportNamespace: + description: |- + ReportNamespace is the namespace in the hub cluster where the MetricCollectorReport will be created. + This should be the fleet-member-{clusterName} namespace. + Example: fleet-member-cluster-1 + type: string + required: + - prometheusUrl + - reportNamespace + type: object + status: + description: The observed status of MetricCollector. + properties: + collectedMetrics: + description: CollectedMetrics contains the most recent metrics from + each workload. + items: + description: WorkloadMetrics represents metrics collected from a + single workload pod. + properties: + clusterName: + description: ClusterName from the workload_health metric label. + type: string + health: + description: Health indicates if the workload is healthy (true=healthy, + false=unhealthy). + type: boolean + namespace: + description: Namespace is the namespace of the pod. 
+ type: string + workloadName: + description: WorkloadName from the workload_health metric label + (typically the deployment name). + type: string + required: + - clusterName + - health + - namespace + - workloadName + type: object + type: array + conditions: + description: Conditions is an array of current observed conditions. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + lastCollectionTime: + description: LastCollectionTime is when metrics were last collected. + format: date-time + type: string + observedGeneration: + description: ObservedGeneration is the generation most recently observed. + format: int64 + type: integer + workloadsMonitored: + description: WorkloadsMonitored is the count of workloads being monitored. + format: int32 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/approval-request-controller/charts/approval-request-controller/templates/crds/workloadtrackers.yaml b/approval-request-controller/charts/approval-request-controller/templates/crds/workloadtrackers.yaml new file mode 100644 index 000000000..b28e8da04 --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/templates/crds/workloadtrackers.yaml @@ -0,0 +1,60 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: workloadtrackers.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-placement + kind: WorkloadTracker + listKind: WorkloadTrackerList + plural: workloadtrackers + singular: workloadtracker + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: WorkloadTracker expresses user intent to track certain workloads + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + workloads: + description: Workloads is a list of workloads to track + items: + description: WorkloadReference represents a workload to be tracked + properties: + name: + description: Name is the name of the workload + type: string + namespace: + description: Namespace is the namespace of the workload + type: string + required: + - name + - namespace + type: object + type: array + type: object + served: true + storage: true diff --git a/approval-request-controller/charts/approval-request-controller/templates/deployment.yaml b/approval-request-controller/charts/approval-request-controller/templates/deployment.yaml new file mode 100644 index 000000000..654acba15 --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/templates/deployment.yaml @@ -0,0 +1,84 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "approval-request-controller.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "approval-request-controller.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.controller.replicas }} + selector: + matchLabels: + {{- include "approval-request-controller.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "approval-request-controller.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + serviceAccountName: {{ include "approval-request-controller.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: controller + securityContext: + {{- toYaml .Values.securityContext | nindent 10 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - /approval-request-controller + args: + - --v={{ .Values.controller.logLevel }} + - --metrics-bind-address=:{{ .Values.metrics.port }} + - --health-probe-bind-address=:{{ .Values.healthProbe.port }} + + ports: + {{- if .Values.metrics.enabled }} + - name: metrics + containerPort: {{ .Values.metrics.port }} + protocol: TCP + {{- end }} + {{- if .Values.healthProbe.enabled }} + - name: health + containerPort: {{ .Values.healthProbe.port }} + protocol: TCP + {{- end }} + + {{- if .Values.healthProbe.enabled }} + livenessProbe: + httpGet: + path: /healthz + port: health + initialDelaySeconds: 15 + periodSeconds: 20 + + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 5 + periodSeconds: 10 + {{- end }} + + resources: + {{- toYaml .Values.controller.resources | nindent 10 }} + + {{- with .Values.controller.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.controller.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.controller.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/approval-request-controller/charts/approval-request-controller/templates/rbac.yaml b/approval-request-controller/charts/approval-request-controller/templates/rbac.yaml new file mode 100644 index 000000000..68608bcb8 --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/templates/rbac.yaml @@ -0,0 +1,72 @@ +{{- if .Values.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "approval-request-controller.fullname" . }} + labels: + {{- include "approval-request-controller.labels" . | nindent 4 }} +rules: + # CRD access for checking prerequisites + - apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["get", "list"] + + # ApprovalRequest and ClusterApprovalRequest + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["approvalrequests", "clusterapprovalrequests"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["approvalrequests/status", "clusterapprovalrequests/status"] + verbs: ["update", "patch"] + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["approvalrequests/finalizers", "clusterapprovalrequests/finalizers"] + verbs: ["update"] + + # MetricCollector and MetricCollectorReport + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["metriccollectors", "metriccollectorreports"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["metriccollectors/status", "metriccollectorreports/status"] + verbs: ["update", "patch"] + + # ClusterResourcePlacement and ClusterResourceOverride + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["clusterresourceplacements", "clusterresourceoverrides"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + + # UpdateRuns + - apiGroups: ["placement.kubernetes-fleet.io"] + 
resources: ["stagedupdateruns", "clusterstagedupdateruns"] + verbs: ["get", "list", "watch"] + + # WorkloadTracker + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["workloadtrackers"] + verbs: ["get", "list", "watch"] + + # Events + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + + # Leader election + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "create", "update", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "approval-request-controller.fullname" . }} + labels: + {{- include "approval-request-controller.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "approval-request-controller.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ include "approval-request-controller.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/approval-request-controller/charts/approval-request-controller/templates/serviceaccount.yaml b/approval-request-controller/charts/approval-request-controller/templates/serviceaccount.yaml new file mode 100644 index 000000000..ba3fdd187 --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "approval-request-controller.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "approval-request-controller.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/approval-request-controller/charts/approval-request-controller/values.yaml b/approval-request-controller/charts/approval-request-controller/values.yaml new file mode 100644 index 000000000..89713c089 --- /dev/null +++ b/approval-request-controller/charts/approval-request-controller/values.yaml @@ -0,0 +1,84 @@ +# Default values for approval-request-controller +# This is a YAML-formatted file. + +# Controller image configuration +image: + repository: approval-request-controller + pullPolicy: IfNotPresent + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +# Controller configuration +controller: + # Number of replicas + replicas: 1 + + # Log verbosity level (0-10) + logLevel: 2 + + # Resource requests and limits + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 128Mi + + # Node selector + nodeSelector: {} + + # Tolerations + tolerations: [] + + # Affinity + affinity: {} + +# RBAC configuration +rbac: + create: true + +# ServiceAccount configuration +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. 
+  # If not set and create is true, a name is generated using the fullname template
+  name: ""
+
+# Pod annotations
+podAnnotations: {}
+
+# Pod security context
+podSecurityContext:
+  runAsNonRoot: true
+  runAsUser: 65532
+  fsGroup: 65532
+
+# Container security context
+securityContext:
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop:
+    - ALL
+  readOnlyRootFilesystem: true
+
+# Metrics server configuration
+metrics:
+  enabled: true
+  port: 8080
+
+# Health probe configuration
+healthProbe:
+  enabled: true
+  port: 8081
+
+# CRD installation
+crds:
+  # Install the CRDs bundled with this chart (e.g. WorkloadTracker); set to false if CRDs are managed separately
+  install: true
diff --git a/approval-request-controller/cmd/approvalrequestcontroller/main.go b/approval-request-controller/cmd/approvalrequestcontroller/main.go
new file mode 100644
index 000000000..99d73f22d
--- /dev/null
+++ b/approval-request-controller/cmd/approvalrequestcontroller/main.go
@@ -0,0 +1,169 @@
+/*
+Copyright 2025 The KubeFleet Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package main + +import ( + "context" + "flag" + "fmt" + "os" + + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" + localv1beta1 "github.com/kubefleet-dev/kubefleet/approval-request-controller/apis/placement/v1beta1" + "github.com/kubefleet-dev/kubefleet/approval-request-controller/pkg/controller" +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(placementv1beta1.AddToScheme(scheme)) + utilruntime.Must(localv1beta1.AddToScheme(scheme)) + utilruntime.Must(apiextensionsv1.AddToScheme(scheme)) +} + +func main() { + var metricsAddr string + var probeAddr string + var logLevel int + + flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.IntVar(&logLevel, "v", 2, "Log level (0-10)") + + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + + klog.InfoS("Starting ApprovalRequest Controller", "logLevel", logLevel) + + config := ctrl.GetConfigOrDie() + + // Check required CRDs are installed before starting + if err := checkRequiredCRDs(config); err != nil { + klog.ErrorS(err, "Required CRDs not found") + os.Exit(1) + } + + mgr, err := 
ctrl.NewManager(config, ctrl.Options{ + Scheme: scheme, + Metrics: metricsserver.Options{ + BindAddress: metricsAddr, + }, + HealthProbeBindAddress: probeAddr, + }) + if err != nil { + klog.ErrorS(err, "Unable to create manager") + os.Exit(1) + } + + // Setup ApprovalRequest controller + approvalRequestReconciler := &controller.Reconciler{ + Client: mgr.GetClient(), + } + if err = approvalRequestReconciler.SetupWithManagerForApprovalRequest(mgr); err != nil { + klog.ErrorS(err, "Unable to create controller", "controller", "ApprovalRequest") + os.Exit(1) + } + + // Setup ClusterApprovalRequest controller + clusterApprovalRequestReconciler := &controller.Reconciler{ + Client: mgr.GetClient(), + } + if err = clusterApprovalRequestReconciler.SetupWithManagerForClusterApprovalRequest(mgr); err != nil { + klog.ErrorS(err, "Unable to create controller", "controller", "ClusterApprovalRequest") + os.Exit(1) + } + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + klog.ErrorS(err, "Unable to set up health check") + os.Exit(1) + } + + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + klog.ErrorS(err, "Unable to set up ready check") + os.Exit(1) + } + + klog.InfoS("Starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + klog.ErrorS(err, "Problem running manager") + os.Exit(1) + } +} + +// checkRequiredCRDs checks that all required CRDs are installed +func checkRequiredCRDs(config *rest.Config) error { + requiredCRDs := []string{ + "approvalrequests.placement.kubernetes-fleet.io", + "clusterapprovalrequests.placement.kubernetes-fleet.io", + "metriccollectors.placement.kubernetes-fleet.io", + "metriccollectorreports.placement.kubernetes-fleet.io", + "workloadtrackers.placement.kubernetes-fleet.io", + "clusterresourceplacements.placement.kubernetes-fleet.io", + "clusterresourceoverrides.placement.kubernetes-fleet.io", + "clusterstagedupdateruns.placement.kubernetes-fleet.io", + 
"stagedupdateruns.placement.kubernetes-fleet.io", + } + + klog.InfoS("Checking for required CRDs", "count", len(requiredCRDs)) + + c, err := client.New(config, client.Options{Scheme: scheme}) + if err != nil { + return err + } + + ctx := context.Background() + missingCRDs := []string{} + + for _, crdName := range requiredCRDs { + crd := &apiextensionsv1.CustomResourceDefinition{} + err := c.Get(ctx, client.ObjectKey{Name: crdName}, crd) + if err != nil { + klog.ErrorS(err, "CRD not found", "crd", crdName) + missingCRDs = append(missingCRDs, crdName) + } else { + klog.V(3).InfoS("CRD found", "crd", crdName) + } + } + + if len(missingCRDs) > 0 { + return fmt.Errorf("missing required CRDs: %v", missingCRDs) + } + + klog.InfoS("All required CRDs are installed") + return nil +} diff --git a/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_metriccollectorreports.yaml b/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_metriccollectorreports.yaml new file mode 100644 index 000000000..53a3420c5 --- /dev/null +++ b/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_metriccollectorreports.yaml @@ -0,0 +1,176 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: metriccollectorreports.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-metrics + kind: MetricCollectorReport + listKind: MetricCollectorReportList + plural: metriccollectorreports + shortNames: + - mcr + singular: metriccollectorreport + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .workloadsMonitored + name: Workloads + type: integer + - jsonPath: .lastCollectionTime + name: Last-Collection + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + 
MetricCollectorReport is created by the MetricCollector controller on the hub cluster + in the fleet-member-{clusterName} namespace to report collected metrics from a member cluster. + The controller watches MetricCollector objects on the member cluster, collects metrics, + and syncs the status to the hub as MetricCollectorReport objects. + + Controller workflow: + 1. MetricCollector reconciles and collects metrics on member cluster + 2. Metrics include clusterName from workload_health labels + 3. Controller creates/updates MetricCollectorReport in fleet-member-{clusterName} namespace on hub + 4. Report name matches MetricCollector name for easy lookup + + Namespace: fleet-member-{clusterName} (extracted from CollectedMetrics[0].ClusterName) + Name: Same as MetricCollector name + All metrics in CollectedMetrics are guaranteed to have the same ClusterName. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + collectedMetrics: + description: |- + CollectedMetrics contains the most recent metrics from each workload. + All metrics are guaranteed to have the same ClusterName since they're collected from one member cluster. + items: + description: WorkloadMetrics represents metrics collected from a single + workload pod. + properties: + clusterName: + description: ClusterName from the workload_health metric label. + type: string + health: + description: Health indicates if the workload is healthy (true=healthy, + false=unhealthy). + type: boolean + namespace: + description: Namespace is the namespace of the pod. + type: string + workloadName: + description: WorkloadName from the workload_health metric label + (typically the deployment name). 
+ type: string + required: + - clusterName + - health + - namespace + - workloadName + type: object + type: array + conditions: + description: Conditions copied from the MetricCollector status. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + lastCollectionTime: + description: LastCollectionTime is when metrics were last collected on + the member cluster. + format: date-time + type: string + lastReportTime: + description: LastReportTime is when this report was last synced to the + hub. + format: date-time + type: string + metadata: + type: object + observedGeneration: + description: ObservedGeneration is the generation most recently observed + from the MetricCollector. + format: int64 + type: integer + workloadsMonitored: + description: WorkloadsMonitored is the count of workloads being monitored. 
+ format: int32 + type: integer + type: object + served: true + storage: true + subresources: {} diff --git a/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml b/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml new file mode 100644 index 000000000..47679a15f --- /dev/null +++ b/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml @@ -0,0 +1,189 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: metriccollectors.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-metrics + kind: MetricCollector + listKind: MetricCollectorList + plural: metriccollectors + shortNames: + - mc + singular: metriccollector + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.generation + name: Gen + type: string + - jsonPath: .status.conditions[?(@.type=="MetricCollectorReady")].status + name: Ready + type: string + - jsonPath: .status.workloadsMonitored + name: Workloads + type: integer + - jsonPath: .status.lastCollectionTime + name: Last-Collection + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + MetricCollector is used by member-agent to scrape and collect metrics from workloads + running on the member cluster. It runs on each member cluster and collects metrics + from Prometheus-compatible endpoints. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: The desired state of MetricCollector. + properties: + prometheusUrl: + description: |- + PrometheusURL is the URL of the Prometheus server. + Example: http://prometheus.test-ns.svc.cluster.local:9090 + pattern: ^https?://.*$ + type: string + reportNamespace: + description: |- + ReportNamespace is the namespace in the hub cluster where the MetricCollectorReport will be created. + This should be the fleet-member-{clusterName} namespace. + Example: fleet-member-cluster-1 + type: string + required: + - prometheusUrl + - reportNamespace + type: object + status: + description: The observed status of MetricCollector. + properties: + collectedMetrics: + description: CollectedMetrics contains the most recent metrics from + each workload. + items: + description: WorkloadMetrics represents metrics collected from a + single workload pod. + properties: + clusterName: + description: ClusterName from the workload_health metric label. + type: string + health: + description: Health indicates if the workload is healthy (true=healthy, + false=unhealthy). + type: boolean + namespace: + description: Namespace is the namespace of the pod. + type: string + workloadName: + description: WorkloadName from the workload_health metric label + (typically the deployment name). + type: string + required: + - clusterName + - health + - namespace + - workloadName + type: object + type: array + conditions: + description: Conditions is an array of current observed conditions. 
+ items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + lastCollectionTime: + description: LastCollectionTime is when metrics were last collected. + format: date-time + type: string + observedGeneration: + description: ObservedGeneration is the generation most recently observed. + format: int64 + type: integer + workloadsMonitored: + description: WorkloadsMonitored is the count of workloads being monitored. + format: int32 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_workloadtrackers.yaml b/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_workloadtrackers.yaml new file mode 100644 index 000000000..b28e8da04 --- /dev/null +++ b/approval-request-controller/config/crd/bases/placement.kubernetes-fleet.io_workloadtrackers.yaml @@ -0,0 +1,60 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: workloadtrackers.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-placement + kind: WorkloadTracker + listKind: WorkloadTrackerList + plural: workloadtrackers + singular: workloadtracker + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: WorkloadTracker expresses user intent to track certain workloads + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + workloads: + description: Workloads is a list of workloads to track + items: + description: WorkloadReference represents a workload to be tracked + properties: + name: + description: Name is the name of the workload + type: string + namespace: + description: Namespace is the namespace of the workload + type: string + required: + - name + - namespace + type: object + type: array + type: object + served: true + storage: true diff --git a/approval-request-controller/docker/Dockerfile b/approval-request-controller/docker/Dockerfile new file mode 100644 index 000000000..46a10d0d7 --- /dev/null +++ b/approval-request-controller/docker/Dockerfile @@ -0,0 +1,27 @@ +# Build stage +FROM golang:1.24 AS builder + +WORKDIR /workspace + +# Copy go mod files +COPY approval-request-controller/go.mod approval-request-controller/go.sum* ./ +RUN go mod download + +# Copy source code +COPY approval-request-controller/apis/ apis/ +COPY approval-request-controller/pkg/ pkg/ +COPY approval-request-controller/cmd/ cmd/ + +# Build the controller +ARG GOARCH=amd64 +RUN CGO_ENABLED=0 GOOS=linux GOARCH=${GOARCH} go build \ + -a -o approval-request-controller \ + ./cmd/approvalrequestcontroller + +# Runtime stage +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/approval-request-controller . 
+USER 65532:65532 + +ENTRYPOINT ["/approval-request-controller"] diff --git a/approval-request-controller/go.mod b/approval-request-controller/go.mod new file mode 100644 index 000000000..936e4b19d --- /dev/null +++ b/approval-request-controller/go.mod @@ -0,0 +1,72 @@ +module github.com/kubefleet-dev/kubefleet/approval-request-controller + +go 1.24.9 + +require ( + github.com/kubefleet-dev/kubefleet v0.1.2 + k8s.io/apiextensions-apiserver v0.34.1 + k8s.io/apimachinery v0.34.1 + k8s.io/client-go v0.34.1 + k8s.io/klog/v2 v2.130.1 + sigs.k8s.io/controller-runtime v0.22.4 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.12.2 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.21.1 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/swag v0.23.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.9.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/onsi/gomega v1.37.0 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib 
v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_golang v1.22.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.62.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.goms.io/fleet-networking v0.3.3 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/net v0.47.0 // indirect + golang.org/x/oauth2 v0.29.0 // indirect + golang.org/x/sync v0.18.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/term v0.37.0 // indirect + golang.org/x/text v0.31.0 // indirect + golang.org/x/time v0.11.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/api v0.34.1 // indirect + k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect + k8s.io/metrics v0.32.3 // indirect + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect +) diff --git a/approval-request-controller/go.sum b/approval-request-controller/go.sum new file mode 100644 index 000000000..90d099504 --- /dev/null +++ b/approval-request-controller/go.sum @@ -0,0 +1,196 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod 
h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= +github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= +github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.21.1 h1:whnzv/pNXtK2FbX/W9yJfRmE2gsmkfahjMKB0fZvcic= +github.com/go-openapi/jsonpointer v0.21.1/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= 
+github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU= +github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go 
v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kubefleet-dev/kubefleet v0.1.2 h1:BUOwehI9iBavU6TEbebrSxtFXHwyOcY1eacHyfHEjxo= +github.com/kubefleet-dev/kubefleet v0.1.2/go.mod h1:EYDCdtdM02qQkH3Gm5/K1cHDy26f2LbM7WzVGn2saLs= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= +github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 
v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= +github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= +github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= +github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= +github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= +github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= 
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.goms.io/fleet-networking v0.3.3 h1:5rwBntaUoLF+E1CzaWAEL4GdvLJPQorKhjgkbLlllPE= +go.goms.io/fleet-networking v0.3.3/go.mod h1:Qgbi8M1fGaz/p5rtb6HJPmTDATWRnMt9HD1gz57WKUc= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.2 
h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/oauth2 v0.29.0 h1:WdYw2tdTK1S8olAzWHdgeqfy+Mtm9XNhv/xJsY65d98= +golang.org/x/oauth2 v0.29.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= +golang.org/x/sync v0.18.0/go.mod 
h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= +golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= +k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= +k8s.io/apiextensions-apiserver v0.34.1 h1:NNPBva8FNAPt1iSVwIE0FsdrVriRXMsaWFMqJbII2CI= +k8s.io/apiextensions-apiserver v0.34.1/go.mod h1:hP9Rld3zF5Ay2Of3BeEpLAToP+l4s5UlxiHfqRaRcMc= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= +k8s.io/client-go v0.34.1/go.mod 
h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= +k8s.io/metrics v0.32.3 h1:2vsBvw0v8rIIlczZ/lZ8Kcqk9tR6Fks9h+dtFNbc2a4= +k8s.io/metrics v0.32.3/go.mod h1:9R1Wk5cb+qJpCQon9h52mgkVCcFeYxcY+YkumfwHVCU= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A= +sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/approval-request-controller/install-on-hub.sh b/approval-request-controller/install-on-hub.sh new file mode 100755 index 000000000..006820f89 --- /dev/null +++ b/approval-request-controller/install-on-hub.sh @@ -0,0 +1,97 @@ +#!/bin/bash +set -e + +# Configuration +HUB_CONTEXT="kind-hub" 
+IMAGE_NAME="approval-request-controller" +IMAGE_TAG="latest" +NAMESPACE="fleet-system" +CHART_NAME="approval-request-controller" + +echo "=== Installing ApprovalRequest Controller on hub cluster ===" +echo "Hub cluster: ${HUB_CONTEXT}" +echo "Namespace: ${NAMESPACE}" +echo "" + +# Step 0: Build and load Docker image +echo "Step 0: Building and loading Docker image..." +cd .. +docker buildx build \ + --file approval-request-controller/docker/Dockerfile \ + --output=type=docker \ + --platform=linux/$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') \ + --tag ${IMAGE_NAME}:${IMAGE_TAG} \ + --build-arg GOARCH=$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') \ + . +cd approval-request-controller +kind load docker-image ${IMAGE_NAME}:${IMAGE_TAG} --name hub +echo "✓ Docker image built and loaded into kind cluster" +echo "" + +# Step 1: Verify kubefleet CRDs are installed +echo "Step 1: Verifying required kubefleet CRDs..." +REQUIRED_CRDS=( + "approvalrequests.placement.kubernetes-fleet.io" + "clusterapprovalrequests.placement.kubernetes-fleet.io" + "clusterresourceplacements.placement.kubernetes-fleet.io" + "clusterresourceoverrides.placement.kubernetes-fleet.io" + "clusterstagedupdateruns.placement.kubernetes-fleet.io" + "stagedupdateruns.placement.kubernetes-fleet.io" +) + +MISSING_CRDS=() +for crd in "${REQUIRED_CRDS[@]}"; do + if ! kubectl --context=${HUB_CONTEXT} get crd ${crd} &>/dev/null; then + MISSING_CRDS+=("${crd}") + fi +done + +if [ ${#MISSING_CRDS[@]} -ne 0 ]; then + echo "Error: Missing required CRDs from kubefleet hub-agent:" + for crd in "${MISSING_CRDS[@]}"; do + echo " - ${crd}" + done + echo "" + echo "Please ensure kubefleet hub-agent is installed first." + exit 1 +fi + +echo "✓ All required kubefleet CRDs are installed" +echo "" + +# Step 2: Install helm chart on hub cluster (includes MetricCollector, MetricCollectorReport, WorkloadTracker CRDs) +echo "Step 2: Installing helm chart on hub cluster..." 
+helm upgrade --install ${CHART_NAME} ./charts/${CHART_NAME} \ + --kube-context=${HUB_CONTEXT} \ + --namespace ${NAMESPACE} \ + --create-namespace \ + --set image.repository=${IMAGE_NAME} \ + --set image.tag=${IMAGE_TAG} \ + --set image.pullPolicy=IfNotPresent \ + --set controller.logLevel=2 + +echo "✓ Helm chart installed on hub cluster" +echo "" + +# Step 3: Verify installation +echo "Step 3: Verifying installation..." +echo "Checking CRDs installed by this chart..." +kubectl --context=${HUB_CONTEXT} get crd | grep -E "metriccollectors|metriccollectorreports|workloadtrackers" || echo " (CRDs may take a moment to appear)" + +echo "" +echo "Checking pods in ${NAMESPACE}..." +kubectl --context=${HUB_CONTEXT} get pods -n ${NAMESPACE} -l app.kubernetes.io/name=${CHART_NAME} + +echo "" +echo "=== Installation Complete ===" +echo "" +echo "To check controller logs:" +echo " kubectl --context=${HUB_CONTEXT} logs -n ${NAMESPACE} -l app.kubernetes.io/name=${CHART_NAME} -f" +echo "" +echo "To verify CRDs:" +echo " kubectl --context=${HUB_CONTEXT} get crd | grep placement.kubernetes-fleet.io" +echo "" +echo "Next steps:" +echo " 1. Create a WorkloadTracker to define which workloads to monitor" +echo " 2. ApprovalRequests will be automatically processed when created by staged updates" +echo "" diff --git a/approval-request-controller/pkg/controller/controller.go b/approval-request-controller/pkg/controller/controller.go new file mode 100644 index 000000000..ca8d9165f --- /dev/null +++ b/approval-request-controller/pkg/controller/controller.go @@ -0,0 +1,596 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package controller features a controller to reconcile ApprovalRequest objects +// and create MetricCollector resources on member clusters for approved stages. +package controller + +import ( + "context" + "fmt" + "time" + + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" + localv1beta1 "github.com/kubefleet-dev/kubefleet/approval-request-controller/apis/placement/v1beta1" + "github.com/kubefleet-dev/kubefleet/pkg/utils" +) + +const ( + // metricCollectorFinalizer is the finalizer added to ApprovalRequest objects + metricCollectorFinalizer = "kubernetes-fleet.io/metric-collector-cleanup" + + // prometheusURL is the default Prometheus URL to use + prometheusURL = "http://prometheus.test-ns.svc.cluster.local:9090" +) + +// Reconciler reconciles an ApprovalRequest object and creates MetricCollector resources +// on member clusters when the approval is granted. 
+type Reconciler struct { + client.Client + recorder record.EventRecorder +} + +// Reconcile reconciles an ApprovalRequest or ClusterApprovalRequest object. +func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + startTime := time.Now() + klog.V(2).InfoS("ApprovalRequest reconciliation starts", "request", req.NamespacedName) + defer func() { + latency := time.Since(startTime).Milliseconds() + klog.V(2).InfoS("ApprovalRequest reconciliation ends", "request", req.NamespacedName, "latency", latency) + }() + + var approvalReqObj placementv1beta1.ApprovalRequestObj + var isClusterScoped bool + + // Check if request has a namespace to determine resource type + if req.Namespace != "" { + // Fetch namespaced ApprovalRequest + approvalReq := &placementv1beta1.ApprovalRequest{} + if err := r.Client.Get(ctx, req.NamespacedName, approvalReq); err != nil { + if errors.IsNotFound(err) { + klog.V(2).InfoS("ApprovalRequest not found, ignoring", "request", req.NamespacedName) + return ctrl.Result{}, nil + } + klog.ErrorS(err, "Failed to get ApprovalRequest", "request", req.NamespacedName) + return ctrl.Result{}, err + } + approvalReqObj = approvalReq + isClusterScoped = false + } else { + // Fetch cluster-scoped ClusterApprovalRequest + clusterApprovalReq := &placementv1beta1.ClusterApprovalRequest{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: req.Name}, clusterApprovalReq); err != nil { + if errors.IsNotFound(err) { + klog.V(2).InfoS("ClusterApprovalRequest not found, ignoring", "request", req.Name) + return ctrl.Result{}, nil + } + klog.ErrorS(err, "Failed to get ClusterApprovalRequest", "request", req.Name) + return ctrl.Result{}, err + } + approvalReqObj = clusterApprovalReq + isClusterScoped = true + } + + return r.reconcileApprovalRequestObj(ctx, approvalReqObj, isClusterScoped) +} + +// reconcileApprovalRequestObj reconciles an ApprovalRequestObj (either ApprovalRequest or ClusterApprovalRequest). 
+func (r *Reconciler) reconcileApprovalRequestObj(ctx context.Context, approvalReqObj placementv1beta1.ApprovalRequestObj, isClusterScoped bool) (ctrl.Result, error) { + obj := approvalReqObj.(client.Object) + approvalReqRef := klog.KObj(obj) + + // Handle deletion + if !obj.GetDeletionTimestamp().IsZero() { + return r.handleDelete(ctx, approvalReqObj, isClusterScoped) + } + + // Check if the approval request is already approved or rejected - stop reconciliation if so + approvedCond := meta.FindStatusCondition(approvalReqObj.GetApprovalRequestStatus().Conditions, string(placementv1beta1.ApprovalRequestConditionApproved)) + if approvedCond != nil && approvedCond.Status == metav1.ConditionTrue { + klog.V(2).InfoS("ApprovalRequest has been approved, stopping reconciliation", "approvalRequest", approvalReqRef) + return ctrl.Result{}, nil + } + + // Add finalizer if not present + if !controllerutil.ContainsFinalizer(obj, metricCollectorFinalizer) { + controllerutil.AddFinalizer(obj, metricCollectorFinalizer) + if err := r.Client.Update(ctx, obj); err != nil { + klog.ErrorS(err, "Failed to add finalizer", "approvalRequest", approvalReqRef) + return ctrl.Result{}, err + } + klog.V(2).InfoS("Added finalizer to ApprovalRequest", "approvalRequest", approvalReqRef) + } + + // Get the UpdateRun (ClusterStagedUpdateRun or StagedUpdateRun) + spec := approvalReqObj.GetApprovalRequestSpec() + updateRunName := spec.TargetUpdateRun + stageName := spec.TargetStage + + var stageStatus *placementv1beta1.StageUpdatingStatus + if isClusterScoped { + updateRun := &placementv1beta1.ClusterStagedUpdateRun{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { + klog.ErrorS(err, "Failed to get ClusterStagedUpdateRun", "approvalRequest", approvalReqRef, "updateRun", updateRunName) + return ctrl.Result{}, err + } + + // Find the stage + for i := range updateRun.Status.StagesStatus { + if updateRun.Status.StagesStatus[i].StageName == stageName { + 
stageStatus = &updateRun.Status.StagesStatus[i] + break + } + } + } else { + updateRun := &placementv1beta1.StagedUpdateRun{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: updateRunName, Namespace: obj.GetNamespace()}, updateRun); err != nil { + klog.ErrorS(err, "Failed to get StagedUpdateRun", "approvalRequest", approvalReqRef, "updateRun", updateRunName) + return ctrl.Result{}, err + } + + // Find the stage + for i := range updateRun.Status.StagesStatus { + if updateRun.Status.StagesStatus[i].StageName == stageName { + stageStatus = &updateRun.Status.StagesStatus[i] + break + } + } + } + + if stageStatus == nil { + err := fmt.Errorf("stage %s not found in UpdateRun %s", stageName, updateRunName) + klog.ErrorS(err, "Failed to find stage", "approvalRequest", approvalReqRef) + return ctrl.Result{}, err + } + + // Get all cluster names from the stage + clusterNames := make([]string, 0, len(stageStatus.Clusters)) + for _, cluster := range stageStatus.Clusters { + clusterNames = append(clusterNames, cluster.ClusterName) + } + + if len(clusterNames) == 0 { + klog.V(2).InfoS("No clusters in stage, skipping", "approvalRequest", approvalReqRef, "stage", stageName) + return ctrl.Result{}, nil + } + + klog.V(2).InfoS("Found clusters in stage", "approvalRequest", approvalReqRef, "stage", stageName, "clusters", clusterNames) + + // Create or update the MetricCollector resource, CRP, and ResourceOverrides + if err := r.ensureMetricCollectorResources(ctx, obj, clusterNames, updateRunName, stageName); err != nil { + klog.ErrorS(err, "Failed to ensure MetricCollector resources", "approvalRequest", approvalReqRef) + return ctrl.Result{}, err + } + + klog.V(2).InfoS("Successfully ensured MetricCollector resources", "approvalRequest", approvalReqRef, "clusters", clusterNames) + + // Check workload health and approve if all workloads are healthy + if err := r.checkWorkloadHealthAndApprove(ctx, approvalReqObj, clusterNames, updateRunName, stageName); err != nil { + 
klog.ErrorS(err, "Failed to check workload health", "approvalRequest", approvalReqRef)
+		return ctrl.Result{RequeueAfter: 15 * time.Second}, err
+	}
+
+	// Requeue after 15 seconds to check again (will stop if approved in next reconciliation)
+	return ctrl.Result{RequeueAfter: 15 * time.Second}, nil
+}
+
+// ensureMetricCollectorResources creates the MetricCollector, ClusterResourcePlacement, and ClusterResourceOverride
+func (r *Reconciler) ensureMetricCollectorResources(
+	ctx context.Context,
+	approvalReq client.Object,
+	clusterNames []string,
+	updateRunName, stageName string,
+) error {
+	// Generate names
+	metricCollectorName := fmt.Sprintf("mc-%s-%s", updateRunName, stageName)
+	crpName := fmt.Sprintf("crp-mc-%s-%s", updateRunName, stageName)
+	roName := fmt.Sprintf("ro-mc-%s-%s", updateRunName, stageName)
+
+	// Create MetricCollector resource (cluster-scoped) on hub
+	metricCollector := &localv1beta1.MetricCollector{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: metricCollectorName,
+			Labels: map[string]string{
+				"app":              "metric-collector",
+				"approval-request": approvalReq.GetName(),
+				"update-run":       updateRunName,
+				"stage":            stageName,
+			},
+		},
+		Spec: localv1beta1.MetricCollectorSpec{
+			PrometheusURL: prometheusURL,
+			// ReportNamespace will be overridden per cluster
+			ReportNamespace: "placeholder",
+		},
+	}
+
+	// Create the MetricCollector only if it does not exist; an existing object is left unchanged
+	existingMC := &localv1beta1.MetricCollector{}
+	err := r.Client.Get(ctx, types.NamespacedName{Name: metricCollectorName}, existingMC)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			if err := r.Client.Create(ctx, metricCollector); err != nil {
+				return fmt.Errorf("failed to create MetricCollector: %w", err)
+			}
+			klog.V(2).InfoS("Created MetricCollector", "metricCollector", klog.KObj(metricCollector))
+		} else {
+			return fmt.Errorf("failed to get MetricCollector: %w", err)
+		}
+	}
+
+	// Build ClusterResourceOverride rules, one per cluster
+	overrideRules := make([]placementv1beta1.OverrideRule, 0, len(clusterNames))
+	for _, clusterName := range clusterNames {
+		reportNamespace := fmt.Sprintf(utils.NamespaceNameFormat, clusterName)
+
+		overrideRules = append(overrideRules, placementv1beta1.OverrideRule{
+			ClusterSelector: &placementv1beta1.ClusterSelector{
+				ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{
+					{
+						LabelSelector: &metav1.LabelSelector{
+							MatchLabels: map[string]string{
+								"kubernetes-fleet.io/cluster-name": clusterName,
+							},
+						},
+					},
+				},
+			},
+			JSONPatchOverrides: []placementv1beta1.JSONPatchOverride{
+				{
+					Operator: placementv1beta1.JSONPatchOverrideOpReplace,
+					Path:     "/spec/reportNamespace",
+					Value:    apiextensionsv1.JSON{Raw: []byte(fmt.Sprintf(`"%s"`, reportNamespace))},
+				},
+			},
+		})
+	}
+
+	// Create ClusterResourceOverride with rules for each cluster
+	clusterResourceOverride := &placementv1beta1.ClusterResourceOverride{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: roName,
+			Labels: map[string]string{
+				"approval-request": approvalReq.GetName(),
+				"update-run":       updateRunName,
+				"stage":            stageName,
+			},
+		},
+		Spec: placementv1beta1.ClusterResourceOverrideSpec{
+			ClusterResourceSelectors: []placementv1beta1.ResourceSelectorTerm{
+				{
+					Group:   "placement.kubernetes-fleet.io",
+					Version: "v1beta1",
+					Kind:    "MetricCollector",
+					Name:    metricCollectorName,
+				},
+			},
+			Policy: &placementv1beta1.OverridePolicy{
+				OverrideRules: overrideRules,
+			},
+		},
+	}
+
+	// Create the ClusterResourceOverride only if it does not exist; an existing object is left unchanged
+	existingCRO := &placementv1beta1.ClusterResourceOverride{}
+	err = r.Client.Get(ctx, types.NamespacedName{Name: roName}, existingCRO)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			if err := r.Client.Create(ctx, clusterResourceOverride); err != nil {
+				return fmt.Errorf("failed to create ClusterResourceOverride: %w", err)
+			}
+			klog.V(2).InfoS("Created ClusterResourceOverride", "clusterResourceOverride", roName)
+		} else {
+			return fmt.Errorf("failed to get ClusterResourceOverride: %w", err)
+		}
+	}
+
+	// Create ClusterResourcePlacement with PickFixed policy
+	// CRP resource selector selects the MetricCollector directly
+	crp := &placementv1beta1.ClusterResourcePlacement{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: crpName,
+			Labels: map[string]string{
+				"approval-request": approvalReq.GetName(),
+				"update-run":       updateRunName,
+				"stage":            stageName,
+			},
+		},
+		Spec: placementv1beta1.PlacementSpec{
+			ResourceSelectors: []placementv1beta1.ResourceSelectorTerm{
+				{
+					Group:   "placement.kubernetes-fleet.io",
+					Version: "v1beta1",
+					Kind:    "MetricCollector",
+					Name:    metricCollectorName,
+				},
+			},
+			Policy: &placementv1beta1.PlacementPolicy{
+				PlacementType: placementv1beta1.PickFixedPlacementType,
+				ClusterNames:  clusterNames,
+			},
+		},
+	}
+
+	// Create the ClusterResourcePlacement only if it does not exist; an existing object is left unchanged
+	existingCRP := &placementv1beta1.ClusterResourcePlacement{}
+	err = r.Client.Get(ctx, types.NamespacedName{Name: crpName}, existingCRP)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			if err := r.Client.Create(ctx, crp); err != nil {
+				return fmt.Errorf("failed to create ClusterResourcePlacement: %w", err)
+			}
+			klog.V(2).InfoS("Created ClusterResourcePlacement", "crp", crpName)
+		} else {
+			return fmt.Errorf("failed to get ClusterResourcePlacement: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// checkWorkloadHealthAndApprove checks if all workloads specified in WorkloadTracker are healthy
+// across all clusters in the stage, and approves the ApprovalRequest if they are.
+func (r *Reconciler) checkWorkloadHealthAndApprove( + ctx context.Context, + approvalReqObj placementv1beta1.ApprovalRequestObj, + clusterNames []string, + updateRunName, stageName string, +) error { + obj := approvalReqObj.(client.Object) + approvalReqRef := klog.KObj(obj) + + klog.V(2).InfoS("Starting workload health check", "approvalRequest", approvalReqRef, "clusters", clusterNames) + + // Get the WorkloadTracker (there should be one cluster-scoped object) + workloadTrackerList := &localv1beta1.WorkloadTrackerList{} + if err := r.Client.List(ctx, workloadTrackerList); err != nil { + klog.ErrorS(err, "Failed to list WorkloadTracker", "approvalRequest", approvalReqRef) + return fmt.Errorf("failed to list WorkloadTracker: %w", err) + } + + if len(workloadTrackerList.Items) == 0 { + klog.V(2).InfoS("No WorkloadTracker found, skipping health check", "approvalRequest", approvalReqRef) + return nil + } + + // Use the first WorkloadTracker (assuming there's only one) + workloadTracker := &workloadTrackerList.Items[0] + klog.V(2).InfoS("Found WorkloadTracker", "approvalRequest", approvalReqRef, "workloadTracker", klog.KObj(workloadTracker), "workloadCount", len(workloadTracker.Workloads)) + + if len(workloadTracker.Workloads) == 0 { + klog.V(2).InfoS("WorkloadTracker has no workloads defined, skipping health check", "approvalRequest", approvalReqRef) + return nil + } + + // MetricCollectorReport name is same as MetricCollector name + metricCollectorName := fmt.Sprintf("mc-%s-%s", updateRunName, stageName) + + // Check each cluster for the required workloads + allHealthy := true + unhealthyDetails := []string{} + + for _, clusterName := range clusterNames { + reportNamespace := fmt.Sprintf(utils.NamespaceNameFormat, clusterName) + + klog.V(2).InfoS("Checking MetricCollectorReport", + "approvalRequest", approvalReqRef, + "cluster", clusterName, + "reportName", metricCollectorName, + "reportNamespace", reportNamespace) + + // Get MetricCollectorReport for this cluster + 
report := &localv1beta1.MetricCollectorReport{} + err := r.Client.Get(ctx, types.NamespacedName{ + Name: metricCollectorName, + Namespace: reportNamespace, + }, report) + + if err != nil { + if errors.IsNotFound(err) { + klog.V(2).InfoS("MetricCollectorReport not found yet", + "approvalRequest", approvalReqRef, + "cluster", clusterName, + "report", metricCollectorName, + "namespace", reportNamespace) + allHealthy = false + unhealthyDetails = append(unhealthyDetails, fmt.Sprintf("cluster %s: report not found", clusterName)) + continue + } + klog.ErrorS(err, "Failed to get MetricCollectorReport", + "approvalRequest", approvalReqRef, + "cluster", clusterName, + "report", metricCollectorName, + "namespace", reportNamespace) + return fmt.Errorf("failed to get MetricCollectorReport for cluster %s: %w", clusterName, err) + } + + klog.V(2).InfoS("Found MetricCollectorReport", + "approvalRequest", approvalReqRef, + "cluster", clusterName, + "collectedMetrics", len(report.CollectedMetrics), + "workloadsMonitored", report.WorkloadsMonitored) + + // Check if all workloads from WorkloadTracker are present and healthy + for _, trackedWorkload := range workloadTracker.Workloads { + found := false + healthy := false + + for _, collectedMetric := range report.CollectedMetrics { + if collectedMetric.Namespace == trackedWorkload.Namespace && + collectedMetric.WorkloadName == trackedWorkload.Name { + found = true + healthy = collectedMetric.Health + klog.V(3).InfoS("Workload metric found", + "approvalRequest", approvalReqRef, + "cluster", clusterName, + "workload", trackedWorkload.Name, + "namespace", trackedWorkload.Namespace, + "healthy", healthy) + break + } + } + + if !found { + klog.V(2).InfoS("Workload not found in MetricCollectorReport", + "approvalRequest", approvalReqRef, + "cluster", clusterName, + "workload", trackedWorkload.Name, + "namespace", trackedWorkload.Namespace) + allHealthy = false + unhealthyDetails = append(unhealthyDetails, + fmt.Sprintf("cluster %s: workload 
%s/%s not found", clusterName, trackedWorkload.Namespace, trackedWorkload.Name)) + } else if !healthy { + klog.V(2).InfoS("Workload is not healthy", + "approvalRequest", approvalReqRef, + "cluster", clusterName, + "workload", trackedWorkload.Name, + "namespace", trackedWorkload.Namespace) + allHealthy = false + unhealthyDetails = append(unhealthyDetails, + fmt.Sprintf("cluster %s: workload %s/%s unhealthy", clusterName, trackedWorkload.Namespace, trackedWorkload.Name)) + } + } + } + + // If all workloads are healthy across all clusters, approve the ApprovalRequest + if allHealthy { + klog.InfoS("All workloads are healthy, approving ApprovalRequest", + "approvalRequest", approvalReqRef, + "clusters", clusterNames, + "workloads", len(workloadTracker.Workloads)) + + status := approvalReqObj.GetApprovalRequestStatus() + approvedCond := meta.FindStatusCondition(status.Conditions, string(placementv1beta1.ApprovalRequestConditionApproved)) + + // Only update if not already approved + if approvedCond == nil || approvedCond.Status != metav1.ConditionTrue { + meta.SetStatusCondition(&status.Conditions, metav1.Condition{ + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + Status: metav1.ConditionTrue, + ObservedGeneration: obj.GetGeneration(), + Reason: "AllWorkloadsHealthy", + Message: fmt.Sprintf("All %d workloads are healthy across %d clusters", len(workloadTracker.Workloads), len(clusterNames)), + }) + + approvalReqObj.SetApprovalRequestStatus(*status) + if err := r.Client.Status().Update(ctx, obj); err != nil { + klog.ErrorS(err, "Failed to approve ApprovalRequest", "approvalRequest", approvalReqRef) + return fmt.Errorf("failed to approve ApprovalRequest: %w", err) + } + + klog.InfoS("Successfully approved ApprovalRequest", "approvalRequest", approvalReqRef) + r.recorder.Event(obj, "Normal", "Approved", fmt.Sprintf("All %d workloads are healthy across %d clusters in stage %s", len(workloadTracker.Workloads), len(clusterNames), stageName)) + } else { + 
klog.V(2).InfoS("ApprovalRequest already approved", "approvalRequest", approvalReqRef) + } + + // Approval successful or already approved + return nil + } + + // Not all workloads are healthy yet, log details and return nil (reconcile will requeue) + klog.V(2).InfoS("Not all workloads are healthy yet", + "approvalRequest", approvalReqRef, + "unhealthyDetails", unhealthyDetails) + + return nil +} + +// handleDelete handles the deletion of an ApprovalRequest or ClusterApprovalRequest +func (r *Reconciler) handleDelete(ctx context.Context, approvalReqObj placementv1beta1.ApprovalRequestObj, isClusterScoped bool) (ctrl.Result, error) { + obj := approvalReqObj.(client.Object) + if !controllerutil.ContainsFinalizer(obj, metricCollectorFinalizer) { + return ctrl.Result{}, nil + } + + approvalReqRef := klog.KObj(obj) + klog.V(2).InfoS("Cleaning up resources for ApprovalRequest", "approvalRequest", approvalReqRef) + + // Delete CRP (it will cascade delete the resources on member clusters) + spec := approvalReqObj.GetApprovalRequestSpec() + updateRunName := spec.TargetUpdateRun + stageName := spec.TargetStage + crpName := fmt.Sprintf("crp-mc-%s-%s", updateRunName, stageName) + metricCollectorName := fmt.Sprintf("mc-%s-%s", updateRunName, stageName) + croName := fmt.Sprintf("ro-mc-%s-%s", updateRunName, stageName) + + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: crpName}, crp); err == nil { + if err := r.Client.Delete(ctx, crp); err != nil && !errors.IsNotFound(err) { + return ctrl.Result{}, fmt.Errorf("failed to delete CRP: %w", err) + } + klog.V(2).InfoS("Deleted ClusterResourcePlacement", "crp", crpName) + } + + // Delete ClusterResourceOverride + cro := &placementv1beta1.ClusterResourceOverride{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: croName}, cro); err == nil { + if err := r.Client.Delete(ctx, cro); err != nil && !errors.IsNotFound(err) { + return ctrl.Result{}, fmt.Errorf("failed to 
delete ClusterResourceOverride: %w", err) + } + klog.V(2).InfoS("Deleted ClusterResourceOverride", "clusterResourceOverride", croName) + } + + // Delete MetricCollector + metricCollector := &localv1beta1.MetricCollector{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: metricCollectorName}, metricCollector); err == nil { + if err := r.Client.Delete(ctx, metricCollector); err != nil && !errors.IsNotFound(err) { + return ctrl.Result{}, fmt.Errorf("failed to delete MetricCollector: %w", err) + } + klog.V(2).InfoS("Deleted MetricCollector", "metricCollector", metricCollectorName) + } + + // Remove finalizer + controllerutil.RemoveFinalizer(obj, metricCollectorFinalizer) + if err := r.Client.Update(ctx, obj); err != nil { + klog.ErrorS(err, "Failed to remove finalizer", "approvalRequest", approvalReqRef) + return ctrl.Result{}, err + } + + klog.V(2).InfoS("Successfully cleaned up resources", "approvalRequest", approvalReqRef) + return ctrl.Result{}, nil +} + +// SetupWithManagerForClusterApprovalRequest sets up the controller with the Manager for ClusterApprovalRequest resources. +func (r *Reconciler) SetupWithManagerForClusterApprovalRequest(mgr ctrl.Manager) error { + r.recorder = mgr.GetEventRecorderFor("clusterapprovalrequest-controller") + return ctrl.NewControllerManagedBy(mgr). + Named("clusterapprovalrequest-controller"). + For(&placementv1beta1.ClusterApprovalRequest{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). + Complete(r) +} + +// SetupWithManagerForApprovalRequest sets up the controller with the Manager for ApprovalRequest resources. +func (r *Reconciler) SetupWithManagerForApprovalRequest(mgr ctrl.Manager) error { + r.recorder = mgr.GetEventRecorderFor("approvalrequest-controller") + return ctrl.NewControllerManagedBy(mgr). + Named("approvalrequest-controller"). + For(&placementv1beta1.ApprovalRequest{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). 
+ Complete(r) +} diff --git a/cmd/hubagent/workload/setup.go b/cmd/hubagent/workload/setup.go index cbaee55d5..42c7299cd 100644 --- a/cmd/hubagent/workload/setup.go +++ b/cmd/hubagent/workload/setup.go @@ -328,6 +328,26 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager, return err } } + + // // Set up the cluster approval request controller + // klog.Info("Setting up clusterApprovalRequest controller") + // if err = (&approvalrequest.Reconciler{ + // Client: mgr.GetClient(), + // }).SetupWithManagerForClusterApprovalRequest(mgr); err != nil { + // klog.ErrorS(err, "Unable to set up clusterApprovalRequest controller") + // return err + // } + + // // Set up the approval request controller for namespaced resources + // if opts.EnableResourcePlacement { + // klog.Info("Setting up approvalRequest controller") + // if err = (&approvalrequest.Reconciler{ + // Client: mgr.GetClient(), + // }).SetupWithManagerForApprovalRequest(mgr); err != nil { + // klog.ErrorS(err, "Unable to set up approvalRequest controller") + // return err + // } + // } } // Set up the work generator diff --git a/cmd/memberagent/main.go b/cmd/memberagent/main.go index d8e0e4cdc..d79f6a36d 100644 --- a/cmd/memberagent/main.go +++ b/cmd/memberagent/main.go @@ -463,6 +463,16 @@ func Start(ctx context.Context, hubCfg, memberConfig *rest.Config, hubOpts, memb klog.ErrorS(err, "Failed to set up InternalMemberCluster v1beta1 controller with the controller manager") return fmt.Errorf("failed to set up InternalMemberCluster v1beta1 controller with the controller manager: %w", err) } + + // // Set up the MetricCollector controller. 
+	// mcReconciler := &metriccollector.Reconciler{
+	// 	MemberClient: memberMgr.GetClient(),
+	// 	HubClient:    hubMgr.GetClient(),
+	// }
+	// if err := mcReconciler.SetupWithManager(memberMgr); err != nil {
+	// 	klog.ErrorS(err, "Failed to set up MetricCollector controller with the controller manager")
+	// 	return fmt.Errorf("failed to set up MetricCollector controller with the controller manager: %w", err)
+	// }
 	}
 
 	klog.InfoS("starting hub manager")
diff --git a/cmd/metric-app/main.go b/cmd/metric-app/main.go
new file mode 100644
index 000000000..17dc094fe
--- /dev/null
+++ b/cmd/metric-app/main.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+	"net/http"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+func main() {
+	// Define a simple gauge metric reporting workload health (no labels)
+	workloadHealth := prometheus.NewGauge(
+		prometheus.GaugeOpts{
+			Name: "workload_health",
+			Help: "Indicates if the workload is healthy (1=healthy, 0=unhealthy)",
+		},
+	)
+
+	// Set it to 1 (healthy)
+	workloadHealth.Set(1)
+
+	// Register metric with Prometheus default registry
+	prometheus.MustRegister(workloadHealth)
+
+	// Expose metrics endpoint
+	http.Handle("/metrics", promhttp.Handler())
+
+	// Start HTTP server (blocks; serve error is deliberately ignored in this sample app)
+	http.ListenAndServe(":8080", nil)
+}
diff --git a/config/crd/bases/placement.kubernetes-fleet.io_metriccollectorreports.yaml b/config/crd/bases/placement.kubernetes-fleet.io_metriccollectorreports.yaml
new file mode 100644
index 000000000..53a3420c5
--- /dev/null
+++ b/config/crd/bases/placement.kubernetes-fleet.io_metriccollectorreports.yaml
@@ -0,0 +1,176 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.0
+  name: metriccollectorreports.placement.kubernetes-fleet.io
+spec:
+  group: placement.kubernetes-fleet.io
+  names:
+    categories:
+    - fleet
+    - fleet-metrics
+    kind: MetricCollectorReport
listKind: MetricCollectorReportList + plural: metriccollectorreports + shortNames: + - mcr + singular: metriccollectorreport + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .workloadsMonitored + name: Workloads + type: integer + - jsonPath: .lastCollectionTime + name: Last-Collection + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + MetricCollectorReport is created by the MetricCollector controller on the hub cluster + in the fleet-member-{clusterName} namespace to report collected metrics from a member cluster. + The controller watches MetricCollector objects on the member cluster, collects metrics, + and syncs the status to the hub as MetricCollectorReport objects. + + Controller workflow: + 1. MetricCollector reconciles and collects metrics on member cluster + 2. Metrics include clusterName from workload_health labels + 3. Controller creates/updates MetricCollectorReport in fleet-member-{clusterName} namespace on hub + 4. Report name matches MetricCollector name for easy lookup + + Namespace: fleet-member-{clusterName} (extracted from CollectedMetrics[0].ClusterName) + Name: Same as MetricCollector name + All metrics in CollectedMetrics are guaranteed to have the same ClusterName. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + collectedMetrics: + description: |- + CollectedMetrics contains the most recent metrics from each workload. + All metrics are guaranteed to have the same ClusterName since they're collected from one member cluster. 
+ items: + description: WorkloadMetrics represents metrics collected from a single + workload pod. + properties: + clusterName: + description: ClusterName from the workload_health metric label. + type: string + health: + description: Health indicates if the workload is healthy (true=healthy, + false=unhealthy). + type: boolean + namespace: + description: Namespace is the namespace of the pod. + type: string + workloadName: + description: WorkloadName from the workload_health metric label + (typically the deployment name). + type: string + required: + - clusterName + - health + - namespace + - workloadName + type: object + type: array + conditions: + description: Conditions copied from the MetricCollector status. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. 
+ The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + lastCollectionTime: + description: LastCollectionTime is when metrics were last collected on + the member cluster. + format: date-time + type: string + lastReportTime: + description: LastReportTime is when this report was last synced to the + hub. + format: date-time + type: string + metadata: + type: object + observedGeneration: + description: ObservedGeneration is the generation most recently observed + from the MetricCollector. + format: int64 + type: integer + workloadsMonitored: + description: WorkloadsMonitored is the count of workloads being monitored. 
+ format: int32 + type: integer + type: object + served: true + storage: true + subresources: {} diff --git a/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml b/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml new file mode 100644 index 000000000..47679a15f --- /dev/null +++ b/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml @@ -0,0 +1,189 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: metriccollectors.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-metrics + kind: MetricCollector + listKind: MetricCollectorList + plural: metriccollectors + shortNames: + - mc + singular: metriccollector + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.generation + name: Gen + type: string + - jsonPath: .status.conditions[?(@.type=="MetricCollectorReady")].status + name: Ready + type: string + - jsonPath: .status.workloadsMonitored + name: Workloads + type: integer + - jsonPath: .status.lastCollectionTime + name: Last-Collection + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + MetricCollector is used by member-agent to scrape and collect metrics from workloads + running on the member cluster. It runs on each member cluster and collects metrics + from Prometheus-compatible endpoints. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: The desired state of MetricCollector. + properties: + prometheusUrl: + description: |- + PrometheusURL is the URL of the Prometheus server. + Example: http://prometheus.test-ns.svc.cluster.local:9090 + pattern: ^https?://.*$ + type: string + reportNamespace: + description: |- + ReportNamespace is the namespace in the hub cluster where the MetricCollectorReport will be created. + This should be the fleet-member-{clusterName} namespace. + Example: fleet-member-cluster-1 + type: string + required: + - prometheusUrl + - reportNamespace + type: object + status: + description: The observed status of MetricCollector. + properties: + collectedMetrics: + description: CollectedMetrics contains the most recent metrics from + each workload. + items: + description: WorkloadMetrics represents metrics collected from a + single workload pod. + properties: + clusterName: + description: ClusterName from the workload_health metric label. + type: string + health: + description: Health indicates if the workload is healthy (true=healthy, + false=unhealthy). + type: boolean + namespace: + description: Namespace is the namespace of the pod. + type: string + workloadName: + description: WorkloadName from the workload_health metric label + (typically the deployment name). + type: string + required: + - clusterName + - health + - namespace + - workloadName + type: object + type: array + conditions: + description: Conditions is an array of current observed conditions. 
+ items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + lastCollectionTime: + description: LastCollectionTime is when metrics were last collected. + format: date-time + type: string + observedGeneration: + description: ObservedGeneration is the generation most recently observed. + format: int64 + type: integer + workloadsMonitored: + description: WorkloadsMonitored is the count of workloads being monitored. + format: int32 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/placement.kubernetes-fleet.io_workloadtrackers.yaml b/config/crd/bases/placement.kubernetes-fleet.io_workloadtrackers.yaml new file mode 100644 index 000000000..b28e8da04 --- /dev/null +++ b/config/crd/bases/placement.kubernetes-fleet.io_workloadtrackers.yaml @@ -0,0 +1,60 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: workloadtrackers.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-placement + kind: WorkloadTracker + listKind: WorkloadTrackerList + plural: workloadtrackers + singular: workloadtracker + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: WorkloadTracker expresses user intent to track certain workloads + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + workloads: + description: Workloads is a list of workloads to track + items: + description: WorkloadReference represents a workload to be tracked + properties: + name: + description: Name is the name of the workload + type: string + namespace: + description: Namespace is the namespace of the workload + type: string + required: + - name + - namespace + type: object + type: array + type: object + served: true + storage: true diff --git a/docker/metric-app.Dockerfile b/docker/metric-app.Dockerfile new file mode 100644 index 000000000..a092fd94f --- /dev/null +++ b/docker/metric-app.Dockerfile @@ -0,0 +1,17 @@ +# Build stage +FROM golang:1.24-alpine AS builder +WORKDIR /workspace +# Copy go mod files +COPY go.mod go.sum ./ +RUN go mod download +# Copy source code +COPY cmd/metric-app/ ./cmd/metric-app/ +# Build the application +RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o metric-app ./cmd/metric-app/main.go + +# Run stage +FROM alpine:3.18 +WORKDIR /app +COPY --from=builder /workspace/metric-app . 
+EXPOSE 8080 +CMD ["./metric-app"] diff --git a/examples/fleet_v1beta1_membercluster.yaml b/examples/fleet_v1beta1_membercluster.yaml index 41257af52..d4170cbce 100644 --- a/examples/fleet_v1beta1_membercluster.yaml +++ b/examples/fleet_v1beta1_membercluster.yaml @@ -2,6 +2,9 @@ apiVersion: cluster.kubernetes-fleet.io/v1beta1 kind: MemberCluster metadata: name: kind-cluster-1 + labels: + environment: staging + kubernetes-fleet.io/cluster-name: kind-cluster-1 spec: identity: name: fleet-member-agent-cluster-1 diff --git a/examples/metrics/README.md b/examples/metrics/README.md new file mode 100644 index 000000000..8a161b9c9 --- /dev/null +++ b/examples/metrics/README.md @@ -0,0 +1,463 @@ +# Metric Collection API Examples + +This directory contains examples for the Fleet metric collection system. + +## Architecture + +``` +Member Cluster Hub Cluster +┌─────────────────────────────────────┐ ┌──────────────────────────────────┐ +│ │ │ │ +│ ┌────────────────────────────┐ │ │ ┌────────────────────────────┐ │ +│ │ Workload Pods │ │ │ │ MetricCollectorReport │ │ +│ │ (sample-metric-app) │ │ │ │ (cluster-1-...) 
│ │ +│ │ - Expose /metrics endpoint │ │ │ │ │ │ +│ └────────────┬───────────────┘ │ │ │ Status: │ │ +│ │ │ │ │ - collectedMetrics[] │ │ +│ │ scrape │ │ │ - workloadsMonitored │ │ +│ ▼ │ │ │ - lastCollectionTime │ │ +│ ┌────────────────────────────┐ │ │ └────────────────────────────┘ │ +│ │ Prometheus │ │ │ ▲ │ +│ │ (test-ns namespace) │ │ │ │ │ +│ └────────────┬───────────────┘ │ │ │ copy status │ +│ │ │ │ │ │ +│ │ query │ │ ┌────────────────────────────┐ │ +│ ▼ │ │ │ Member Agent │ │ +│ ┌────────────────────────────┐ │ │ │ (MC Status Reporter) │ │ +│ │ MetricCollector │ │ │ │ - Watches MetricCollector │ │ +│ │ (test-ns namespace) │────┼──────────┼──┤ - Creates/Updates MCR │ │ +│ │ │ │ │ └────────────────────────────┘ │ +│ │ Spec: │ │ │ │ +│ │ - prometheusUrl │ │ └──────────────────────────────────┘ +│ │ │ │ +│ │ Status: │ │ +│ │ - collectedMetrics[] │ │ +│ │ - namespace │ │ +│ │ - clusterName │ │ +│ │ - workloadName │ │ +│ │ - health (bool) │ │ +│ │ - workloadsMonitored │ │ +│ │ - lastCollectionTime │ │ +│ └────────────────────────────┘ │ +│ ▲ │ +│ │ reconcile (every 30s) │ +│ │ │ +│ ┌────────────────────────────┐ │ +│ │ Member Agent │ │ +│ │ (MC Controller) │ │ +│ └────────────────────────────┘ │ +│ │ +└─────────────────────────────────────┘ +``` + +## Components + +### MetricCollector (Member Cluster) + +Runs on each member cluster as part of the member-agent. 
It: +- Watches for workloads matching the selector +- Scrapes Prometheus-compatible metrics endpoints +- Collects specified metrics (e.g., `workload_health`) +- Stores metrics in its status +- Reports metrics to hub via InternalMemberCluster + +**Example:** +```yaml +apiVersion: placement.kubernetes-fleet.io/v1beta1 +kind: MetricCollector +metadata: + name: sample-app-collector + namespace: fleet-system +spec: + workloadSelector: + labelSelector: + matchLabels: + app: sample-metric-app + namespaces: + - test-ns + metricsEndpoint: + port: 8080 + path: /metrics + collectionInterval: 30s + metricsToCollect: + - workload_health +``` + +### MetricsAggregator (Hub Cluster) + +Runs on the hub cluster as part of the hub-agent. It: +- Reads metrics from InternalMemberCluster statuses +- Aggregates metrics across all member clusters +- Determines workload health based on thresholds +- Provides fleet-wide health view + +**Example:** +```yaml +apiVersion: placement.kubernetes-fleet.io/v1beta1 +kind: MetricsAggregator +metadata: + name: sample-app-aggregator +spec: + metricCollectorRef: + name: sample-app-collector + namespace: fleet-system + healthThreshold: + metricName: workload_health + minValue: "1" +``` + +## Setup + +### 1. Deploy the Sample Metric App + +First, deploy the sample application that exposes metrics: + +```bash +kubectl apply -f ../sample-metric-app.yaml +``` + +This creates a Pod that exposes a `/metrics` endpoint with `workload_health` metric. + +### 2. 
(Optional) Setup Prometheus + +If using the Prometheus approach (recommended), ensure Prometheus is deployed: + +```bash +# Add Prometheus helm repo +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update + +# Install Prometheus +helm install prometheus prometheus-community/prometheus \ + --namespace monitoring \ + --create-namespace +``` + +Configure Prometheus to scrape your metric-app pods by adding annotations: +```yaml +annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" +``` + +### 3. Create MetricCollector on Member Clusters + +On each member cluster, create a MetricCollector: + +**Option A: Using Prometheus (Recommended)** +```bash +kubectl apply -f - <:` + - Scrapes metrics using HTTP client + - Parses Prometheus text format +4. Updates MetricCollector status with collected metrics +5. Reports metrics via InternalMemberCluster status + +**Key Difference:** +- Prometheus: **1 API call** → get all workload metrics +- Direct: **N API calls** → one per pod + +### 2. MetricsAggregator Controller (Hub Agent) + +The controller on the hub cluster: + +1. Watches MetricsAggregator CRs +2. Lists InternalMemberClusters (filtered by `clusterSelector` if specified) +3. For each cluster: + - Reads MetricCollector status from IMC + - Extracts workload metrics + - Applies health threshold (e.g., `workload_health >= 1`) + - Determines per-workload and per-cluster health +4. Aggregates across all clusters: + - Counts total/healthy clusters and workloads + - Sets conditions (Ready, Aggregating, AllClustersHealthy) +5. 
Updates MetricsAggregator status + +## Integration with Sample Metric App + +The metric-app (`cmd/metric-app/main.go`) exposes: + +``` +workload_health{cluster_name="cluster-1", workload_name="sample-metric-app"} 1 +``` + +- **Metric Name**: `workload_health` +- **Labels**: `cluster_name`, `workload_name` (from environment variables) +- **Value**: `1` (healthy) or `0` (unhealthy) + +The MetricCollector scrapes this and stores: +```yaml +metrics: + workload_health: "1" +labels: + cluster_name: "cluster-1" + workload_name: "sample-metric-app" +healthy: true # Because workload_health == 1 +``` + +## Advanced Examples + +### Monitor Multiple Workload Types + +```yaml +apiVersion: placement.kubernetes-fleet.io/v1beta1 +kind: MetricCollector +metadata: + name: multi-workload-collector + namespace: fleet-system +spec: + workloadSelector: + labelSelector: + matchLabels: + tier: backend + workloadTypes: + - Deployment + - StatefulSet + metricsEndpoint: + port: 9090 + path: /metrics + metricsToCollect: + - http_requests_total + - http_request_duration_seconds + - workload_health +``` + +### Cluster-Specific Aggregation + +```yaml +apiVersion: placement.kubernetes-fleet.io/v1beta1 +kind: MetricsAggregator +metadata: + name: prod-clusters-aggregator +spec: + metricCollectorRef: + name: multi-workload-collector + namespace: fleet-system + clusterSelector: + matchLabels: + environment: production + healthThreshold: + metricName: workload_health + minValue: "1" +``` + +## Troubleshooting + +### Metrics Not Being Collected + +Check MetricCollector status: +```bash +kubectl get mc sample-app-collector -n fleet-system -o jsonpath='{.status.conditions}' +``` + +Common issues: +- Pod selector not matching any pods +- Wrong port or path +- Network policy blocking access to pod metrics endpoint +- Workload not exposing metrics + +### Aggregator Not Showing Metrics + +Check: +1. MetricCollector is running and collecting on member clusters +2. 
InternalMemberCluster status contains metrics +3. MetricsAggregator `metricCollectorRef` matches the MetricCollector name/namespace + +```bash +# On hub cluster +kubectl get imc -A -o yaml | grep -A 20 "collectedMetrics" +``` + +## Future Enhancements + +- Support for multiple health metrics +- Alerting based on aggregated metrics +- Historical metric trends +- Integration with external monitoring systems +- Automatic scaling based on metrics diff --git a/examples/metrics/metriccollector-sample.yaml b/examples/metrics/metriccollector-sample.yaml new file mode 100644 index 000000000..776b14647 --- /dev/null +++ b/examples/metrics/metriccollector-sample.yaml @@ -0,0 +1,13 @@ +# MetricCollector - Runs on Member Cluster +# This collects workload_health metrics from Prometheus running on the member cluster +# The controller queries Prometheus every 30 seconds and updates the status with collected metrics + +apiVersion: placement.kubernetes-fleet.io/v1beta1 +kind: MetricCollector +metadata: + name: sample-app-collector + namespace: test-ns +spec: + # URL of the Prometheus server in the cluster + # This Prometheus instance scrapes metrics from pods with prometheus.io/* annotations + prometheusUrl: http://prometheus.test-ns.svc.cluster.local:9090 diff --git a/examples/metrics/metriccollectorreport-sample.yaml b/examples/metrics/metriccollectorreport-sample.yaml new file mode 100644 index 000000000..ed54d947d --- /dev/null +++ b/examples/metrics/metriccollectorreport-sample.yaml @@ -0,0 +1,45 @@ +# MetricCollectorReport - Created by MetricCollector controller on Hub Cluster +# This report is automatically synced by the MetricCollector controller when it reconciles +# a MetricCollector on the member cluster. +# +# Controller workflow: +# 1. MetricCollector reconciles and collects metrics from Prometheus on member cluster +# 2. Metrics include clusterName label (e.g., cluster-1) from workload_health metric +# 3. 
Controller reads the hub namespace from spec.reportNamespace (fleet-member-{clusterName})
+# 4. Controller creates/updates MetricCollectorReport on hub in fleet-member-{clusterName} namespace
+# 5. All metrics are guaranteed to have the same clusterName
+#
+# Namespace: fleet-member-{clusterName} (e.g., fleet-member-cluster-1)
+# Name: Same as MetricCollector name (e.g., sample-app-collector)
+# Labels: Optional, can be used to track source MetricCollector
+
+apiVersion: placement.kubernetes-fleet.io/v1beta1
+kind: MetricCollectorReport
+metadata:
+  name: sample-app-collector
+  namespace: fleet-member-cluster-1
+  labels:
+    metriccollector-name: sample-app-collector
+    metriccollector-namespace: test-ns
+
+# All fields below are synced from MetricCollector status by the controller
+conditions:
+  - type: MetricCollectorReady
+    status: "True"
+    reason: CollectorReady
+    message: Metric collector is ready and collecting metrics
+    lastTransitionTime: "2025-11-20T01:45:00Z"
+  - type: MetricsCollecting
+    status: "True"
+    reason: Collecting
+    message: Successfully collecting metrics from Prometheus
+    lastTransitionTime: "2025-11-20T01:45:00Z"
+observedGeneration: 1
+workloadsMonitored: 1
+lastCollectionTime: "2025-11-20T01:45:00Z"
+collectedMetrics:
+  - namespace: test-ns
+    clusterName: cluster-1 # All metrics have the same clusterName
+    workloadName: sample-metric-app
+    health: true
+lastReportTime: "2025-11-20T01:45:05Z"
diff --git a/examples/prometheus/README.md b/examples/prometheus/README.md
new file mode 100644
index 000000000..bd1b9e66e
--- /dev/null
+++ b/examples/prometheus/README.md
@@ -0,0 +1,117 @@
+# Prometheus Setup for MetricCollector Testing
+
+This directory contains manifests to deploy Prometheus for testing the MetricCollector controller with the sample-metric-app. 
+ +## Prerequisites + +- Kind cluster running (e.g., cluster-1, cluster-2, or cluster-3) +- `test-ns` namespace exists +- `ghcr.io/metric-app:6d6cd69` image loaded into the cluster + +## Quick Start + +```bash +# Switch to target cluster +kubectl config use-context kind-cluster-1 + +# Create namespace if needed +kubectl create namespace test-ns --dry-run=client -o yaml | kubectl apply -f - + +# Deploy Prometheus +kubectl apply -f rbac.yaml +kubectl apply -f configmap.yaml +kubectl apply -f deployment.yaml +kubectl apply -f service.yaml + +# Deploy sample-metric-app (from parent examples directory) +kubectl apply -f ../sample-metric-app.yaml + +# Wait for pods to be ready +kubectl wait --for=condition=ready pod -l app=prometheus -n test-ns --timeout=60s +kubectl wait --for=condition=ready pod -l app=sample-metric-app -n test-ns --timeout=60s +``` + +## Verify Setup + +### Check Prometheus is scraping metrics + +```bash +# Port-forward Prometheus +kubectl port-forward -n test-ns svc/prometheus 9090:9090 +``` + +Open http://localhost:9090 in your browser and: +1. Go to Status > Targets - you should see `sample-metric-app` pod listed +2. 
Go to Graph and query: `workload_health` - you should see metrics + +### Test with prometheus-test tool + +```bash +# In another terminal (while port-forward is running) +cd tools/prometheus-test +go build -o prometheus-test main.go +./prometheus-test http://localhost:9090 workload_health + +# Or with namespace filter +./prometheus-test http://localhost:9090 workload_health test-ns +``` + +Expected output: +``` +Querying Prometheus at: http://localhost:9090 +Query: workload_health{namespace="test-ns"} + +Status: success +Result Type: vector +Number of results: 1 + +Result 1: + Labels: + app: sample-metric-app + namespace: test-ns + pod: sample-metric-app-xxxxx + Timestamp: 1732032000.0 + Value: 1 +``` + +## Configuration Details + +### Prometheus ConfigMap +The Prometheus configuration discovers pods with these annotations: +- `prometheus.io/scrape: "true"` - Enable scraping +- `prometheus.io/port: "8080"` - Port to scrape +- `prometheus.io/path: "/metrics"` - Metrics endpoint path + +The sample-metric-app already has these annotations configured. + +### RBAC +Prometheus needs permissions to discover and scrape pods. 
The `rbac.yaml` creates: +- ServiceAccount for Prometheus +- ClusterRole with pod discovery permissions +- ClusterRoleBinding to grant permissions + +## Testing MetricCollector + +Once Prometheus is running, create a MetricCollector CR: + +```bash +kubectl apply -f - <= 2 { + if valueStr, ok := res.Value[1].(string); ok { + fmt.Sscanf(valueStr, "%f", &health) + } + } + + wm := placementv1beta1.WorkloadMetrics{ + Namespace: namespace, + WorkloadName: workloadName, + Health: health == 1.0, // Convert to boolean: 1.0 = true, 0.0 = false + } + workloadMetrics = append(workloadMetrics, wm) + } + + klog.V(2).InfoS("Collected metrics from Prometheus", "workloads", len(workloadMetrics)) + return workloadMetrics, nil +} + +// buildPromQLQuery builds a PromQL query for workload_health metric +func buildPromQLQuery(mc *placementv1beta1.MetricCollector) string { + // Query all workload_health metrics (MetricCollector is cluster-scoped) + return `workload_health` +} diff --git a/pkg/controllers/metriccollector/controller.go b/pkg/controllers/metriccollector/controller.go new file mode 100644 index 000000000..79f1b09fc --- /dev/null +++ b/pkg/controllers/metriccollector/controller.go @@ -0,0 +1,288 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metriccollector + +import ( + "context" + "fmt" + "time" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" +) + +const ( + // defaultCollectionInterval is the interval for collecting metrics (30 seconds) + defaultCollectionInterval = 30 * time.Second + + // metricCollectorFinalizer is the finalizer for cleaning up MetricCollectorReport + metricCollectorFinalizer = "kubernetes-fleet.io/metric-collector-report-cleanup" +) + +// Reconciler reconciles a MetricCollector object +type Reconciler struct { + // MemberClient is the client to access the member cluster + MemberClient client.Client + + // HubClient is the client to access the hub cluster + HubClient client.Client + + // recorder is the event recorder + recorder record.EventRecorder + + // prometheusClient is the client to query Prometheus + prometheusClient PrometheusClient +} + +// Reconcile reconciles a MetricCollector object +func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + startTime := time.Now() + klog.V(2).InfoS("MetricCollector reconciliation starts", "metricCollector", req.Name) + defer func() { + latency := time.Since(startTime).Milliseconds() + klog.V(2).InfoS("MetricCollector reconciliation ends", "metricCollector", req.Name, "latency", latency) + }() + + // Fetch the MetricCollector instance (cluster-scoped) + mc := &placementv1beta1.MetricCollector{} + if err := r.MemberClient.Get(ctx, client.ObjectKey{Name: req.Name}, mc); err != nil { + if errors.IsNotFound(err) { + 
klog.V(2).InfoS("MetricCollector not found, ignoring", "metricCollector", req.Name) + return ctrl.Result{}, nil + } + klog.ErrorS(err, "Failed to get MetricCollector", "metricCollector", req.Name) + return ctrl.Result{}, err + } + + // Handle deletion - cleanup MetricCollectorReport on hub + if !mc.DeletionTimestamp.IsZero() { + if controllerutil.ContainsFinalizer(mc, metricCollectorFinalizer) { + klog.V(2).InfoS("Cleaning up MetricCollectorReport on hub", "metricCollector", req.Name) + + // Delete MetricCollectorReport from hub cluster + if err := r.deleteReportFromHub(ctx, mc); err != nil { + klog.ErrorS(err, "Failed to delete MetricCollectorReport from hub", "metricCollector", req.Name) + return ctrl.Result{}, err + } + + // Remove finalizer + controllerutil.RemoveFinalizer(mc, metricCollectorFinalizer) + if err := r.MemberClient.Update(ctx, mc); err != nil { + klog.ErrorS(err, "Failed to remove finalizer", "metricCollector", req.Name) + return ctrl.Result{}, err + } + klog.V(2).InfoS("Successfully cleaned up MetricCollectorReport", "metricCollector", req.Name) + } + return ctrl.Result{}, nil + } + + // Add finalizer if not present + if !controllerutil.ContainsFinalizer(mc, metricCollectorFinalizer) { + controllerutil.AddFinalizer(mc, metricCollectorFinalizer) + if err := r.MemberClient.Update(ctx, mc); err != nil { + klog.ErrorS(err, "Failed to add finalizer", "metricCollector", req.Name) + return ctrl.Result{}, err + } + klog.V(2).InfoS("Added finalizer to MetricCollector", "metricCollector", req.Name) + } + + // Collect metrics from Prometheus + collectedMetrics, collectErr := r.collectFromPrometheus(ctx, mc) + + // Update status with collected metrics + now := metav1.Now() + mc.Status.LastCollectionTime = &now + mc.Status.CollectedMetrics = collectedMetrics + mc.Status.WorkloadsMonitored = int32(len(collectedMetrics)) + mc.Status.ObservedGeneration = mc.Generation + + if collectErr != nil { + klog.ErrorS(collectErr, "Failed to collect metrics", 
"metricCollector", req.Name) + meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{ + Type: placementv1beta1.MetricCollectorConditionTypeReady, + Status: metav1.ConditionTrue, + ObservedGeneration: mc.Generation, + Reason: "CollectorConfigured", + Message: "Collector is configured", + }) + meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{ + Type: placementv1beta1.MetricCollectorConditionTypeCollecting, + Status: metav1.ConditionFalse, + ObservedGeneration: mc.Generation, + Reason: "CollectionFailed", + Message: fmt.Sprintf("Failed to collect metrics: %v", collectErr), + }) + } else { + klog.V(2).InfoS("Successfully collected metrics", "metricCollector", req.Name, "workloads", len(collectedMetrics)) + meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{ + Type: placementv1beta1.MetricCollectorConditionTypeReady, + Status: metav1.ConditionTrue, + ObservedGeneration: mc.Generation, + Reason: "CollectorConfigured", + Message: "Collector is configured and collecting metrics", + }) + meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{ + Type: placementv1beta1.MetricCollectorConditionTypeCollecting, + Status: metav1.ConditionTrue, + ObservedGeneration: mc.Generation, + Reason: "MetricsCollected", + Message: fmt.Sprintf("Successfully collected metrics from %d workloads", len(collectedMetrics)), + }) + } + + if err := r.MemberClient.Status().Update(ctx, mc); err != nil { + klog.ErrorS(err, "Failed to update MetricCollector status", "metricCollector", req.Name) + return ctrl.Result{}, err + } + + // Sync MetricCollectorReport to hub cluster + if err := r.syncReportToHub(ctx, mc); err != nil { + klog.ErrorS(err, "Failed to sync MetricCollectorReport to hub", "metricCollector", req.Name) + meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{ + Type: placementv1beta1.MetricCollectorConditionTypeReported, + Status: metav1.ConditionFalse, + ObservedGeneration: mc.Generation, + Reason: "ReportSyncFailed", + Message: 
fmt.Sprintf("Failed to sync report to hub: %v", err), + }) + } else { + meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{ + Type: placementv1beta1.MetricCollectorConditionTypeReported, + Status: metav1.ConditionTrue, + ObservedGeneration: mc.Generation, + Reason: "ReportSyncSucceeded", + Message: "Successfully synced metrics to hub cluster", + }) + } + + // Update status with reporting condition + if err := r.MemberClient.Status().Update(ctx, mc); err != nil { + klog.ErrorS(err, "Failed to update MetricCollector status with reporting condition", "metricCollector", req.Name) + return ctrl.Result{}, err + } + + // Requeue after 30 seconds + return ctrl.Result{RequeueAfter: defaultCollectionInterval}, nil +} + +// syncReportToHub syncs the MetricCollectorReport to the hub cluster +func (r *Reconciler) syncReportToHub(ctx context.Context, mc *placementv1beta1.MetricCollector) error { + // Use the reportNamespace from the MetricCollector spec + reportNamespace := mc.Spec.ReportNamespace + if reportNamespace == "" { + return fmt.Errorf("reportNamespace is not set in MetricCollector spec") + } + + // Create or update MetricCollectorReport on hub + report := &placementv1beta1.MetricCollectorReport{ + ObjectMeta: metav1.ObjectMeta{ + Name: mc.Name, + Namespace: reportNamespace, + Labels: map[string]string{ + "metriccollector-name": mc.Name, + }, + }, + } + + // Check if report already exists + existingReport := &placementv1beta1.MetricCollectorReport{} + err := r.HubClient.Get(ctx, client.ObjectKey{Name: mc.Name, Namespace: reportNamespace}, existingReport) + + now := metav1.Now() + if err != nil { + if errors.IsNotFound(err) { + // Create new report + report.Conditions = mc.Status.Conditions + report.ObservedGeneration = mc.Status.ObservedGeneration + report.WorkloadsMonitored = mc.Status.WorkloadsMonitored + report.LastCollectionTime = mc.Status.LastCollectionTime + report.CollectedMetrics = mc.Status.CollectedMetrics + report.LastReportTime = &now + + if 
err := r.HubClient.Create(ctx, report); err != nil { + klog.ErrorS(err, "Failed to create MetricCollectorReport", "report", klog.KObj(report)) + return err + } + klog.V(2).InfoS("Created MetricCollectorReport on hub", "report", klog.KObj(report), "reportNamespace", reportNamespace) + return nil + } + return err + } + + // Update existing report + existingReport.Labels = report.Labels + existingReport.Conditions = mc.Status.Conditions + existingReport.ObservedGeneration = mc.Status.ObservedGeneration + existingReport.WorkloadsMonitored = mc.Status.WorkloadsMonitored + existingReport.LastCollectionTime = mc.Status.LastCollectionTime + existingReport.CollectedMetrics = mc.Status.CollectedMetrics + existingReport.LastReportTime = &now + + if err := r.HubClient.Update(ctx, existingReport); err != nil { + klog.ErrorS(err, "Failed to update MetricCollectorReport", "report", klog.KObj(existingReport)) + return err + } + klog.V(2).InfoS("Updated MetricCollectorReport on hub", "report", klog.KObj(existingReport), "reportNamespace", reportNamespace) + return nil +} + +// deleteReportFromHub deletes the MetricCollectorReport from the hub cluster +func (r *Reconciler) deleteReportFromHub(ctx context.Context, mc *placementv1beta1.MetricCollector) error { + // Use the reportNamespace from the MetricCollector spec + reportNamespace := mc.Spec.ReportNamespace + if reportNamespace == "" { + klog.V(2).InfoS("reportNamespace is not set, skipping deletion", "metricCollector", mc.Name) + return nil + } + + // Try to delete MetricCollectorReport on hub + report := &placementv1beta1.MetricCollectorReport{} + err := r.HubClient.Get(ctx, client.ObjectKey{Name: mc.Name, Namespace: reportNamespace}, report) + if err != nil { + if errors.IsNotFound(err) { + klog.V(2).InfoS("MetricCollectorReport not found on hub, already deleted", "report", mc.Name, "namespace", reportNamespace) + return nil + } + return fmt.Errorf("failed to get MetricCollectorReport: %w", err) + } + + if err := 
r.HubClient.Delete(ctx, report); err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("failed to delete MetricCollectorReport: %w", err) + } + + klog.InfoS("Deleted MetricCollectorReport from hub", "report", mc.Name, "namespace", reportNamespace) + return nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { + r.recorder = mgr.GetEventRecorderFor("metriccollector-controller") + return ctrl.NewControllerManagedBy(mgr). + Named("metriccollector-controller"). + For(&placementv1beta1.MetricCollector{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). + Complete(r) +} diff --git a/standalone-metric-collector/HUB_SETUP.md b/standalone-metric-collector/HUB_SETUP.md new file mode 100644 index 000000000..a0d27fa1c --- /dev/null +++ b/standalone-metric-collector/HUB_SETUP.md @@ -0,0 +1,212 @@ +# Hub Cluster RBAC Setup + +This guide explains how to set up RBAC permissions on the hub cluster for the MetricCollector controller running on member clusters. 
+ +## Overview + +The MetricCollector controller needs permissions on the **hub cluster** to: +- Create/update `MetricCollectorReport` resources in `fleet-{cluster}` namespaces +- List namespaces + +## Option 1: Using the Helm Chart Template (Recommended) + +Generate and apply the RBAC resources from the helm chart: + +```bash +# Generate hub RBAC manifest +helm template metric-collector ./charts/metric-collector \ + --set hubCluster.createRBAC=true \ + --set memberCluster.name=cluster-1 \ + --show-only templates/hub-rbac.yaml > hub-rbac.yaml + +# Apply on the hub cluster +kubectl apply -f hub-rbac.yaml --context=hub-cluster +``` + +## Option 2: Manual RBAC Setup + +Apply this manifest directly on the hub cluster: + +```yaml +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metric-collector-hub-access + labels: + app: metric-collector +rules: + # MetricCollectorReport access + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["metriccollectorreports"] + verbs: ["get", "list", "create", "update", "patch", "delete"] + # Namespace access + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metric-collector-cluster-1 + labels: + app: metric-collector + fleet.kubernetes.io/member-cluster: cluster-1 +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metric-collector-hub-access +subjects: + # Option A: Use ServiceAccount from hub cluster + - kind: ServiceAccount + name: metric-collector-sa + namespace: fleet-member-cluster-1 + + # Option B: Use token directly (for testing) + # Create token secret on member cluster and reference here +``` + +## Creating ServiceAccount Token for Member Cluster + +On the **hub cluster**: + +```bash +# 1. Create namespace for the member cluster +kubectl create namespace fleet-member-cluster-1 + +# 2. 
Create ServiceAccount +kubectl create serviceaccount metric-collector-sa -n fleet-member-cluster-1 + +# 3. Bind to ClusterRole +kubectl create clusterrolebinding metric-collector-cluster-1 \ + --clusterrole=metric-collector-hub-access \ + --serviceaccount=fleet-member-cluster-1:metric-collector-sa + +# 4. Create token secret +kubectl apply -f - < hub-token.txt + +# 6. Get CA certificate +kubectl get secret metric-collector-token -n fleet-member-cluster-1 \ + -o jsonpath='{.data.ca\.crt}' | base64 -d > hub-ca.crt +``` + +On the **member cluster**: + +```bash +# 1. Create namespace +kubectl create namespace fleet-system + +# 2. Create token secret +kubectl create secret generic hub-token \ + --from-file=token=hub-token.txt \ + -n fleet-system + +# 3. (Optional) Create CA secret +kubectl create secret generic hub-ca \ + --from-file=ca.crt=hub-ca.crt \ + -n fleet-system + +# 4. Install the chart +helm install metric-collector ./charts/metric-collector \ + --namespace fleet-system \ + --set memberCluster.name=cluster-1 \ + --set hubCluster.url=https://hub-cluster:6443 \ + --set hubCluster.tls.caSecretName=hub-ca \ + --set prometheus.url=http://prometheus:9090 +``` + +## Verification + +On the **hub cluster**: + +```bash +# Check if RBAC is created +kubectl get clusterrole metric-collector-hub-access +kubectl get clusterrolebinding metric-collector-cluster-1 + +# Check if namespace exists for reports +kubectl get namespace fleet-cluster-1 + +# Watch for reports +kubectl get metriccollectorreports -n fleet-cluster-1 --watch +``` + +On the **member cluster**: + +```bash +# Check controller logs +kubectl logs -n fleet-system deployment/metric-collector -f + +# Check for errors +kubectl logs -n fleet-system deployment/metric-collector | grep -i error +``` + +## Troubleshooting + +### Permission Denied Errors + +If you see errors like: +``` +Failed to sync report to hub: ... 
forbidden: User "system:serviceaccount:fleet-member-cluster-1:metric-collector-sa" cannot create resource "metriccollectorreports"
+```
+
+**Solution**: Verify RBAC is correctly configured on hub cluster:
+```bash
+kubectl auth can-i create metriccollectorreports \
+  --as=system:serviceaccount:fleet-member-cluster-1:metric-collector-sa \
+  -n fleet-member-cluster-1 \
+  --context=hub-cluster
+```
+
+### Token Expired
+
+If authentication fails:
+```
+Failed to connect to hub: Unauthorized
+```
+
+**Solution**: Regenerate the token secret on the hub cluster and update the secret on the member cluster.
+
+### Namespace Not Found
+
+If reports fail to be created:
+```
+Failed to sync report: namespace "fleet-member-cluster-1" not found
+```
+
+**Solution**: Ensure the `fleet-member-{cluster}` namespace exists on the hub cluster. The hub agent typically creates these.
+
+## Multi-Cluster Setup
+
+For multiple member clusters, repeat the RBAC setup for each cluster:
+
+```bash
+# For cluster-1
+helm template metric-collector ./charts/metric-collector \
+  --set hubCluster.createRBAC=true \
+  --set memberCluster.name=cluster-1 \
+  --show-only templates/hub-rbac.yaml | \
+  kubectl apply -f - --context=hub-cluster
+
+# For cluster-2
+helm template metric-collector ./charts/metric-collector \
+  --set hubCluster.createRBAC=true \
+  --set memberCluster.name=cluster-2 \
+  --show-only templates/hub-rbac.yaml | \
+  kubectl apply -f - --context=hub-cluster
+```
+
+Each member cluster will have its own ClusterRoleBinding with a unique ServiceAccount. 
diff --git a/standalone-metric-collector/Makefile b/standalone-metric-collector/Makefile new file mode 100644 index 000000000..b7025d5f4 --- /dev/null +++ b/standalone-metric-collector/Makefile @@ -0,0 +1,141 @@ +# Image URL to use for building/pushing image targets +REGISTRY ?= ghcr.io/kubefleet-dev +IMAGE_NAME ?= metric-collector +TAG ?= latest +IMG ?= $(REGISTRY)/$(IMAGE_NAME):$(TAG) + +# Go parameters +GOOS ?= $(shell go env GOOS) +GOARCH ?= $(shell go env GOARCH) + +# Directories +ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +TOOLS_DIR := hack/tools +TOOLS_BIN_DIR := $(abspath $(TOOLS_DIR)/bin) + +# Binaries +CONTROLLER_GEN_VER := v0.16.0 +CONTROLLER_GEN_BIN := controller-gen +CONTROLLER_GEN := $(abspath $(TOOLS_BIN_DIR)/$(CONTROLLER_GEN_BIN)-$(CONTROLLER_GEN_VER)) + +GOIMPORTS_VER := latest +GOIMPORTS_BIN := goimports +GOIMPORTS := $(abspath $(TOOLS_BIN_DIR)/$(GOIMPORTS_BIN)-$(GOIMPORTS_VER)) + +# Scripts +GO_INSTALL := ../hack/go-install.sh + +# CRD Options +CRD_OPTIONS ?= "crd" + +##@ General + +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +##@ Tooling + +$(CONTROLLER_GEN): + GOBIN=$(TOOLS_BIN_DIR) $(GO_INSTALL) sigs.k8s.io/controller-tools/cmd/controller-gen $(CONTROLLER_GEN_BIN) $(CONTROLLER_GEN_VER) + +$(GOIMPORTS): + GOBIN=$(TOOLS_BIN_DIR) $(GO_INSTALL) golang.org/x/tools/cmd/goimports $(GOIMPORTS_BIN) $(GOIMPORTS_VER) + +##@ Development + +.PHONY: manifests +manifests: $(CONTROLLER_GEN) ## Generate CRD manifests. + $(CONTROLLER_GEN) \ + $(CRD_OPTIONS) paths="./apis/..." output:crd:artifacts:config=config/crd/bases + @echo "Copying CRDs to helm chart..." + @cp config/crd/bases/*.yaml charts/metric-collector/templates/crds/ + +.PHONY: generate +generate: $(CONTROLLER_GEN) ## Generate code (DeepCopy, etc). 
+ $(CONTROLLER_GEN) \ + object:headerFile="../hack/boilerplate.go.txt" paths="./..." + +.PHONY: fmt +fmt: ## Run go fmt against code. + go fmt ./... + +.PHONY: vet +vet: ## Run go vet against code. + go vet ./... + +.PHONY: imports +imports: $(GOIMPORTS) ## Organize imports. + $(GOIMPORTS) -local github.com/kubefleet-dev/standalone-metric-collector -w . + +.PHONY: test +test: fmt vet ## Run tests. + go test ./... -coverprofile cover.out + +##@ Build + +.PHONY: build +build: fmt vet ## Build metric-collector binary. + CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(GOARCH) go build -o bin/metric-collector ./cmd/metriccollector + +.PHONY: run +run: fmt vet ## Run controller from your host. + go run ./cmd/metriccollector/main.go + +.PHONY: docker-build +docker-build: ## Build docker image. + docker build -t ${IMG} -f docker/Dockerfile . + +.PHONY: docker-push +docker-push: ## Push docker image. + docker push ${IMG} + +.PHONY: docker-build-push +docker-build-push: docker-build docker-push ## Build and push docker image. + +##@ Deployment + +.PHONY: helm-lint +helm-lint: ## Lint helm chart. + helm lint charts/metric-collector + +.PHONY: helm-template +helm-template: ## Template helm chart. + helm template metric-collector charts/metric-collector \ + --set memberCluster.name=cluster-1 \ + --set hubCluster.url=https://hub-cluster:6443 \ + --set prometheus.url=http://prometheus.test-ns:9090 + +.PHONY: helm-package +helm-package: helm-lint ## Package helm chart. + helm package charts/metric-collector -d dist/ + +.PHONY: helm-install +helm-install: ## Install helm chart. + helm upgrade --install metric-collector charts/metric-collector \ + --namespace fleet-system --create-namespace \ + --set memberCluster.name=$(CLUSTER_NAME) \ + --set hubCluster.url=$(HUB_URL) \ + --set prometheus.url=$(PROMETHEUS_URL) \ + --set image.repository=$(REGISTRY)/$(IMAGE_NAME) \ + --set image.tag=$(TAG) + +.PHONY: helm-uninstall +helm-uninstall: ## Uninstall helm chart. 
+ helm uninstall metric-collector --namespace fleet-system + +##@ CRD + +.PHONY: install-crds +install-crds: ## Install CRDs into the K8s cluster. + kubectl apply -f config/crd/bases/ + +.PHONY: uninstall-crds +uninstall-crds: ## Uninstall CRDs from the K8s cluster. + kubectl delete -f config/crd/bases/ + +##@ Cleanup + +.PHONY: clean +clean: ## Clean build artifacts. + rm -rf bin/ dist/ cover.out diff --git a/standalone-metric-collector/QUICKSTART.md b/standalone-metric-collector/QUICKSTART.md new file mode 100644 index 000000000..6560f8efb --- /dev/null +++ b/standalone-metric-collector/QUICKSTART.md @@ -0,0 +1,259 @@ +# Quick Start Guide + +This guide will help you quickly install and configure the MetricCollector on a member cluster. + +## Prerequisites + +- Kubernetes member cluster (v1.24+) +- Access to a hub cluster +- Prometheus running on the member cluster +- Helm 3.x + +## Installation Steps + +### Step 1: Setup Hub Cluster RBAC + +See [HUB_SETUP.md](HUB_SETUP.md) for detailed instructions. 
+ +Quick version: + +```bash +# On hub cluster +kubectl create namespace fleet-member-cluster-1 +kubectl create serviceaccount metric-collector-sa -n fleet-member-cluster-1 + +# Apply RBAC +helm template metric-collector ./charts/metric-collector \ + --set hubCluster.createRBAC=true \ + --set memberCluster.name=cluster-1 \ + --show-only templates/hub-rbac.yaml | \ + kubectl apply -f - --context=hub-cluster + +# Create token +kubectl apply -f - < hub-token.txt +``` + +### Step 2: Install MetricCollector CRDs + +```bash +# On member cluster +kubectl apply -f config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml +kubectl apply -f config/crd/bases/placement.kubernetes-fleet.io_metriccollectorreports.yaml +``` + +### Step 3: Create Hub Token Secret + +```bash +# On member cluster +kubectl create namespace fleet-system + +kubectl create secret generic hub-token \ + --from-file=token=hub-token.txt \ + -n fleet-system +``` + +### Step 4: Install Helm Chart + +```bash +# On member cluster +helm install metric-collector ./charts/metric-collector \ + --namespace fleet-system \ + --set memberCluster.name=cluster-1 \ + --set hubCluster.url=https://hub-api-server:6443 \ + --set prometheus.url=http://prometheus.test-ns:9090 +``` + +### Step 5: Verify Installation + +```bash +# Check pod is running +kubectl get pods -n fleet-system + +# Check logs +kubectl logs -n fleet-system deployment/metric-collector + +# On hub cluster, check for reports +kubectl get metriccollectorreports -n fleet-cluster-1 +``` + +## Example: Complete Setup + +Here's a complete example with real values: + +```bash +# === On Hub Cluster === +export MEMBER_CLUSTER_NAME="prod-us-east-1" +export HUB_NAMESPACE="fleet-member-${MEMBER_CLUSTER_NAME}" + +# Create namespace and SA +kubectl create namespace ${HUB_NAMESPACE} +kubectl create serviceaccount metric-collector-sa -n ${HUB_NAMESPACE} + +# Apply RBAC +cat < /tmp/hub-token.txt + +echo "Hub token saved to /tmp/hub-token.txt" + +# Get hub API 
server URL +export HUB_URL=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}') +echo "Hub URL: ${HUB_URL}" + +# === On Member Cluster === + +# Switch to member cluster context +kubectl config use-context prod-us-east-1 + +# Create namespace +kubectl create namespace fleet-system + +# Create token secret +kubectl create secret generic hub-token \ + --from-file=token=/tmp/hub-token.txt \ + -n fleet-system + +# Install CRDs +kubectl apply -f config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml +kubectl apply -f config/crd/bases/placement.kubernetes-fleet.io_metriccollectorreports.yaml + +# Install chart +helm install metric-collector ./charts/metric-collector \ + --namespace fleet-system \ + --set memberCluster.name=${MEMBER_CLUSTER_NAME} \ + --set hubCluster.url=${HUB_URL} \ + --set prometheus.url=http://prometheus.monitoring:9090 \ + --set image.tag=v0.1.0 \ + --set resources.limits.memory=256Mi \ + --set resources.requests.memory=128Mi + +# Verify +kubectl get pods -n fleet-system +kubectl logs -n fleet-system -l app.kubernetes.io/name=metric-collector --tail=50 + +# === Verification on Hub === +kubectl config use-context hub-cluster +kubectl get metriccollectorreports -n fleet-${MEMBER_CLUSTER_NAME} +``` + +## Next Steps + +1. **Create a MetricCollector**: See [examples/metriccollector-sample.yaml](../examples/metrics/metriccollector-sample.yaml) +2. **Setup Prometheus**: Ensure Prometheus has `workload_health` metrics +3. **Monitor Reports**: Watch for `MetricCollectorReport` resources on the hub cluster +4. 
**Integration**: Use with ApprovalRequest controller for automated health checks + +## Common Configuration + +### Custom Prometheus URL + +```bash +helm upgrade metric-collector ./charts/metric-collector \ + --namespace fleet-system \ + --reuse-values \ + --set prometheus.url=http://prometheus.custom-ns:9090 +``` + +### Use Certificate Auth Instead of Token + +```bash +# Create cert secret +kubectl create secret tls hub-cert \ + --cert=hub-client.crt \ + --key=hub-client.key \ + -n fleet-system + +helm upgrade metric-collector ./charts/metric-collector \ + --namespace fleet-system \ + --reuse-values \ + --set hubCluster.auth.useTokenAuth=false \ + --set hubCluster.auth.useCertificateAuth=true \ + --set hubCluster.auth.certSecretName=hub-cert +``` + +### Adjust Collection Interval + +Edit the MetricCollector resource: + +```yaml +apiVersion: placement.kubernetes-fleet.io/v1beta1 +kind: MetricCollector +metadata: + name: workload-health-collector +spec: + prometheusUrl: http://prometheus:9090 + promQLQuery: workload_health + pollingIntervalSeconds: 60 # Collect every 60 seconds + reportNamespace: fleet-member-cluster-1 +``` + +## Uninstall + +```bash +# On member cluster +helm uninstall metric-collector -n fleet-system + +# On hub cluster +kubectl delete clusterrolebinding metric-collector-cluster-1 +kubectl delete clusterrole metric-collector-hub-access +kubectl delete namespace fleet-member-cluster-1 +``` + +## Support + +For issues, see [Troubleshooting](README.md#troubleshooting) in the main README. diff --git a/standalone-metric-collector/README.md b/standalone-metric-collector/README.md new file mode 100644 index 000000000..e0783ff53 --- /dev/null +++ b/standalone-metric-collector/README.md @@ -0,0 +1,185 @@ +# Standalone MetricCollector + +This is a standalone implementation of the MetricCollector controller for Kubernetes Fleet. It collects workload health metrics from Prometheus on member clusters and reports them to the hub cluster.
+ +## Overview + +The MetricCollector controller: +- Runs on member clusters +- Watches `MetricCollector` CRDs on the member cluster +- Queries Prometheus for `workload_health` metrics +- Creates/updates `MetricCollectorReport` resources on the hub cluster in `fleet-{cluster}` namespaces +- Supports both token and certificate-based authentication to the hub cluster + +## Prerequisites + +- Kubernetes cluster (member cluster) +- Access to a hub cluster +- Prometheus running on the member cluster +- Hub cluster credentials (token or certificates) + +## Installation + +### 1. Create Hub Token Secret + +On the **member cluster**, create a secret with the hub cluster token: + +```bash +kubectl create namespace fleet-system + +kubectl create secret generic hub-token \ + --from-literal=token= \ + -n fleet-system +``` + +### 2. Install the Helm Chart + +```bash +helm install metric-collector ./charts/metric-collector \ + --namespace fleet-system \ + --set memberCluster.name=cluster-1 \ + --set hubCluster.url=https://hub-cluster:6443 \ + --set prometheus.url=http://prometheus.test-ns:9090 +``` + +### 3. Verify Installation + +```bash +kubectl get pods -n fleet-system +kubectl logs -n fleet-system deployment/metric-collector +``` + +## Configuration + +Key configuration options in `values.yaml`: + +```yaml +memberCluster: + name: "cluster-1" # Your cluster name + +hubCluster: + url: "https://hub-cluster:6443" # Hub API server URL + auth: + tokenSecretName: "hub-token" # Secret with hub token + +prometheus: + url: "http://prometheus.test-ns:9090" # Prometheus URL +``` + +See [values.yaml](charts/metric-collector/values.yaml) for all options. 
+ +## Hub Cluster Setup + +On the **hub cluster**, you need to create RBAC permissions for the MetricCollector: + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metric-collector-hub-access +rules: + - apiGroups: ["placement.kubernetes-fleet.io"] + resources: ["metriccollectorreports"] + verbs: ["get", "list", "create", "update", "patch", "delete"] + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metric-collector-cluster-1 +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metric-collector-hub-access +subjects: + - kind: ServiceAccount + name: metric-collector-sa # Match your token's SA + namespace: fleet-system +``` + +## Development + +### Build Binary + +```bash +make build +``` + +### Build Docker Image + +```bash +make docker-build IMG=your-registry/metric-collector:tag +``` + +### Run Locally + +```bash +export HUB_SERVER_URL=https://hub-cluster:6443 +export PROMETHEUS_URL=http://localhost:9090 +export CONFIG_PATH=/path/to/hub-token + +make run +``` + +## Architecture + +``` +Member Cluster: + ┌─────────────────────────────────────┐ + │ MetricCollector Controller │ + │ ┌───────────────────────────────┐ │ + │ │ Member Client (in-cluster) │ │ ─── Watches MetricCollector CRDs + │ └───────────────────────────────┘ │ + │ ┌───────────────────────────────┐ │ + │ │ Hub Client (remote) │ │ ─── Creates MetricCollectorReport + │ └───────────────────────────────┘ │ + │ ┌───────────────────────────────┐ │ + │ │ Prometheus Client │ │ ─── Queries metrics + │ └───────────────────────────────┘ │ + └─────────────────────────────────────┘ + │ │ + │ └────────────────┐ + ▼ ▼ + Prometheus (9090) Hub Cluster (6443) + workload_health metrics fleet-{cluster} namespace +``` + +## Troubleshooting + +### Controller not starting + +Check logs: +```bash +kubectl logs -n fleet-system deployment/metric-collector 
+``` + +Common issues: +- Hub cluster URL incorrect +- Token expired or invalid +- Network connectivity to hub cluster + +### No metrics collected + +1. Verify Prometheus is accessible: +```bash +kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- \ + curl http://prometheus.test-ns:9090/api/v1/query?query=workload_health +``` + +2. Check MetricCollector status: +```bash +kubectl get metriccollectors -A +kubectl describe metriccollector +``` + +### Reports not appearing on hub + +1. Verify hub cluster connectivity +2. Check RBAC permissions on hub cluster +3. Verify `fleet-{cluster}` namespace exists on hub + +## License + +Apache License 2.0 diff --git a/standalone-metric-collector/apis/interface.go b/standalone-metric-collector/apis/interface.go new file mode 100644 index 000000000..0f8ada820 --- /dev/null +++ b/standalone-metric-collector/apis/interface.go @@ -0,0 +1,38 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package apis contains API interfaces for the fleet API group. +package apis + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// A Conditioned may have conditions set or retrieved. Conditions typically +// indicate the status of both a resource and its reconciliation process. 
+// +kubebuilder:object:generate=false +type Conditioned interface { + SetConditions(...metav1.Condition) + GetCondition(string) *metav1.Condition +} + +// A ConditionedObj is for kubernetes resource with conditions. +// +kubebuilder:object:generate=false +type ConditionedObj interface { + client.Object + Conditioned +} diff --git a/standalone-metric-collector/apis/v1beta1/groupversion_info.go b/standalone-metric-collector/apis/v1beta1/groupversion_info.go new file mode 100644 index 000000000..c488f5dca --- /dev/null +++ b/standalone-metric-collector/apis/v1beta1/groupversion_info.go @@ -0,0 +1,35 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +kubebuilder:object:generate=true +// +groupName=placement.kubernetes-fleet.io +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "placement.kubernetes-fleet.io", Version: "v1beta1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. 
+ AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/standalone-metric-collector/apis/v1beta1/metriccollector_types.go b/standalone-metric-collector/apis/v1beta1/metriccollector_types.go new file mode 100644 index 000000000..729a0ff22 --- /dev/null +++ b/standalone-metric-collector/apis/v1beta1/metriccollector_types.go @@ -0,0 +1,151 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/kubefleet-dev/standalone-metric-collector/apis" +) + +// +genclient +// +genclient:nonNamespaced +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope="Cluster",shortName=mc,categories={fleet,fleet-metrics} +// +kubebuilder:subresource:status +// +kubebuilder:storageversion +// +kubebuilder:printcolumn:JSONPath=`.metadata.generation`,name="Gen",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="MetricCollectorReady")].status`,name="Ready",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.workloadsMonitored`,name="Workloads",type=integer +// +kubebuilder:printcolumn:JSONPath=`.status.lastCollectionTime`,name="Last-Collection",type=date +// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date + +// MetricCollector is used by member-agent to scrape and collect metrics from workloads +// running on the member cluster. 
It runs on each member cluster and collects metrics +// from Prometheus-compatible endpoints. +type MetricCollector struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // The desired state of MetricCollector. + // +required + Spec MetricCollectorSpec `json:"spec"` + + // The observed status of MetricCollector. + // +optional + Status MetricCollectorStatus `json:"status,omitempty"` +} + +// MetricCollectorSpec defines the desired state of MetricCollector. +type MetricCollectorSpec struct { + // PrometheusURL is the URL of the Prometheus server. + // Example: http://prometheus.test-ns.svc.cluster.local:9090 + // +required + // +kubebuilder:validation:Pattern=`^https?://.*$` + PrometheusURL string `json:"prometheusUrl"` + + // ReportNamespace is the namespace in the hub cluster where the MetricCollectorReport will be created. + // This should be the fleet-member-{clusterName} namespace. + // Example: fleet-member-cluster-1 + // +required + ReportNamespace string `json:"reportNamespace"` +} + +// MetricCollectorStatus defines the observed state of MetricCollector. +type MetricCollectorStatus struct { + // Conditions is an array of current observed conditions. + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // ObservedGeneration is the generation most recently observed. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // WorkloadsMonitored is the count of workloads being monitored. + // +optional + WorkloadsMonitored int32 `json:"workloadsMonitored,omitempty"` + + // LastCollectionTime is when metrics were last collected. + // +optional + LastCollectionTime *metav1.Time `json:"lastCollectionTime,omitempty"` + + // CollectedMetrics contains the most recent metrics from each workload. + // +optional + CollectedMetrics []WorkloadMetrics `json:"collectedMetrics,omitempty"` +} + +// WorkloadMetrics represents metrics collected from a single workload pod.
+type WorkloadMetrics struct { + // Namespace is the namespace of the pod. + // +required + Namespace string `json:"namespace"` + + // ClusterName from the workload_health metric label. + // +required + ClusterName string `json:"clusterName"` + + // WorkloadName from the workload_health metric label (typically the deployment name). + // +required + WorkloadName string `json:"workloadName"` + + // Health indicates if the workload is healthy (true=healthy, false=unhealthy). + // +required + Health bool `json:"health"` +} + +const ( + // MetricCollectorConditionTypeReady indicates the collector is ready. + MetricCollectorConditionTypeReady string = "MetricCollectorReady" + + // MetricCollectorConditionTypeCollecting indicates metrics are being collected. + MetricCollectorConditionTypeCollecting string = "MetricsCollecting" + + // MetricCollectorConditionTypeReported indicates metrics were successfully reported to hub. + MetricCollectorConditionTypeReported string = "MetricsReported" +) + +// +kubebuilder:object:root=true + +// MetricCollectorList contains a list of MetricCollector. +type MetricCollectorList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []MetricCollector `json:"items"` +} + +// GetConditions returns the conditions of the MetricCollector. +func (m *MetricCollector) GetConditions() []metav1.Condition { + return m.Status.Conditions +} + +// SetConditions sets the conditions of the MetricCollector. +func (m *MetricCollector) SetConditions(conditions ...metav1.Condition) { + m.Status.Conditions = conditions +} + +// GetCondition returns the condition of the given MetricCollector. +func (m *MetricCollector) GetCondition(conditionType string) *metav1.Condition { + return meta.FindStatusCondition(m.Status.Conditions, conditionType) +} + +// Ensure MetricCollector implements the ConditionedObj interface. 
+var _ apis.ConditionedObj = &MetricCollector{} + +func init() { + SchemeBuilder.Register(&MetricCollector{}, &MetricCollectorList{}) +} diff --git a/standalone-metric-collector/apis/v1beta1/metriccollectorreport_types.go b/standalone-metric-collector/apis/v1beta1/metriccollectorreport_types.go new file mode 100644 index 000000000..209da838a --- /dev/null +++ b/standalone-metric-collector/apis/v1beta1/metriccollectorreport_types.go @@ -0,0 +1,86 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope="Namespaced",shortName=mcr,categories={fleet,fleet-metrics} +// +kubebuilder:storageversion +// +kubebuilder:printcolumn:JSONPath=`.workloadsMonitored`,name="Workloads",type=integer +// +kubebuilder:printcolumn:JSONPath=`.lastCollectionTime`,name="Last-Collection",type=date +// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date + +// MetricCollectorReport is created by the MetricCollector controller on the hub cluster +// in the fleet-member-{clusterName} namespace to report collected metrics from a member cluster. +// The controller watches MetricCollector objects on the member cluster, collects metrics, +// and syncs the status to the hub as MetricCollectorReport objects. +// +// Controller workflow: +// 1. 
MetricCollector reconciles and collects metrics on member cluster +// 2. Metrics include clusterName from workload_health labels +// 3. Controller creates/updates MetricCollectorReport in fleet-member-{clusterName} namespace on hub +// 4. Report name matches MetricCollector name for easy lookup +// +// Namespace: fleet-member-{clusterName} (extracted from CollectedMetrics[0].ClusterName) +// Name: Same as MetricCollector name +// All metrics in CollectedMetrics are guaranteed to have the same ClusterName. +type MetricCollectorReport struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Conditions copied from the MetricCollector status. + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // ObservedGeneration is the generation most recently observed from the MetricCollector. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // WorkloadsMonitored is the count of workloads being monitored. + // +optional + WorkloadsMonitored int32 `json:"workloadsMonitored,omitempty"` + + // LastCollectionTime is when metrics were last collected on the member cluster. + // +optional + LastCollectionTime *metav1.Time `json:"lastCollectionTime,omitempty"` + + // CollectedMetrics contains the most recent metrics from each workload. + // All metrics are guaranteed to have the same ClusterName since they're collected from one member cluster. + // +optional + CollectedMetrics []WorkloadMetrics `json:"collectedMetrics,omitempty"` + + // LastReportTime is when this report was last synced to the hub. + // +optional + LastReportTime *metav1.Time `json:"lastReportTime,omitempty"` +} + +// +kubebuilder:object:root=true + +// MetricCollectorReportList contains a list of MetricCollectorReport. 
+type MetricCollectorReportList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []MetricCollectorReport `json:"items"` +} + +func init() { + SchemeBuilder.Register(&MetricCollectorReport{}, &MetricCollectorReportList{}) +} diff --git a/standalone-metric-collector/apis/v1beta1/zz_generated.deepcopy.go b/standalone-metric-collector/apis/v1beta1/zz_generated.deepcopy.go new file mode 100644 index 000000000..ed9591cbb --- /dev/null +++ b/standalone-metric-collector/apis/v1beta1/zz_generated.deepcopy.go @@ -0,0 +1,223 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollector) DeepCopyInto(out *MetricCollector) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollector. 
+func (in *MetricCollector) DeepCopy() *MetricCollector { + if in == nil { + return nil + } + out := new(MetricCollector) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollector) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorList) DeepCopyInto(out *MetricCollectorList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MetricCollector, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorList. +func (in *MetricCollectorList) DeepCopy() *MetricCollectorList { + if in == nil { + return nil + } + out := new(MetricCollectorList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollectorList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetricCollectorReport) DeepCopyInto(out *MetricCollectorReport) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LastCollectionTime != nil { + in, out := &in.LastCollectionTime, &out.LastCollectionTime + *out = (*in).DeepCopy() + } + if in.CollectedMetrics != nil { + in, out := &in.CollectedMetrics, &out.CollectedMetrics + *out = make([]WorkloadMetrics, len(*in)) + copy(*out, *in) + } + if in.LastReportTime != nil { + in, out := &in.LastReportTime, &out.LastReportTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorReport. +func (in *MetricCollectorReport) DeepCopy() *MetricCollectorReport { + if in == nil { + return nil + } + out := new(MetricCollectorReport) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollectorReport) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorReportList) DeepCopyInto(out *MetricCollectorReportList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MetricCollectorReport, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorReportList. 
+func (in *MetricCollectorReportList) DeepCopy() *MetricCollectorReportList { + if in == nil { + return nil + } + out := new(MetricCollectorReportList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetricCollectorReportList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorSpec) DeepCopyInto(out *MetricCollectorSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorSpec. +func (in *MetricCollectorSpec) DeepCopy() *MetricCollectorSpec { + if in == nil { + return nil + } + out := new(MetricCollectorSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricCollectorStatus) DeepCopyInto(out *MetricCollectorStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LastCollectionTime != nil { + in, out := &in.LastCollectionTime, &out.LastCollectionTime + *out = (*in).DeepCopy() + } + if in.CollectedMetrics != nil { + in, out := &in.CollectedMetrics, &out.CollectedMetrics + *out = make([]WorkloadMetrics, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricCollectorStatus. +func (in *MetricCollectorStatus) DeepCopy() *MetricCollectorStatus { + if in == nil { + return nil + } + out := new(MetricCollectorStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. +func (in *WorkloadMetrics) DeepCopyInto(out *WorkloadMetrics) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadMetrics. +func (in *WorkloadMetrics) DeepCopy() *WorkloadMetrics { + if in == nil { + return nil + } + out := new(WorkloadMetrics) + in.DeepCopyInto(out) + return out +} diff --git a/standalone-metric-collector/charts/metric-collector/Chart.yaml b/standalone-metric-collector/charts/metric-collector/Chart.yaml new file mode 100644 index 000000000..2ea221ded --- /dev/null +++ b/standalone-metric-collector/charts/metric-collector/Chart.yaml @@ -0,0 +1,16 @@ +apiVersion: v2 +name: metric-collector +description: MetricCollector for Kubernetes Fleet - Collects workload health metrics and reports to hub cluster +type: application +version: 0.1.0 +appVersion: "latest" +keywords: + - kubernetes + - fleet + - metrics + - monitoring +maintainers: + - name: KubeFleet Team +home: https://github.com/kubefleet-dev/kubefleet +sources: + - https://github.com/kubefleet-dev/kubefleet/tree/main/standalone-metric-collector diff --git a/standalone-metric-collector/charts/metric-collector/templates/_helpers.tpl b/standalone-metric-collector/charts/metric-collector/templates/_helpers.tpl new file mode 100644 index 000000000..653f3de24 --- /dev/null +++ b/standalone-metric-collector/charts/metric-collector/templates/_helpers.tpl @@ -0,0 +1,60 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "metric-collector.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+*/}} +{{- define "metric-collector.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "metric-collector.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "metric-collector.labels" -}} +helm.sh/chart: {{ include "metric-collector.chart" . }} +{{ include "metric-collector.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "metric-collector.selectorLabels" -}} +app.kubernetes.io/name: {{ include "metric-collector.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "metric-collector.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "metric-collector.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/standalone-metric-collector/charts/metric-collector/templates/crds/placement.kubernetes-fleet.io_metriccollectors.yaml b/standalone-metric-collector/charts/metric-collector/templates/crds/placement.kubernetes-fleet.io_metriccollectors.yaml new file mode 100644 index 000000000..47679a15f --- /dev/null +++ b/standalone-metric-collector/charts/metric-collector/templates/crds/placement.kubernetes-fleet.io_metriccollectors.yaml @@ -0,0 +1,189 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: metriccollectors.placement.kubernetes-fleet.io +spec: + group: placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-metrics + kind: MetricCollector + listKind: MetricCollectorList + plural: metriccollectors + shortNames: + - mc + singular: metriccollector + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.generation + name: Gen + type: string + - jsonPath: .status.conditions[?(@.type=="MetricCollectorReady")].status + name: Ready + type: string + - jsonPath: .status.workloadsMonitored + name: Workloads + type: integer + - jsonPath: .status.lastCollectionTime + name: Last-Collection + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + MetricCollector is used by member-agent to scrape and collect metrics from workloads + running on the member cluster. It runs on each member cluster and collects metrics + from Prometheus-compatible endpoints. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: The desired state of MetricCollector. + properties: + prometheusUrl: + description: |- + PrometheusURL is the URL of the Prometheus server. + Example: http://prometheus.test-ns.svc.cluster.local:9090 + pattern: ^https?://.*$ + type: string + reportNamespace: + description: |- + ReportNamespace is the namespace in the hub cluster where the MetricCollectorReport will be created. + This should be the fleet-member-{clusterName} namespace. + Example: fleet-member-cluster-1 + type: string + required: + - prometheusUrl + - reportNamespace + type: object + status: + description: The observed status of MetricCollector. + properties: + collectedMetrics: + description: CollectedMetrics contains the most recent metrics from + each workload. + items: + description: WorkloadMetrics represents metrics collected from a + single workload pod. + properties: + clusterName: + description: ClusterName from the workload_health metric label. + type: string + health: + description: Health indicates if the workload is healthy (true=healthy, + false=unhealthy). + type: boolean + namespace: + description: Namespace is the namespace of the pod. + type: string + workloadName: + description: WorkloadName from the workload_health metric label + (typically the deployment name). + type: string + required: + - clusterName + - health + - namespace + - workloadName + type: object + type: array + conditions: + description: Conditions is an array of current observed conditions. 
+ items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + lastCollectionTime: + description: LastCollectionTime is when metrics were last collected. + format: date-time + type: string + observedGeneration: + description: ObservedGeneration is the generation most recently observed. + format: int64 + type: integer + workloadsMonitored: + description: WorkloadsMonitored is the count of workloads being monitored. + format: int32 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/standalone-metric-collector/charts/metric-collector/templates/deployment.yaml b/standalone-metric-collector/charts/metric-collector/templates/deployment.yaml new file mode 100644 index 000000000..3e22a23d1 --- /dev/null +++ b/standalone-metric-collector/charts/metric-collector/templates/deployment.yaml @@ -0,0 +1,155 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "metric-collector.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "metric-collector.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.controller.replicas }} + selector: + matchLabels: + {{- include "metric-collector.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "metric-collector.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "metric-collector.serviceAccountName" . 
}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: controller + securityContext: + {{- toYaml .Values.securityContext | nindent 10 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - /metric-collector + args: + - --v={{ .Values.controller.logLevel }} + - --member-qps=100 + - --member-burst=200 + - --hub-qps=100 + - --hub-burst=200 + - --metrics-bind-address=:{{ .Values.metrics.port }} + - --health-probe-bind-address=:{{ .Values.healthProbe.port }} + env: + # Hub cluster connection + - name: HUB_SERVER_URL + value: {{ .Values.hubCluster.url | quote }} + + # Prometheus URL + - name: PROMETHEUS_URL + value: {{ .Values.prometheus.url | quote }} + + {{- if .Values.hubCluster.customHeader }} + - name: HUB_KUBE_HEADER + value: {{ .Values.hubCluster.customHeader | quote }} + {{- end }} + + {{- if .Values.hubCluster.auth.useCertificateAuth }} + # Certificate-based authentication + - name: IDENTITY_CERT + value: /etc/hub-certs/{{ .Values.hubCluster.auth.certSecretKey }} + - name: IDENTITY_KEY + value: /etc/hub-certs/{{ .Values.hubCluster.auth.keySecretKey }} + {{- else }} + # Token-based authentication + - name: CONFIG_PATH + value: /var/run/secrets/hub/{{ .Values.hubCluster.auth.tokenSecretKey }} + {{- end }} + + {{- if .Values.hubCluster.tls.insecure }} + - name: TLS_INSECURE + value: "true" + {{- else if .Values.hubCluster.tls.caSecretName }} + - name: HUB_CERTIFICATE_AUTHORITY + value: /etc/hub-ca/{{ .Values.hubCluster.tls.caSecretKey }} + {{- end }} + + volumeMounts: + {{- if .Values.hubCluster.auth.useCertificateAuth }} + - name: hub-certs + mountPath: /etc/hub-certs + readOnly: true + {{- else }} + - name: hub-token + mountPath: /var/run/secrets/hub + readOnly: true + {{- end }} + + {{- if and (not .Values.hubCluster.tls.insecure) .Values.hubCluster.tls.caSecretName }} + - name: hub-ca + mountPath: /etc/hub-ca 
+ readOnly: true + {{- end }} + + ports: + {{- if .Values.metrics.enabled }} + - name: metrics + containerPort: {{ .Values.metrics.port }} + protocol: TCP + {{- end }} + {{- if .Values.healthProbe.enabled }} + - name: health + containerPort: {{ .Values.healthProbe.port }} + protocol: TCP + {{- end }} + + {{- if .Values.healthProbe.enabled }} + livenessProbe: + httpGet: + path: /healthz + port: health + initialDelaySeconds: 15 + periodSeconds: 20 + + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 5 + periodSeconds: 10 + {{- end }} + + resources: + {{- toYaml .Values.controller.resources | nindent 10 }} + + volumes: + {{- if .Values.hubCluster.auth.useCertificateAuth }} + - name: hub-certs + secret: + secretName: {{ .Values.hubCluster.auth.certSecretName }} + {{- else }} + - name: hub-token + secret: + secretName: {{ .Values.hubCluster.auth.tokenSecretName }} + {{- end }} + + {{- if and (not .Values.hubCluster.tls.insecure) .Values.hubCluster.tls.caSecretName }} + - name: hub-ca + secret: + secretName: {{ .Values.hubCluster.tls.caSecretName }} + {{- end }} + + {{- with .Values.controller.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.controller.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.controller.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }}
+      {{- end }}
diff --git a/standalone-metric-collector/charts/metric-collector/templates/hub-rbac.yaml b/standalone-metric-collector/charts/metric-collector/templates/hub-rbac.yaml
new file mode 100644
index 000000000..f7089c38f
--- /dev/null
+++ b/standalone-metric-collector/charts/metric-collector/templates/hub-rbac.yaml
@@ -0,0 +1,49 @@
+{{- if .Values.hubCluster.createRBAC }}
+# This template generates RBAC resources for the hub cluster
+# Apply this on the HUB cluster to grant the metric-collector permissions
+# to create/update MetricCollectorReport resources
+#
+# Usage:
+#   helm template metric-collector ./charts/metric-collector \
+#     --set hubCluster.createRBAC=true \
+#     --show-only templates/hub-rbac.yaml | kubectl apply -f - --context=hub-cluster
+#
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ include "metric-collector.fullname" . }}-hub-access
+  labels:
+    {{- include "metric-collector.labels" . | nindent 4 }}
+    app.kubernetes.io/component: hub-rbac
+  annotations:
+    helm.sh/resource-policy: keep
+rules:
+  # MetricCollectorReport access
+  - apiGroups: ["placement.kubernetes-fleet.io"]
+    resources: ["metriccollectorreports"]
+    verbs: ["get", "list", "create", "update", "patch", "delete"]
+  # Namespace access for fleet-member-{clusterName} namespaces
+  - apiGroups: [""]
+    resources: ["namespaces"]
+    verbs: ["get", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ include "metric-collector.fullname" . }}-{{ .Values.memberCluster.name }}
+  labels:
+    {{- include "metric-collector.labels" . | nindent 4 }}
+    app.kubernetes.io/component: hub-rbac
+    fleet.kubernetes.io/member-cluster: {{ .Values.memberCluster.name }}
+  annotations:
+    helm.sh/resource-policy: keep
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ include "metric-collector.fullname" . }}-hub-access
+subjects:
+  - kind: ServiceAccount
+    name: {{ .Values.hubCluster.auth.serviceAccountName | default (include "metric-collector.serviceAccountName" .) }}
+    namespace: {{ .Values.hubCluster.auth.serviceAccountNamespace | default .Release.Namespace }}
+{{- end }}
diff --git a/standalone-metric-collector/charts/metric-collector/templates/rbac-member.yaml b/standalone-metric-collector/charts/metric-collector/templates/rbac-member.yaml
new file mode 100644
index 000000000..bad037606
--- /dev/null
+++ b/standalone-metric-collector/charts/metric-collector/templates/rbac-member.yaml
@@ -0,0 +1,44 @@
+{{- if .Values.rbac.create }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ include "metric-collector.fullname" . }}
+  labels:
+    {{- include "metric-collector.labels" . | nindent 4 }}
+rules:
+  # MetricCollector CRD access on member cluster
+  - apiGroups: ["placement.kubernetes-fleet.io"]
+    resources: ["metriccollectors"]
+    verbs: ["get", "list", "watch", "update", "patch"]
+  - apiGroups: ["placement.kubernetes-fleet.io"]
+    resources: ["metriccollectors/status"]
+    verbs: ["update", "patch"]
+  - apiGroups: ["placement.kubernetes-fleet.io"]
+    resources: ["metriccollectors/finalizers"]
+    verbs: ["update"]
+
+  # Events
+  - apiGroups: [""]
+    resources: ["events"]
+    verbs: ["create", "patch"]
+
+  # Leader election
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]
+    verbs: ["get", "create", "update", "delete"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ include "metric-collector.fullname" . }}
+  labels:
+    {{- include "metric-collector.labels" . | nindent 4 }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ include "metric-collector.fullname" . }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ include "metric-collector.serviceAccountName" . }}
+    namespace: {{ .Release.Namespace }}
+{{- end }}
diff --git a/standalone-metric-collector/charts/metric-collector/templates/serviceaccount.yaml b/standalone-metric-collector/charts/metric-collector/templates/serviceaccount.yaml
new file mode 100644
index 000000000..b5d081dab
--- /dev/null
+++ b/standalone-metric-collector/charts/metric-collector/templates/serviceaccount.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "metric-collector.serviceAccountName" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "metric-collector.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+{{- end }}
diff --git a/standalone-metric-collector/charts/metric-collector/values.yaml b/standalone-metric-collector/charts/metric-collector/values.yaml
new file mode 100644
index 000000000..d3d7f70cd
--- /dev/null
+++ b/standalone-metric-collector/charts/metric-collector/values.yaml
@@ -0,0 +1,134 @@
+# Default values for metric-collector
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+ +# Controller image configuration +image: + repository: metric-collector + pullPolicy: IfNotPresent + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +# Member cluster configuration +memberCluster: + # Name of the member cluster (required) + # This should match the cluster name in the fleet + name: "" + +# Hub cluster connection configuration +hubCluster: + # Hub API server URL (required) + # Example: https://hub-cluster.example.com:6443 + url: "" + + # Set to true to generate hub RBAC resources + # These resources must be applied on the hub cluster + createRBAC: false + + # Authentication configuration + auth: + # Token-based authentication (default) + useTokenAuth: true + tokenSecretName: "hub-token" + tokenSecretKey: "token" + + # Certificate-based authentication + useCertificateAuth: false + certSecretName: "" + certSecretKey: "tls.crt" + keySecretKey: "tls.key" + + # ServiceAccount details for RBAC binding on hub cluster + # Leave empty to use the default serviceAccount from this chart + serviceAccountName: "" + serviceAccountNamespace: "" + + # TLS configuration + tls: + # Skip TLS verification (not recommended for production) + insecure: false + # CA certificate for hub cluster + caSecretName: "" + caSecretKey: "ca.crt" + + # Custom header for hub requests (optional) + customHeader: "" + +# Prometheus configuration +prometheus: + # Prometheus URL (required) + # Example: http://prometheus.monitoring.svc.cluster.local:9090 + url: "" + +# Controller configuration +controller: + # Number of replicas + replicas: 1 + + # Collection interval (how often to scrape metrics) + collectionInterval: "30s" + + # Log verbosity level (0-10) + logLevel: 2 + + # Resource requests and limits + resources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi + + # Node selector + nodeSelector: {} + + # Tolerations + tolerations: [] + + # Affinity + affinity: {} + +# RBAC configuration +rbac: + create: true + +# 
ServiceAccount configuration +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +# Pod annotations +podAnnotations: {} + +# Pod security context +podSecurityContext: + runAsNonRoot: true + runAsUser: 65532 + fsGroup: 65532 + +# Container security context +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + +# Metrics server configuration +metrics: + enabled: true + port: 8080 + +# Health probe configuration +healthProbe: + enabled: true + port: 8081 diff --git a/standalone-metric-collector/cmd/metriccollector/main.go b/standalone-metric-collector/cmd/metriccollector/main.go new file mode 100644 index 000000000..06f9485bc --- /dev/null +++ b/standalone-metric-collector/cmd/metriccollector/main.go @@ -0,0 +1,244 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "context" + "flag" + "fmt" + "net/http" + "os" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/healthz" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + placementv1beta1 "github.com/kubefleet-dev/standalone-metric-collector/apis/v1beta1" + metriccollector "github.com/kubefleet-dev/standalone-metric-collector/pkg/controller" +) + +var ( + memberQPS = flag.Int("member-qps", 100, "QPS for member cluster client") + memberBurst = flag.Int("member-burst", 200, "Burst for member cluster client") + hubQPS = flag.Int("hub-qps", 100, "QPS for hub cluster client") + hubBurst = flag.Int("hub-burst", 200, "Burst for hub cluster client") + metricsAddr = flag.String("metrics-bind-address", ":8080", "The address the metric endpoint binds to.") + probeAddr = flag.String("health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + leaderElectionID = flag.String("leader-election-id", "metric-collector-leader", "The leader election ID.") + enableLeaderElect = flag.Bool("leader-elect", true, "Enable leader election for controller manager.") +) + +func main() { + klog.InitFlags(nil) + flag.Parse() + + klog.InfoS("Starting MetricCollector Controller") + + // Get member cluster config (in-cluster) + memberConfig := ctrl.GetConfigOrDie() + memberConfig.QPS = float32(*memberQPS) + memberConfig.Burst = *memberBurst + + // Build hub cluster config + hubConfig, err := buildHubConfig() + if err != nil { + klog.ErrorS(err, "Failed to build hub cluster config") + os.Exit(1) + } + hubConfig.QPS = float32(*hubQPS) + hubConfig.Burst = *hubBurst + + // Start controller with both clients + if err := Start(ctrl.SetupSignalHandler(), hubConfig, memberConfig); err != nil { + klog.ErrorS(err, "Failed to start controller") + os.Exit(1) + } +} 
+ +// buildHubConfig creates hub cluster config from environment variables +// following the same pattern as member-agent +func buildHubConfig() (*rest.Config, error) { + hubURL := os.Getenv("HUB_SERVER_URL") + if hubURL == "" { + return nil, fmt.Errorf("HUB_SERVER_URL environment variable not set") + } + + // Check for custom headers + customHeader := os.Getenv("HUB_KUBE_HEADER") + + // Check TLS insecure flag + tlsInsecure := os.Getenv("TLS_INSECURE") == "true" + + // Initialize hub config + hubConfig := &rest.Config{ + Host: hubURL, + TLSClientConfig: rest.TLSClientConfig{ + Insecure: tlsInsecure, + }, + WrapTransport: func(rt http.RoundTripper) http.RoundTripper { + if customHeader != "" { + return &customHeaderTransport{ + Base: rt, + Header: customHeader, + } + } + return rt + }, + } + + // Check for certificate-based authentication + identityKey := os.Getenv("IDENTITY_KEY") + identityCert := os.Getenv("IDENTITY_CERT") + if identityKey != "" && identityCert != "" { + klog.InfoS("Using certificate-based authentication for hub cluster") + // Read certificate files + certData, err := os.ReadFile(identityCert) + if err != nil { + return nil, fmt.Errorf("failed to read identity cert: %w", err) + } + keyData, err := os.ReadFile(identityKey) + if err != nil { + return nil, fmt.Errorf("failed to read identity key: %w", err) + } + hubConfig.CertData = certData + hubConfig.KeyData = keyData + } else { + // Token-based authentication + klog.InfoS("Using token-based authentication for hub cluster") + configPath := os.Getenv("CONFIG_PATH") + if configPath == "" { + configPath = "/var/run/secrets/hub/token" + } + tokenData, err := os.ReadFile(configPath) + if err != nil { + return nil, fmt.Errorf("failed to read hub token from %s: %w", configPath, err) + } + hubConfig.BearerToken = string(tokenData) + } + + // Handle CA certificate + caBundle := os.Getenv("CA_BUNDLE") + hubCA := os.Getenv("HUB_CERTIFICATE_AUTHORITY") + if caBundle != "" { + klog.InfoS("Using CA bundle for 
hub cluster TLS") + caData, err := os.ReadFile(caBundle) + if err != nil { + return nil, fmt.Errorf("failed to read CA bundle: %w", err) + } + hubConfig.CAData = caData + } else if hubCA != "" { + klog.InfoS("Using hub certificate authority for hub cluster TLS") + caData, err := os.ReadFile(hubCA) + if err != nil { + return nil, fmt.Errorf("failed to read hub CA: %w", err) + } + hubConfig.CAData = caData + } else { + // If no CA specified, try to load system CA pool + klog.InfoS("No CA specified, using insecure connection or system CA pool") + } + + return hubConfig, nil +} + +// customHeaderTransport adds custom headers to requests +type customHeaderTransport struct { + Base http.RoundTripper + Header string +} + +func (t *customHeaderTransport) RoundTrip(req *http.Request) (*http.Response, error) { + req.Header.Add("X-Custom-Header", t.Header) + return t.Base.RoundTrip(req) +} + +// Start starts the controller with dual managers for hub and member clusters +func Start(ctx context.Context, hubCfg, memberCfg *rest.Config) error { + // Create scheme with required APIs + scheme := runtime.NewScheme() + if err := placementv1beta1.AddToScheme(scheme); err != nil { + return fmt.Errorf("failed to add placement API to scheme: %w", err) + } + if err := corev1.AddToScheme(scheme); err != nil { + return fmt.Errorf("failed to add core API to scheme: %w", err) + } + + // Create member cluster manager (where controller runs and watches MetricCollector) + memberMgr, err := ctrl.NewManager(memberCfg, ctrl.Options{ + Scheme: scheme, + Metrics: metricsserver.Options{ + BindAddress: *metricsAddr, + }, + HealthProbeBindAddress: *probeAddr, + LeaderElection: *enableLeaderElect, + LeaderElectionID: *leaderElectionID, + }) + if err != nil { + return fmt.Errorf("failed to create member manager: %w", err) + } + + // Create hub cluster client (for writing MetricCollectorReports) + hubClient, err := client.New(hubCfg, client.Options{Scheme: scheme}) + if err != nil { + return 
fmt.Errorf("failed to create hub client: %w", err) + } + + // Get Prometheus URL from environment + prometheusURL := os.Getenv("PROMETHEUS_URL") + if prometheusURL == "" { + prometheusURL = "http://prometheus.fleet-system.svc.cluster.local:9090" + klog.InfoS("PROMETHEUS_URL not set, using default", "url", prometheusURL) + } + + // Create Prometheus client + prometheusClient := metriccollector.NewPrometheusClient(prometheusURL, "", nil) + + // Setup MetricCollector controller + if err := (&metriccollector.Reconciler{ + MemberClient: memberMgr.GetClient(), + HubClient: hubClient, + PrometheusClient: prometheusClient, + }).SetupWithManager(memberMgr); err != nil { + return fmt.Errorf("failed to setup MetricCollector controller: %w", err) + } + + // Add health checks + if err := memberMgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + return fmt.Errorf("failed to add healthz check: %w", err) + } + if err := memberMgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + return fmt.Errorf("failed to add readyz check: %w", err) + } + + klog.InfoS("Starting MetricCollector controller", + "hubUrl", hubCfg.Host, + "prometheusUrl", prometheusURL, + "metricsAddr", *metricsAddr, + "probeAddr", *probeAddr) + + // Start the manager + if err := memberMgr.Start(ctx); err != nil { + return fmt.Errorf("failed to start manager: %w", err) + } + + return nil +} diff --git a/standalone-metric-collector/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml b/standalone-metric-collector/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml new file mode 100644 index 000000000..47679a15f --- /dev/null +++ b/standalone-metric-collector/config/crd/bases/placement.kubernetes-fleet.io_metriccollectors.yaml @@ -0,0 +1,189 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.0 + name: metriccollectors.placement.kubernetes-fleet.io +spec: + group: 
placement.kubernetes-fleet.io + names: + categories: + - fleet + - fleet-metrics + kind: MetricCollector + listKind: MetricCollectorList + plural: metriccollectors + shortNames: + - mc + singular: metriccollector + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.generation + name: Gen + type: string + - jsonPath: .status.conditions[?(@.type=="MetricCollectorReady")].status + name: Ready + type: string + - jsonPath: .status.workloadsMonitored + name: Workloads + type: integer + - jsonPath: .status.lastCollectionTime + name: Last-Collection + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + MetricCollector is used by member-agent to scrape and collect metrics from workloads + running on the member cluster. It runs on each member cluster and collects metrics + from Prometheus-compatible endpoints. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: The desired state of MetricCollector. + properties: + prometheusUrl: + description: |- + PrometheusURL is the URL of the Prometheus server. 
+ Example: http://prometheus.test-ns.svc.cluster.local:9090 + pattern: ^https?://.*$ + type: string + reportNamespace: + description: |- + ReportNamespace is the namespace in the hub cluster where the MetricCollectorReport will be created. + This should be the fleet-member-{clusterName} namespace. + Example: fleet-member-cluster-1 + type: string + required: + - prometheusUrl + - reportNamespace + type: object + status: + description: The observed status of MetricCollector. + properties: + collectedMetrics: + description: CollectedMetrics contains the most recent metrics from + each workload. + items: + description: WorkloadMetrics represents metrics collected from a + single workload pod. + properties: + clusterName: + description: ClusterName from the workload_health metric label. + type: string + health: + description: Health indicates if the workload is healthy (true=healthy, + false=unhealthy). + type: boolean + namespace: + description: Namespace is the namespace of the pod. + type: string + workloadName: + description: WorkloadName from the workload_health metric label + (typically the deployment name). + type: string + required: + - clusterName + - health + - namespace + - workloadName + type: object + type: array + conditions: + description: Conditions is an array of current observed conditions. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + lastCollectionTime: + description: LastCollectionTime is when metrics were last collected. + format: date-time + type: string + observedGeneration: + description: ObservedGeneration is the generation most recently observed. + format: int64 + type: integer + workloadsMonitored: + description: WorkloadsMonitored is the count of workloads being monitored. 
+                format: int32
+                type: integer
+            type: object
+          required:
+          - spec
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
diff --git a/standalone-metric-collector/docker/Dockerfile b/standalone-metric-collector/docker/Dockerfile
new file mode 100644
index 000000000..e9780f3ca
--- /dev/null
+++ b/standalone-metric-collector/docker/Dockerfile
@@ -0,0 +1,23 @@
+FROM golang:1.24 AS builder
+WORKDIR /workspace
+
+# Copy go mod files
+COPY go.mod go.sum* ./
+RUN go mod download
+
+# Copy source code
+COPY apis/ apis/
+COPY cmd/ cmd/
+COPY pkg/ pkg/
+
+# Build
+RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
+    -a -o metric-collector \
+    ./cmd/metriccollector
+
+FROM gcr.io/distroless/static:nonroot
+WORKDIR /
+COPY --from=builder /workspace/metric-collector .
+USER 65532:65532
+
+ENTRYPOINT ["/metric-collector"]
diff --git a/standalone-metric-collector/examples/metriccollector-example.yaml b/standalone-metric-collector/examples/metriccollector-example.yaml
new file mode 100644
index 000000000..1b7d15490
--- /dev/null
+++ b/standalone-metric-collector/examples/metriccollector-example.yaml
@@ -0,0 +1,7 @@
+apiVersion: placement.kubernetes-fleet.io/v1beta1
+kind: MetricCollector
+metadata:
+  name: mc-example-run-staging
+spec:
+  prometheusUrl: "http://prometheus.test-ns:9090"
+  reportNamespace: "fleet-member-cluster-1"
diff --git a/standalone-metric-collector/go.mod b/standalone-metric-collector/go.mod
new file mode 100644
index 000000000..dd5d0b9a2
--- /dev/null
+++ b/standalone-metric-collector/go.mod
@@ -0,0 +1,63 @@
+module github.com/kubefleet-dev/standalone-metric-collector
+
+go 1.24.9
+
+require (
+	k8s.io/api v0.32.3
+	k8s.io/apimachinery v0.32.3
+	k8s.io/client-go v0.32.3
+	k8s.io/klog/v2 v2.130.1
+	sigs.k8s.io/controller-runtime v0.20.4
+)
+
+require (
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/davecgh/go-spew
v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/swag v0.23.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.9.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_golang v1.22.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.62.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect + github.com/x448/float16 v0.8.4 // indirect + golang.org/x/net v0.40.0 // indirect + golang.org/x/oauth2 v0.27.0 // indirect + golang.org/x/sync v0.15.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/term v0.32.0 // indirect + golang.org/x/text v0.25.0 // indirect + golang.org/x/time v0.11.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // 
indirect + k8s.io/apiextensions-apiserver v0.32.3 // indirect + k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect + k8s.io/utils v0.0.0-20250321185631-1f6e0b77f77e // indirect + sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect +) diff --git a/standalone-metric-collector/go.sum b/standalone-metric-collector/go.sum new file mode 100644 index 000000000..049fc5719 --- /dev/null +++ b/standalone-metric-collector/go.sum @@ -0,0 +1,182 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= +github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify 
v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU= +github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod 
h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= +github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= +github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang 
v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= +github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= +github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= 
+go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= +golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= +golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= +golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= +golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.32.3 h1:Hw7KqxRusq+6QSplE3NYG4MBxZw1BZnq4aP4cJVINls= +k8s.io/api v0.32.3/go.mod h1:2wEDTXADtm/HA7CCMD8D8bK4yuBUptzaRhYcYEEYA3k= +k8s.io/apiextensions-apiserver v0.32.3 h1:4D8vy+9GWerlErCwVIbcQjsWunF9SUGNu7O7hiQTyPY= +k8s.io/apiextensions-apiserver v0.32.3/go.mod h1:8YwcvVRMVzw0r1Stc7XfGAzB/SIVLunqApySV5V7Dss= +k8s.io/apimachinery v0.32.3 h1:JmDuDarhDmA/Li7j3aPrwhpNBA94Nvk5zLeOge9HH1U= +k8s.io/apimachinery v0.32.3/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= +k8s.io/client-go v0.32.3 h1:RKPVltzopkSgHS7aS98QdscAgtgah/+zmpAogooIqVU= +k8s.io/client-go v0.32.3/go.mod h1:3v0+3k4IcT9bXTc4V2rt+d2ZPPG700Xy6Oi0Gdl2PaY= +k8s.io/klog/v2 v2.130.1 
h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y= +k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4= +k8s.io/utils v0.0.0-20250321185631-1f6e0b77f77e h1:KqK5c/ghOm8xkHYhlodbp6i6+r+ChV2vuAuVRdFbLro= +k8s.io/utils v0.0.0-20250321185631-1f6e0b77f77e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU= +sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= +sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA= +sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/standalone-metric-collector/install-on-member.sh b/standalone-metric-collector/install-on-member.sh new file mode 100755 index 000000000..5eb9a1a2a --- /dev/null +++ b/standalone-metric-collector/install-on-member.sh @@ -0,0 +1,160 @@ +#!/bin/bash +set -e + +# Configuration +HUB_CONTEXT="kind-hub" +MEMBER_CLUSTER_COUNT="${1:-1}" # Default to 1 if not specified +MEMBER_NAMESPACE="default" +PROMETHEUS_URL="http://prometheus.test-ns:9090" +IMAGE_NAME="metric-collector" +IMAGE_TAG="latest" + +# Get hub cluster API server URL dynamically using docker inspect (following kubefleet pattern) +HUB_API_SERVER="https://$(docker inspect hub-control-plane --format='{{range 
.NetworkSettings.Networks}}{{.IPAddress}}{{end}}'):6443" + +echo "=== Installing MetricCollector on ${MEMBER_CLUSTER_COUNT} member cluster(s) ===" +echo "Hub cluster: ${HUB_CONTEXT}" +echo "Hub API server: ${HUB_API_SERVER}" +echo "" + +# Step 0: Build and load Docker image (once for all clusters) +echo "Step 0: Building Docker image..." +docker buildx build \ + --file docker/Dockerfile \ + --output=type=docker \ + --platform=linux/$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') \ + --tag ${IMAGE_NAME}:${IMAGE_TAG} \ + --build-arg GOARCH=$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') \ + --build-arg GOOS=linux \ + . +echo "✓ Docker image built" +echo "" + +# Install on each member cluster +for i in $(seq 1 ${MEMBER_CLUSTER_COUNT}); do + MEMBER_CONTEXT="kind-cluster-${i}" + MEMBER_CLUSTER_NAME="cluster-${i}" + HUB_NAMESPACE="fleet-member-${MEMBER_CLUSTER_NAME}" + + echo "========================================" + echo "Installing on Member Cluster ${i}/${MEMBER_CLUSTER_COUNT}" + echo " Context: ${MEMBER_CONTEXT}" + echo " Cluster Name: ${MEMBER_CLUSTER_NAME}" + echo "========================================" + echo "" + + # Load image into this member cluster + echo "Loading Docker image into ${MEMBER_CONTEXT}..." + kind load docker-image ${IMAGE_NAME}:${IMAGE_TAG} --name cluster-${i} + echo "✓ Image loaded into kind cluster" + echo "" + + # Step 1: Setup RBAC on hub cluster + echo "Step 1: Setting up RBAC on hub cluster..." 
+ kubectl --context=${HUB_CONTEXT} create namespace ${HUB_NAMESPACE} --dry-run=client -o yaml | kubectl --context=${HUB_CONTEXT} apply -f - + kubectl --context=${HUB_CONTEXT} create serviceaccount metric-collector-sa -n ${HUB_NAMESPACE} --dry-run=client -o yaml | kubectl --context=${HUB_CONTEXT} apply -f - + + cat <= 2 { + if valueStr, ok := res.Value[1].(string); ok { + fmt.Sscanf(valueStr, "%f", &health) + } + } + + wm := placementv1beta1.WorkloadMetrics{ + Namespace: namespace, + WorkloadName: workloadName, + Health: health == 1.0, // Convert to boolean: 1.0 = true, 0.0 = false + } + workloadMetrics = append(workloadMetrics, wm) + } + + klog.V(2).InfoS("Collected metrics from Prometheus", "workloads", len(workloadMetrics)) + return workloadMetrics, nil +} + +// buildPromQLQuery builds a PromQL query for workload_health metric +func buildPromQLQuery(mc *placementv1beta1.MetricCollector) string { + // Query all workload_health metrics (MetricCollector is cluster-scoped) + return `workload_health` +} diff --git a/standalone-metric-collector/pkg/controller/controller.go b/standalone-metric-collector/pkg/controller/controller.go new file mode 100644 index 000000000..1293537fd --- /dev/null +++ b/standalone-metric-collector/pkg/controller/controller.go @@ -0,0 +1,288 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package metriccollector
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/klog/v2"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/builder"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+
+	placementv1beta1 "github.com/kubefleet-dev/standalone-metric-collector/apis/v1beta1"
+)
+
+const (
+	// defaultCollectionInterval is the interval for collecting metrics (30 seconds).
+	// Reconcile returns RequeueAfter with this value, so collection happens on a
+	// steady cadence even when no watch events arrive.
+	defaultCollectionInterval = 30 * time.Second
+
+	// metricCollectorFinalizer is the finalizer for cleaning up MetricCollectorReport
+	// on the hub before the member-side MetricCollector is allowed to disappear.
+	metricCollectorFinalizer = "kubernetes-fleet.io/metric-collector-report-cleanup"
+)
+
+// Reconciler reconciles a MetricCollector object. It runs on a member cluster:
+// it scrapes metrics via the Prometheus client, records them in the
+// MetricCollector status, and mirrors them to the hub as a MetricCollectorReport.
+type Reconciler struct {
+	// MemberClient is the client to access the member cluster
+	MemberClient client.Client
+
+	// HubClient is the client to access the hub cluster
+	HubClient client.Client
+
+	// recorder is the event recorder (set in SetupWithManager)
+	recorder record.EventRecorder
+
+	// PrometheusClient is the client to query Prometheus
+	PrometheusClient PrometheusClient
+}
+
+// Reconcile reconciles a MetricCollector object.
+//
+// Flow: fetch → (deletion? clean up hub report and drop finalizer) →
+// ensure finalizer → collect metrics → write status (Ready/Collecting) →
+// sync report to hub → write status again (Reported) → requeue after the
+// collection interval.
+func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	startTime := time.Now()
+	klog.V(2).InfoS("MetricCollector reconciliation starts", "metricCollector", req.Name)
+	defer func() {
+		latency := time.Since(startTime).Milliseconds()
+		klog.V(2).InfoS("MetricCollector reconciliation ends", "metricCollector", req.Name, "latency", latency)
+	}()
+
+	// Fetch the MetricCollector instance (cluster-scoped, so no namespace in the key)
+	mc := &placementv1beta1.MetricCollector{}
+	if err := r.MemberClient.Get(ctx, client.ObjectKey{Name: req.Name}, mc); err != nil {
+		if errors.IsNotFound(err) {
+			klog.V(2).InfoS("MetricCollector not found, ignoring", "metricCollector", req.Name)
+			return ctrl.Result{}, nil
+		}
+		klog.ErrorS(err, "Failed to get MetricCollector", "metricCollector", req.Name)
+		return ctrl.Result{}, err
+	}
+
+	// Handle deletion - cleanup MetricCollectorReport on hub.
+	// Returning an error here keeps the finalizer in place, so deletion blocks
+	// until the hub-side report is actually gone.
+	if !mc.DeletionTimestamp.IsZero() {
+		if controllerutil.ContainsFinalizer(mc, metricCollectorFinalizer) {
+			klog.V(2).InfoS("Cleaning up MetricCollectorReport on hub", "metricCollector", req.Name)
+
+			// Delete MetricCollectorReport from hub cluster
+			if err := r.deleteReportFromHub(ctx, mc); err != nil {
+				klog.ErrorS(err, "Failed to delete MetricCollectorReport from hub", "metricCollector", req.Name)
+				return ctrl.Result{}, err
+			}
+
+			// Remove finalizer
+			controllerutil.RemoveFinalizer(mc, metricCollectorFinalizer)
+			if err := r.MemberClient.Update(ctx, mc); err != nil {
+				klog.ErrorS(err, "Failed to remove finalizer", "metricCollector", req.Name)
+				return ctrl.Result{}, err
+			}
+			klog.V(2).InfoS("Successfully cleaned up MetricCollectorReport", "metricCollector", req.Name)
+		}
+		return ctrl.Result{}, nil
+	}
+
+	// Add finalizer if not present
+	if !controllerutil.ContainsFinalizer(mc, metricCollectorFinalizer) {
+		controllerutil.AddFinalizer(mc, metricCollectorFinalizer)
+		if err := r.MemberClient.Update(ctx, mc); err != nil {
+			klog.ErrorS(err, "Failed to add finalizer", "metricCollector", req.Name)
+			return ctrl.Result{}, err
+		}
+		klog.V(2).InfoS("Added finalizer to MetricCollector", "metricCollector", req.Name)
+	}
+
+	// Collect metrics from Prometheus. A collection failure is folded into the
+	// status conditions below rather than aborting the reconcile.
+	collectedMetrics, collectErr := r.collectFromPrometheus(ctx, mc)
+
+	// Update status with collected metrics (even on failure: LastCollectionTime
+	// records the attempt and CollectedMetrics reflects whatever was returned).
+	now := metav1.Now()
+	mc.Status.LastCollectionTime = &now
+	mc.Status.CollectedMetrics = collectedMetrics
+	mc.Status.WorkloadsMonitored = int32(len(collectedMetrics))
+	mc.Status.ObservedGeneration = mc.Generation
+
+	if collectErr != nil {
+		klog.ErrorS(collectErr, "Failed to collect metrics", "metricCollector", req.Name)
+		// NOTE(review): Ready stays True on a failed scrape — the collector is
+		// configured but the last collection failed; only Collecting flips to
+		// False. Confirm this is the intended semantics of the Ready condition.
+		meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{
+			Type:               placementv1beta1.MetricCollectorConditionTypeReady,
+			Status:             metav1.ConditionTrue,
+			ObservedGeneration: mc.Generation,
+			Reason:             "CollectorConfigured",
+			Message:            "Collector is configured",
+		})
+		meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{
+			Type:               placementv1beta1.MetricCollectorConditionTypeCollecting,
+			Status:             metav1.ConditionFalse,
+			ObservedGeneration: mc.Generation,
+			Reason:             "CollectionFailed",
+			Message:            fmt.Sprintf("Failed to collect metrics: %v", collectErr),
+		})
+	} else {
+		klog.V(2).InfoS("Successfully collected metrics", "metricCollector", req.Name, "workloads", len(collectedMetrics))
+		meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{
+			Type:               placementv1beta1.MetricCollectorConditionTypeReady,
+			Status:             metav1.ConditionTrue,
+			ObservedGeneration: mc.Generation,
+			Reason:             "CollectorConfigured",
+			Message:            "Collector is configured and collecting metrics",
+		})
+		meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{
+			Type:               placementv1beta1.MetricCollectorConditionTypeCollecting,
+			Status:             metav1.ConditionTrue,
+			ObservedGeneration: mc.Generation,
+			Reason:             "MetricsCollected",
+			Message:            fmt.Sprintf("Successfully collected metrics from %d workloads", len(collectedMetrics)),
+		})
+	}
+
+	// First status write: collection outcome (Ready/Collecting conditions).
+	if err := r.MemberClient.Status().Update(ctx, mc); err != nil {
+		klog.ErrorS(err, "Failed to update MetricCollector status", "metricCollector", req.Name)
+		return ctrl.Result{}, err
+	}
+
+	// Sync MetricCollectorReport to hub cluster. This is best-effort: a sync
+	// failure is recorded in the Reported condition but not returned, so the
+	// periodic requeue below retries it on the next pass.
+	if err := r.syncReportToHub(ctx, mc); err != nil {
+		klog.ErrorS(err, "Failed to sync MetricCollectorReport to hub", "metricCollector", req.Name)
+		meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{
+			Type:               placementv1beta1.MetricCollectorConditionTypeReported,
+			Status:             metav1.ConditionFalse,
+			ObservedGeneration: mc.Generation,
+			Reason:             "ReportSyncFailed",
+			Message:            fmt.Sprintf("Failed to sync report to hub: %v", err),
+		})
+	} else {
+		meta.SetStatusCondition(&mc.Status.Conditions, metav1.Condition{
+			Type:               placementv1beta1.MetricCollectorConditionTypeReported,
+			Status:             metav1.ConditionTrue,
+			ObservedGeneration: mc.Generation,
+			Reason:             "ReportSyncSucceeded",
+			Message:            "Successfully synced metrics to hub cluster",
+		})
+	}
+
+	// Second status write: records the Reported condition from the hub sync.
+	if err := r.MemberClient.Status().Update(ctx, mc); err != nil {
+		klog.ErrorS(err, "Failed to update MetricCollector status with reporting condition", "metricCollector", req.Name)
+		return ctrl.Result{}, err
+	}
+
+	// Requeue after the collection interval (30 seconds) to scrape again.
+	return ctrl.Result{RequeueAfter: defaultCollectionInterval}, nil
+}
+
+// syncReportToHub creates or updates the mirror MetricCollectorReport in
+// mc.Spec.ReportNamespace on the hub cluster, copying the member-side status
+// fields and stamping LastReportTime. Returns an error if the namespace is
+// unset or the hub API call fails.
+func (r *Reconciler) syncReportToHub(ctx context.Context, mc *placementv1beta1.MetricCollector) error {
+	// Use the reportNamespace from the MetricCollector spec
+	// (expected to be the fleet-member-<clusterName> namespace).
+	reportNamespace := mc.Spec.ReportNamespace
+	if reportNamespace == "" {
+		return fmt.Errorf("reportNamespace is not set in MetricCollector spec")
+	}
+
+	// Create or update MetricCollectorReport on hub; the label allows selecting
+	// reports by their originating MetricCollector.
+	report := &placementv1beta1.MetricCollectorReport{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      mc.Name,
+			Namespace: reportNamespace,
+			Labels: map[string]string{
+				"metriccollector-name": mc.Name,
+			},
+		},
+	}
+
+	// Check if report already exists
+	existingReport := &placementv1beta1.MetricCollectorReport{}
+	err := r.HubClient.Get(ctx, client.ObjectKey{Name: mc.Name, Namespace: reportNamespace}, existingReport)
+
+	now := metav1.Now()
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// Create new report, copying the status fields just written by Reconcile.
+			report.Conditions = mc.Status.Conditions
+			report.ObservedGeneration = mc.Status.ObservedGeneration
+			report.WorkloadsMonitored = mc.Status.WorkloadsMonitored
+			report.LastCollectionTime = mc.Status.LastCollectionTime
+			report.CollectedMetrics = mc.Status.CollectedMetrics
+			report.LastReportTime = &now
+
+			if err := r.HubClient.Create(ctx, report); err != nil {
+				klog.ErrorS(err, "Failed to create MetricCollectorReport", "report", klog.KObj(report))
+				return err
+			}
+			klog.V(2).InfoS("Created MetricCollectorReport on hub", "report", klog.KObj(report), "reportNamespace", reportNamespace)
+			return nil
+		}
+		return err
+	}
+
+	// Update existing report in place; existingReport carries the resourceVersion
+	// needed for the update to succeed.
+	existingReport.Labels = report.Labels
+	existingReport.Conditions = mc.Status.Conditions
+	existingReport.ObservedGeneration = mc.Status.ObservedGeneration
+	existingReport.WorkloadsMonitored = mc.Status.WorkloadsMonitored
+	existingReport.LastCollectionTime = mc.Status.LastCollectionTime
+	existingReport.CollectedMetrics = mc.Status.CollectedMetrics
+	existingReport.LastReportTime = &now
+
+	if err := r.HubClient.Update(ctx, existingReport); err != nil {
+		klog.ErrorS(err, "Failed to update MetricCollectorReport", "report", klog.KObj(existingReport))
+		return err
+	}
+	klog.V(2).InfoS("Updated MetricCollectorReport on hub", "report", klog.KObj(existingReport), "reportNamespace", reportNamespace)
+	return nil
+}
+
+// deleteReportFromHub deletes the MetricCollectorReport from the hub cluster.
+// It is idempotent: a missing namespace or an already-deleted report is not an
+// error, so finalizer removal in Reconcile can proceed.
+func (r *Reconciler) deleteReportFromHub(ctx context.Context, mc *placementv1beta1.MetricCollector) error {
+	// Use the reportNamespace from the MetricCollector spec
+	reportNamespace := mc.Spec.ReportNamespace
+	if reportNamespace == "" {
+		klog.V(2).InfoS("reportNamespace is not set, skipping deletion", "metricCollector", mc.Name)
+		return nil
+	}
+
+	// Try to delete MetricCollectorReport on hub
+	report := &placementv1beta1.MetricCollectorReport{}
+	err := r.HubClient.Get(ctx, client.ObjectKey{Name: mc.Name, Namespace: reportNamespace}, report)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			klog.V(2).InfoS("MetricCollectorReport not found on hub, already deleted", "report", mc.Name, "namespace", reportNamespace)
+			return nil
+		}
+		return fmt.Errorf("failed to get MetricCollectorReport: %w", err)
+	}
+
+	// Tolerate a concurrent delete between the Get above and this call.
+	if err := r.HubClient.Delete(ctx, report); err != nil && !errors.IsNotFound(err) {
+		return fmt.Errorf("failed to delete MetricCollectorReport: %w", err)
+	}
+
+	klog.InfoS("Deleted MetricCollectorReport from hub", "report", mc.Name, "namespace", reportNamespace)
+	return nil
+}
+
+// SetupWithManager sets up the controller with the Manager.
+// NOTE(review): GenerationChangedPredicate filters out update events that do
+// not bump metadata.generation (e.g. status-only writes); periodic collection
+// relies on the RequeueAfter returned by Reconcile. TODO confirm that
+// deletion (deletionTimestamp) events still reach the reconciler under this
+// predicate for this CRD.
+func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
+	r.recorder = mgr.GetEventRecorderFor("metriccollector-controller")
+	return ctrl.NewControllerManagedBy(mgr).
+		Named("metriccollector-controller").
+		For(&placementv1beta1.MetricCollector{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
+		Complete(r)
+}
diff --git a/test/e2e/kindconfigs/cluster-2.yaml b/test/e2e/kindconfigs/cluster-2.yaml
index cbffc7443..84e29acee 100644
--- a/test/e2e/kindconfigs/cluster-2.yaml
+++ b/test/e2e/kindconfigs/cluster-2.yaml
@@ -7,4 +7,4 @@ apiVersion: kind.x-k8s.io/v1alpha4
 nodes:
 - role: control-plane
 - role: worker
-- role: worker
+# - role: worker
diff --git a/test/e2e/kindconfigs/cluster-3.yaml b/test/e2e/kindconfigs/cluster-3.yaml
index d71759a35..e5eb1328b 100644
--- a/test/e2e/kindconfigs/cluster-3.yaml
+++ b/test/e2e/kindconfigs/cluster-3.yaml
@@ -7,5 +7,5 @@ apiVersion: kind.x-k8s.io/v1alpha4
 nodes:
 - role: control-plane
 - role: worker
-- role: worker
-- role: worker
+# - role: worker
+# - role: worker
diff --git a/tools/prometheus-test/README.md b/tools/prometheus-test/README.md
new file mode 100644
index 000000000..10f3d510c
--- /dev/null
+++ b/tools/prometheus-test/README.md
@@ -0,0 +1,69 @@
+# Prometheus Test Tool
+
+A simple command-line tool to test querying Prometheus API for metrics.
+
+## Build
+
+```bash
+go build -o prometheus-test main.go
+```
+
+## Usage
+
+```bash
+./prometheus-test <prometheus-url> <metric-name> [namespace]
+```
+
+### Examples
+
+Query all instances of `workload_health` metric:
+```bash
+./prometheus-test http://localhost:9090 workload_health
+```
+
+Query `workload_health` metric filtered by namespace:
+```bash
+./prometheus-test http://localhost:9090 workload_health test-ns
+```
+
+Query with port-forward to a Prometheus running in Kubernetes:
+```bash
+# In one terminal, forward Prometheus port
+kubectl port-forward -n monitoring svc/prometheus 9090:9090
+
+# In another terminal, query
+./prometheus-test http://localhost:9090 workload_health default
+```
+
+## Output
+
+The tool will display:
+- Query status
+- Number of results
+- For each result:
+  - All metric labels (pod, namespace, etc.)
+  - Timestamp
+  - Metric value
+
+## Testing with the Fleet Sample Metric App
+
+If you have the sample-metric-app deployed:
+
+```bash
+# Port-forward Prometheus
+kubectl port-forward -n monitoring svc/prometheus 9090:9090
+
+# Query the workload_health metric
+./prometheus-test http://localhost:9090 'workload_health{app="sample-metric-app"}'
+```
+
+## Implementation Notes
+
+This tool uses the same query approach that will be used in the MetricCollector controller:
+1. Builds a Prometheus query URL with `/api/v1/query` endpoint
+2. Adds query parameters
+3. Makes HTTP GET request with context timeout
+4. Parses JSON response
+5. Extracts metric labels and values
+
+The code in this tool can be directly adapted for use in the MetricCollector controller.
diff --git a/tools/prometheus-test/main.go b/tools/prometheus-test/main.go
new file mode 100644
index 000000000..589d7ffdd
--- /dev/null
+++ b/tools/prometheus-test/main.go
@@ -0,0 +1,127 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"time"
+)
+
+// PrometheusQueryResult represents the Prometheus /api/v1/query API response.
+type PrometheusQueryResult struct {
+	Status string `json:"status"`
+	Data   struct {
+		ResultType string `json:"resultType"`
+		Result     []struct {
+			Metric map[string]string `json:"metric"`
+			Value  []interface{}     `json:"value"`
+		} `json:"result"`
+	} `json:"data"`
+}
+
+func main() {
+	if len(os.Args) < 3 {
+		fmt.Println("Usage: prometheus-test <prometheus-url> <metric-name> [namespace]")
+		fmt.Println("Example: prometheus-test http://localhost:9090 workload_health")
+		os.Exit(1)
+	}
+
+	prometheusURL := os.Args[1]
+	metricName := os.Args[2]
+
+	// Build the query - get all instances of the specified metric
+	query := metricName
+
+	// Optional: filter by namespace label if a third argument is provided
+	if len(os.Args) > 3 {
+		namespace := os.Args[3]
+		query = fmt.Sprintf(`%s{namespace="%s"}`, metricName, namespace)
+	}
+
+	fmt.Printf("Querying Prometheus at: %s\n", prometheusURL)
+	fmt.Printf("Query: %s\n", query)
+	fmt.Println()
+
+	// Query Prometheus with a 10-second overall deadline
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	result, err := queryPrometheus(ctx, prometheusURL, query)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error querying Prometheus: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Print results
+	fmt.Printf("Status: %s\n", result.Status)
+	fmt.Printf("Result Type: %s\n", result.Data.ResultType)
+	fmt.Printf("Number of results: %d\n", len(result.Data.Result))
+	fmt.Println()
+
+	for i, res := range result.Data.Result {
+		fmt.Printf("Result %d:\n", i+1)
+		fmt.Printf("  Labels:\n")
+		for k, v := range res.Metric {
+			fmt.Printf("    %s: %s\n", k, v)
+		}
+
+		// Instant-vector samples arrive as a [timestamp, value] pair
+		if len(res.Value) >= 2 {
+			timestamp := res.Value[0]
+			value := res.Value[1]
+			fmt.Printf("  Timestamp: %v\n", timestamp)
+			fmt.Printf("  Value: %v\n", value)
+		}
+		fmt.Println()
+	}
+}
+
+// queryPrometheus queries the Prometheus HTTP API and decodes the response.
+func queryPrometheus(ctx context.Context, prometheusURL, query string) (*PrometheusQueryResult, error) {
+	// Build the query URL
+	apiURL := fmt.Sprintf("%s/api/v1/query", prometheusURL)
+
+	// Add query parameters (Encode URL-escapes the PromQL expression)
+	params := url.Values{}
+	params.Add("query", query)
+
+	fullURL := fmt.Sprintf("%s?%s", apiURL, params.Encode())
+
+	// Create HTTP request bound to the caller's context
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, fullURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	// Execute request (client timeout backstops a context without a deadline)
+	client := &http.Client{
+		Timeout: 10 * time.Second,
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to execute request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Check status code
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Parse response
+	var result PrometheusQueryResult
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, fmt.Errorf("failed to decode response: %w", err)
+	}
+
+	if result.Status != "success" {
+		return nil, fmt.Errorf("prometheus query failed with status: %s", result.Status)
+	}
+
+	return &result, nil
+}
diff --git a/tools/prometheus-test/prometheus-test b/tools/prometheus-test/prometheus-test
new file mode 100755
index 000000000..19bdced7a
Binary files /dev/null and b/tools/prometheus-test/prometheus-test differ