move monitoring to services

This commit is contained in:
Márcio Fernandes
2026-06-07 21:43:49 +00:00
parent 8bf2f786d5
commit d8419c0e75
22 changed files with 1 additions and 0 deletions
@@ -0,0 +1,55 @@
# Helm values that install only the Prometheus Operator and its CRDs.
# Every other component toggle in this file is switched off.

# Rules and bundled components: all disabled.
defaultRules:
  create: false
alertmanager:
  enabled: false
grafana:
  enabled: false
kubeStateMetrics:
  enabled: false
nodeExporter:
  enabled: false
prometheus:
  enabled: false

# Control-plane scrape targets: all disabled.
coreDns:
  enabled: false
kubeControllerManager:
  enabled: false
kubeEtcd:
  enabled: false
kubeProxy:
  enabled: false
kubeScheduler:
  enabled: false

# The only component left enabled.
prometheusOperator:
  enabled: true
  # NOTE(review): createCustomResource / cleanupCustomResource look like
  # legacy chart options - verify the pinned chart version still reads them.
  createCustomResource: true
  tls:
    enabled: false
  admissionWebhooks:
    enabled: false
  cleanupCustomResource: false
  serviceMonitor:
    selfMonitor: false
  kubeletService:
    enabled: true
    # requires manual creation of service #prom-kublet-service
    # NOTE(review): confirm whether the note above still applies while
    # kubeletService is enabled.
  # Schedule the operator on nodes labelled role=worker-node.
  # NOTE(review): nesting under prometheusOperator is inferred - confirm.
  nodeSelector:
    role: worker-node

# global:
#   nodeSelector:
#     dedicated: worker-node
@@ -0,0 +1,36 @@
# Enables kube-state-metrics plus the CoreDNS and API-server monitors.
# Every monitor stamps its series with a static `cluster: casa` label.
kubeStateMetrics:
  enabled: true

kube-state-metrics: # ok tested!
  # Label the pod and pin it to nodes carrying role=worker-node.
  podLabels:
    role: worker-node
  nodeSelector:
    role: worker-node
  prometheus:
    monitor:
      interval: "60s"
      relabelings:
        - targetLabel: cluster
          replacement: casa
      additionalLabels:
        # !important: selector used by agent
        app.kubernetes.io/name: prometheus-kube-state-metrics

coreDns: # ok tested!
  enabled: true
  serviceMonitor:
    relabelings:
      - targetLabel: cluster
        replacement: casa
    additionalLabels:
      # !important: selector used by agent
      app.kubernetes.io/name: prometheus-stack-coredns

kubeApiServer: # ok tested!
  enabled: true
  serviceMonitor:
    relabelings:
      - targetLabel: cluster
        replacement: casa
    additionalLabels:
      # !important: selector used by agent
      app.kubernetes.io/name: prometheus-stack-apiserver
@@ -0,0 +1,25 @@
# Deploy node exporter as a daemonset to all nodes.
nodeExporter:
  enabled: true

# Scrape job settings for the node exporter.
prometheus-node-exporter:
  prometheus:
    monitor:
      enabled: true
      interval: "60s"
      relabelings:
        # Expose the node name as `nodename`; see
        # https://github.com/dotdc/grafana-dashboards-kubernetes
        - action: replace
          sourceLabels:
            - __meta_kubernetes_pod_node_name
          targetLabel: nodename
        # Identification of the cluster.
        - targetLabel: cluster
          replacement: casa
        # This annotation seems to hold a timestamp, which cannot be a label.
        - action: labeldrop
          regex: __meta_kubernetes_endpoints_annotation_endpoints_kubernetes_io_last_change_trigger_time
@@ -0,0 +1,78 @@
# File used for testing new options and configurations.
# Should be the last values file to be loaded.
kubelet:
  enabled: true
  namespace: kube-system
  serviceMonitor:
    # WARN: Error on ingesting out-of-order samples.
    # https://github.com/prometheus-community/helm-charts/issues/5483
    interval: 60s
    enabled: true
    ## Enable scraping /metrics from kubelet's service
    kubelet: true
    cAdvisor: true
    additionalLabels:
      # !important: selector used by agent
      app.kubernetes.io/name: prometheus-kubelet

    # Metric relabelings for the probes endpoint.
    # NOTE(review): the two drop rules below match container_* series names -
    # confirm they were meant for the probes endpoint rather than cAdvisor.
    probesMetricRelabelings:
      - targetLabel: cluster
        replacement: casa
      - sourceLabels:
          - __name__
          - image
        separator: ;
        regex: 'container_([a-z_]+);'
        replacement: $1
        action: drop
      - sourceLabels:
          - __name__
        separator: ;
        regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)'
        replacement: $1
        action: drop

    # RelabelConfigs to apply to samples before scraping
    # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    #
    # metrics_path is required to match upstream rules and charts
    cAdvisorRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels:
          - __metrics_path__
        targetLabel: metrics_path
      - sourceLabels:
          - __meta_kubernetes_pod_node_name
        separator: ;
        regex: '^(.*)$'
        targetLabel: nodename
        replacement: $1
        action: replace

    # RelabelConfigs to apply to samples before scraping
    # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    probesRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels:
          - __metrics_path__
        targetLabel: metrics_path
      - sourceLabels:
          - __meta_kubernetes_pod_node_name
        separator: ;
        regex: '^(.*)$'
        targetLabel: nodename
        replacement: $1
        action: replace

    # Same cluster / metrics_path labelling for the resource endpoint.
    resourceRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels:
          - __metrics_path__
        targetLabel: metrics_path
@@ -0,0 +1,28 @@
# Flux HelmRelease that installs the monitoring stack as the Helm release
# `prometheus-stack`, layering four values files read from one Secret.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: monitoring
spec:
  releaseName: prometheus-stack
  interval: 40h
  chart:
    spec:
      # The chart name must match a chart published by the referenced
      # HelmRepository. The prometheus-community repository publishes the
      # operator stack as `kube-prometheus-stack`; `prometheus-stack` is only
      # the release / repository object name used in this setup.
      chart: kube-prometheus-stack
      version: 86.x.x
      sourceRef:
        kind: HelmRepository
        name: prometheus-stack
      interval: 40h
  # Values files are listed in load order; later entries are expected to
  # override earlier ones.
  valuesFrom:
    - kind: Secret
      name: prometheus-stack-helm-values
      valuesKey: 01-only-crd-and-operator.yaml
    - kind: Secret
      name: prometheus-stack-helm-values
      valuesKey: 02-kube-metrics.yaml
    - kind: Secret
      name: prometheus-stack-helm-values
      valuesKey: 03-node-exporter.yaml
    - kind: Secret
      name: prometheus-stack-helm-values
      valuesKey: 04-kubelet.yaml
@@ -0,0 +1,7 @@
# Flux source pointing at the prometheus-community Helm chart repository.
# The index is re-fetched every 40 hours.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: prometheus-stack
spec:
  interval: 40h
  url: https://prometheus-community.github.io/helm-charts
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,14 @@
# Kustomize entry point for the monitoring stack. Every listed resource is
# placed in the `monitoring` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
  - helm/helm-repo.yaml
  - helm/helm-release.yaml
  - prometheus-agent.yaml
# Packs the Helm values files into a single Secret. Each entry is
# `<secret key>=<path relative to this file>`; the keys are the names the
# HelmRelease reads through `valuesFrom[].valuesKey`.
# NOTE(review): secretGenerator appends a content hash to the Secret name by
# default, and HelmRelease `valuesFrom` is not a built-in name-reference
# target - confirm a nameReference configuration (or
# `generatorOptions.disableNameSuffixHash`) is in place.
secretGenerator:
  - name: prometheus-stack-helm-values
    files:
      - 01-only-crd-and-operator.yaml=helm/01-only-crd-and-operator.yaml
      - 02-kube-metrics.yaml=helm/02-kube-metrics.yaml
      - 03-node-exporter.yaml=helm/03-node-exporter.yaml
      # All values files sit directly under helm/, not under helm/helm/.
      - 04-kubelet.yaml=helm/04-kubelet.yaml
@@ -0,0 +1,71 @@
# Single-replica Prometheus agent that scrapes the selected ServiceMonitors
# and forwards samples to a remote-write endpoint.
apiVersion: monitoring.coreos.com/v1alpha1
kind: PrometheusAgent
metadata:
  name: prometheus-agent
spec:
  # PodMonitor selectors are explicitly left null.
  podMonitorSelector: null
  podMonitorNamespaceSelector: null
  # Only namespaces labelled prometheus-monitoring=enabled are considered.
  serviceMonitorNamespaceSelector:
    matchLabels:
      prometheus-monitoring: enabled
  # Only ServiceMonitors labelled release=prometheus-stack are picked up.
  serviceMonitorSelector:
    matchLabels:
      release: prometheus-stack
  replicas: 1
  remoteWrite:
    - url: https://prometheus.monitoring.limbosolutions.com/api/v1/write
  scrapeInterval: 60s
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 200m
      memory: 300Mi
  serviceAccountName: prometheus-agent
  # Schedule on nodes labelled role=worker-node.
  nodeSelector:
    role: worker-node
---
# Cluster-wide read permissions granted to the prometheus-agent identity.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-agent
rules:
  # Read-only access to core-group resources.
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/metrics
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # Read-only access to the monitor custom resources.
  - apiGroups:
      - monitoring.coreos.com
    resources:
      - servicemonitors
      - podmonitors
    verbs:
      - get
      - list
      - watch
  # GET on the metrics endpoints that are not API resources.
  - nonResourceURLs:
      - /metrics
      - /metrics/cadvisor
      - /metrics/probes
    verbs:
      - get
---
# Grants the prometheus-agent ClusterRole to the prometheus-agent
# ServiceAccount in the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-agent-monitoring
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-agent
subjects:
  - kind: ServiceAccount
    name: prometheus-agent
    namespace: monitoring
---
# Identity referenced by the PrometheusAgent's serviceAccountName and by the
# ClusterRoleBinding subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-agent
  namespace: monitoring