diff --git a/monitoring/prometheus/deploy/README.md b/monitoring/prometheus/deploy/README.md index b2abdda..a1716b5 100644 --- a/monitoring/prometheus/deploy/README.md +++ b/monitoring/prometheus/deploy/README.md @@ -29,6 +29,16 @@ helm upgrade --install prometheus-stack prometheus-community/kube-prometheus-sta ## deploy prometheus agent +**requirements:** + +On namespaces running prometheus monitors add label `prometheus-monitoring=enabled` + +**Example:** + +``` bash +kubectl label namespace monitoring prometheus-monitoring=enabled --overwrite +``` + ```bash kubectl apply -f ./prometheus-agent.yaml ``` diff --git a/monitoring/prometheus/deploy/helm/02-kube-metrics.yaml b/monitoring/prometheus/deploy/helm/02-kube-metrics.yaml index 198f630..a50aee6 100644 --- a/monitoring/prometheus/deploy/helm/02-kube-metrics.yaml +++ b/monitoring/prometheus/deploy/helm/02-kube-metrics.yaml @@ -8,6 +8,7 @@ kube-state-metrics: # ok tested! role: worker-node prometheus: monitor: + interval: "60s" relabelings: - targetLabel: cluster replacement: casa diff --git a/monitoring/prometheus/deploy/helm/03-node-exporter.yaml b/monitoring/prometheus/deploy/helm/03-node-exporter.yaml index 0965d76..562f04c 100644 --- a/monitoring/prometheus/deploy/helm/03-node-exporter.yaml +++ b/monitoring/prometheus/deploy/helm/03-node-exporter.yaml @@ -8,6 +8,7 @@ prometheus-node-exporter: prometheus: monitor: enabled: true + interval: "60s" relabelings: # https://github.com/dotdc/grafana-dashboards-kubernetes - action: replace @@ -17,3 +18,8 @@ prometheus-node-exporter: # identification of cluster - targetLabel: cluster replacement: casa + + # it seems that a timestamp cannot be used as a label!
+ - action: labeldrop + regex: __meta_kubernetes_endpoints_annotation_endpoints_kubernetes_io_last_change_trigger_time + diff --git a/monitoring/prometheus/deploy/helm/04-kubelet.yaml b/monitoring/prometheus/deploy/helm/04-kubelet.yaml index 5254d5a..39fdf3c 100644 --- a/monitoring/prometheus/deploy/helm/04-kubelet.yaml +++ b/monitoring/prometheus/deploy/helm/04-kubelet.yaml @@ -4,10 +4,11 @@ kubelet: enabled: true namespace: kube-system serviceMonitor: - interval: 30s #WARN: Error on ingesting out-of-order samples. https://github.com/prometheus-community/helm-charts/issues/5483 + interval: 60s #WARN: Error on ingesting out-of-order samples. https://github.com/prometheus-community/helm-charts/issues/5483 enabled: true ## Enable scraping /metrics from kubelet's service kubelet: true + cAdvisor: true additionalLabels: app.kubernetes.io/name: prometheus-kubelet # !important: selector used by agent diff --git a/monitoring/prometheus/deploy/prometheus-agent.yaml b/monitoring/prometheus/deploy/prometheus-agent.yaml index 684b89f..fb6f8e0 100644 --- a/monitoring/prometheus/deploy/prometheus-agent.yaml +++ b/monitoring/prometheus/deploy/prometheus-agent.yaml @@ -5,16 +5,17 @@ metadata: name: prometheus-agent namespace: monitoring spec: - serviceMonitorNamespaceSelector: {} - podMonitorNamespaceSelector: {} + + podMonitorSelector: null + podMonitorNamespaceSelector: null + serviceMonitorNamespaceSelector: + matchLabels: + prometheus-monitoring: enabled + serviceMonitorSelector: - matchExpressions: - - key: app.kubernetes.io/name - operator: Exists - podMonitorSelector: - matchExpressions: - - key: app.kubernetes.io/name - operator: Exists + matchLabels: + release: prometheus-stack + replicas: 1 remoteWrite: - url: https://prometheus.monitoring.limbosolutions.com/api/v1/write @@ -25,7 +26,7 @@ spec: memory: 128Mi limits: cpu: 200m - memory: 512Mi + memory: 300Mi serviceAccountName: prometheus-agent nodeSelector: role: worker-node