feat: migrated to k3s

2025-11-15 11:54:06 +00:00
parent a595747c2c
commit 709d6de566
34 changed files with 5903 additions and 802 deletions


@@ -0,0 +1,34 @@
# Prometheus Setup
- <https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack>
- <https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml>
## Helm chart
```bash
# add repo
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
```
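Running `helm repo update` after adding the repo keeps the local chart index current, so the install below picks up the latest chart version:
```bash
# refresh the local chart index before installing/upgrading
helm repo update
```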
**This Helm chart installs:**
- the CRDs
- the Prometheus Operator
- the ServiceMonitors for the Kubernetes components
```bash
kubectl get namespace monitoring || kubectl create namespace monitoring
helm upgrade --install prometheus-stack prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --values=./helm/01-only-crd-and-operator.yaml \
  --values=./helm/02-kube-metrics.yaml \
  --values=./helm/03-node-exporter.yaml \
  --values=./helm/04-kubelet.yaml \
  --values=./helm/10-testing-values.yaml
```
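After the install it is worth confirming that the CRDs and the operator are actually in place, since the rest of the setup (ServiceMonitors, the agent below) depends on them; a quick check:
```bash
# CRDs registered by the chart
kubectl get crd | grep monitoring.coreos.com
# at this stage the operator should be the main workload in the namespace
kubectl -n monitoring get deployments,pods
```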
## Deploy the Prometheus agent
```bash
kubectl apply -f ./prometheus-agent.yaml
```
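Once applied, the operator should reconcile the `PrometheusAgent` into a running workload (a StatefulSet by default) that remote-writes to the configured endpoint. A quick sanity check; the pod label used below is an assumption about what the operator sets, so adjust it if it does not match:
```bash
# the PrometheusAgent resource and the workload generated from it
kubectl -n monitoring get prometheusagents
kubectl -n monitoring get statefulsets,pods
# tail the agent logs for remote_write errors (label selector assumed)
kubectl -n monitoring logs -l app.kubernetes.io/name=prometheus-agent --tail=50
```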


@@ -0,0 +1,48 @@
# values.yaml to install only the Prometheus Operator and CRDs
# Disable all components except the operator
defaultRules:
  create: false
alertmanager:
  enabled: false
grafana:
  enabled: false
kubeStateMetrics:
  enabled: false
nodeExporter:
  enabled: false
prometheus:
  enabled: false
coreDns:
  enabled: false
kubeControllerManager:
  enabled: false
kubeEtcd:
  enabled: false
kubeProxy:
  enabled: false
kubeScheduler:
  enabled: false
prometheusOperator:
  enabled: true
  createCustomResource: true
  tls:
    enabled: false
  admissionWebhooks:
    enabled: false
  cleanupCustomResource: false
  serviceMonitor:
    selfMonitor: false
  kubeletService:
    enabled: false


@@ -0,0 +1,29 @@
kubeStateMetrics:
  enabled: true
kube-state-metrics: # ok, tested
  prometheus:
    monitor:
      relabelings:
        - targetLabel: cluster
          replacement: casa
      additionalLabels:
        app.kubernetes.io/name: prometheus-kube-state-metrics # !important: selector used by the agent
coreDns: # ok, tested
  enabled: true
  serviceMonitor:
    relabelings:
      - targetLabel: cluster
        replacement: casa
    additionalLabels:
      app.kubernetes.io/name: prometheus-stack-coredns # !important: selector used by the agent
kubeApiServer: # ok, tested
  enabled: true
  serviceMonitor:
    relabelings:
      - targetLabel: cluster
        replacement: casa
    additionalLabels:
      app.kubernetes.io/name: prometheus-stack-apiserver # !important: selector used by the agent
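
The `!important` labels above are what the agent's `serviceMonitorSelector` (see `prometheus-agent.yaml` later in this commit) matches on: it only picks up monitors where the `app.kubernetes.io/name` key exists. A quick way to list which monitors currently qualify:
```bash
# ServiceMonitors the agent will select: any carrying the app.kubernetes.io/name label key
kubectl get servicemonitors --all-namespaces -l app.kubernetes.io/name
```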


@@ -0,0 +1,19 @@
# Deploy node-exporter as a DaemonSet on all nodes
nodeExporter:
  enabled: true
# job: node-exporter
prometheus-node-exporter:
  prometheus:
    monitor:
      enabled: true
      relabelings:
        # https://github.com/dotdc/grafana-dashboards-kubernetes
        - action: replace
          sourceLabels: [__meta_kubernetes_pod_node_name]
          targetLabel: nodename
        # cluster identification
        - targetLabel: cluster
          replacement: casa


@@ -0,0 +1,83 @@
prometheusOperator:
  kubeletService:
    enabled: true
    # requires manual creation of the service, see #prom-kublet-service
# File used to test new options and configurations
# Should be the last file to be loaded
kubelet:
  enabled: true
  namespace: kube-system
  serviceMonitor:
    interval: 30s # WARN: errors ingesting out-of-order samples, see https://github.com/prometheus-community/helm-charts/issues/5483
    enabled: true
    ## Enable scraping /metrics from the kubelet's service
    kubelet: true
    additionalLabels:
      app.kubernetes.io/name: prometheus-kubelet # !important: selector used by the agent
    probesMetricRelabelings:
      - targetLabel: cluster
        replacement: casa
      - sourceLabels: [__name__, image]
        separator: ;
        regex: container_([a-z_]+);
        replacement: $1
        action: drop
      - sourceLabels: [__name__]
        separator: ;
        regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
        replacement: $1
        action: drop
    # RelabelConfigs to apply to samples before scraping
    # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    #
    # metrics_path is required to match upstream rules and charts
    cAdvisorRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
      - sourceLabels: [__meta_kubernetes_pod_node_name]
        separator: ;
        regex: ^(.*)$
        targetLabel: nodename
        replacement: $1
        action: replace
    # RelabelConfigs to apply to samples before scraping
    # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    #
    probesRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
      - sourceLabels: [__meta_kubernetes_pod_node_name]
        separator: ;
        regex: ^(.*)$
        targetLabel: nodename
        replacement: $1
        action: replace
    resourceRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
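
Since the agent only selects labelled monitors, it is worth confirming that `app.kubernetes.io/name: prometheus-kubelet` actually landed on the kubelet ServiceMonitor after the upgrade; a quick check:
```bash
# the kubelet ServiceMonitor should carry the label the agent selects on
kubectl get servicemonitors --all-namespaces -l app.kubernetes.io/name=prometheus-kubelet --show-labels
```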


@@ -0,0 +1 @@
# use for testing

File diff suppressed because it is too large


@@ -0,0 +1,69 @@
apiVersion: monitoring.coreos.com/v1alpha1
kind: PrometheusAgent
metadata:
  name: prometheus-agent
  namespace: monitoring
spec:
  serviceMonitorNamespaceSelector: {}
  podMonitorNamespaceSelector: {}
  serviceMonitorSelector:
    matchExpressions:
      - key: app.kubernetes.io/name
        operator: Exists
  podMonitorSelector:
    matchExpressions:
      - key: app.kubernetes.io/name
        operator: Exists
  replicas: 1
  remoteWrite:
    - url: https://prometheus.monitoring.limbosolutions.com/api/v1/write
  scrapeInterval: 60s
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 200m
      memory: 180Mi
  serviceAccountName: prometheus-agent
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-agent
rules:
  - apiGroups: [""]
    resources: ["nodes", "nodes/metrics", "nodes/proxy", "services", "endpoints", "pods"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["monitoring.coreos.com"]
    resources: ["servicemonitors", "podmonitors"]
    verbs: ["get", "list", "watch"]
  - nonResourceURLs:
      - /metrics
      - /metrics/cadvisor
      - /metrics/probes
    verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-agent-monitoring
roleRef:
  kind: ClusterRole
  name: prometheus-agent
  apiGroup: rbac.authorization.k8s.io
subjects:
  - kind: ServiceAccount
    name: prometheus-agent
    namespace: monitoring
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-agent
  namespace: monitoring
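
The ClusterRole and ClusterRoleBinding above are what allow the agent's ServiceAccount to discover targets and scrape the kubelet endpoints; impersonation makes it easy to confirm the binding took effect:
```bash
# both should print "yes" once the ClusterRoleBinding is applied
kubectl auth can-i get nodes/metrics --as=system:serviceaccount:monitoring:prometheus-agent
kubectl auth can-i list servicemonitors.monitoring.coreos.com --as=system:serviceaccount:monitoring:prometheus-agent
```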

monitoring/promtail/.gitignore

@@ -0,0 +1 @@
**.local.**


@@ -0,0 +1,12 @@
# Promtail
```bash
kubectl get namespace monitoring || kubectl create namespace monitoring
# add repo
helm repo add grafana https://grafana.github.io/helm-charts
# install & upgrade
helm upgrade --install promtail grafana/promtail --namespace monitoring \
  --values=./values.yaml \
  --values=./values.local.yaml
```
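The Loki endpoint itself is not committed: it lives in `values.local.yaml`, which the `**.local.**` rule in `.gitignore` keeps out of the repo. A minimal sketch of that file, using the example push URL from the comment in `values.yaml` (the real URL is deployment-specific):
```bash
# values.local.yaml stays untracked; the URL below is only the documented example
cat > values.local.yaml <<'EOF'
config:
  clients:
    - url: https://lokiserver/loki/api/v1/push
EOF
```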


@@ -0,0 +1,46 @@
config:
  clients:
    - url: "????" # replaced in values.local.yaml, e.g. https://lokiserver/loki/api/v1/push
  # by default, all scrape configs have node_name
  snippets:
    extraScrapeConfigs: |
      # scrape config for the host journal (syslog/journald)
      - job_name: host-journald
        journal:
          json: true
          max_age: 24h
          path: /var/log/host/journal
          labels:
            job: journald
        relabel_configs:
          - source_labels: ['__journal__systemd_unit']
            target_label: 'journal_systemd_unit'
          - source_labels: ['__journal_syslog_identifier']
            target_label: 'journal_syslog_identifier'
          - source_labels: ['__journal__hostname']
            target_label: 'journal_hostname'
          - target_label: 'host'
            replacement: '${HOSTNAME}'
          - target_label: 'cluster'
            replacement: 'casa'
extraArgs:
  - --config.expand-env=true
extraVolumes:
  - name: node-logs
    hostPath:
      path: /var/log
extraVolumeMounts:
  - name: node-logs
    mountPath: /var/log/host
    readOnly: true
resources:
  limits:
    cpu: 200m
    memory: 100Mi
  requests:
    cpu: 100m
    memory: 50Mi
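
After deploying, it helps to confirm that the host journal is actually visible inside the Promtail pods at the path the journald scrape config expects; the DaemonSet and label names below assume the release name `promtail` from the README and the chart's standard labels:
```bash
# Promtail pods created by the DaemonSet (label assumed)
kubectl -n monitoring get pods -l app.kubernetes.io/name=promtail
# the mounted host journal should be readable at /var/log/host/journal
kubectl -n monitoring exec daemonset/promtail -- ls /var/log/host/journal
```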