Commit 8bc8e8c5 authored by Paul
Browse files

move monitoring to freifunknord

parent 86aa0b03
Pipeline #5089 passed with stage
in 1 minute and 50 seconds
......@@ -20,8 +20,6 @@ service_hosts:
hosts:
srv02:
ansible_ssh_host: srv02.luebeck.freifunk.net
monitoring:
ansible_ssh_host: monitoring.net.ffhl.de
blackboxes:
vars:
ansible_ssh_user: root
......
......@@ -31,11 +31,6 @@
- role: ffhl_ns_auth
tags: [nameserver, nsauth]
- hosts: monitoring
roles:
- role: monitoring
tags: [monitoring]
- hosts: blackboxes
become: true
......
This diff is collapsed.
deb https://packages.grafana.com/oss/deb stable main
# Set the command-line arguments to pass to the server.
# TODO:
# - set listen address to something non-public (ARGS currently uses [::]:9090)
# - set storage duration with --storage.tsdb.retention=$something_that_makes_sense
#   (ARGS below currently passes --storage.tsdb.retention.time=5y)
# - maybe something to increase performance
ARGS="--web.listen-address=[::]:9090 \
--storage.tsdb.retention.time=5y \
--storage.tsdb.min-block-duration=60m \
--query.timeout=30s \
--query.max-concurrency=5 \
--log.level=warn"
# Prometheus supports the following options:
# --config.file="/etc/prometheus/prometheus.yml"
# Prometheus configuration file path.
# --web.listen-address="0.0.0.0:9090"
# Address to listen on for UI, API, and telemetry.
# --web.read-timeout=5m Maximum duration before timing out read of the
# request, and closing idle connections.
# --web.max-connections=512 Maximum number of simultaneous connections.
# --web.external-url=<URL> The URL under which Prometheus is externally
# reachable (for example, if Prometheus is served
# via a reverse proxy). Used for generating
# relative and absolute links back to Prometheus
# itself. If the URL has a path portion, it will
# be used to prefix all HTTP endpoints served by
# Prometheus. If omitted, relevant URL components
# will be derived automatically.
# --web.route-prefix=<path> Prefix for the internal routes of web endpoints.
# Defaults to path of --web.external-url.
# --web.local-assets="/usr/share/prometheus/web/"
# Path to static asset/templates directory.
# --web.user-assets=<path> Path to static asset directory, available at
# /user.
# --web.enable-lifecycle Enable shutdown and reload via HTTP request.
# --web.enable-admin-api Enables API endpoints for admin control actions.
# --web.console.templates="/etc/prometheus/consoles"
# Path to the console template directory,
# available at /consoles.
# --web.console.libraries="/etc/prometheus/console_libraries"
# Path to the console library directory.
# --storage.tsdb.path="/var/lib/prometheus/metrics2/"
# Base path for metrics storage.
# --storage.tsdb.min-block-duration=2h
# Minimum duration of a data block before being
# persisted.
# --storage.tsdb.max-block-duration=<duration>
# Maximum duration compacted blocks may span.
# (Defaults to 10% of the retention period)
# --storage.tsdb.retention=15d
# How long to retain samples in the storage.
# --storage.tsdb.use-lockfile
# Create a lockfile in data directory.
# --alertmanager.notification-queue-capacity=10000
# The capacity of the queue for pending alert
# manager notifications.
# --alertmanager.timeout=10s
# Timeout for sending alerts to Alertmanager.
# --query.lookback-delta=5m The delta difference allowed for retrieving
# metrics during expression evaluations.
# --query.timeout=2m Maximum time a query may take before being
# aborted.
# --query.max-concurrency=20
# Maximum number of queries executed concurrently.
# --log.level=info Only log messages with the given severity or
# above. One of: [debug, info, warn, error]
# Prometheus recording rules for the "ffhl" group. Each rule evaluates `expr`
# and stores the result as a new series named by `record`.
groups:
  - name: ffhl
    rules:
      # Summed rx+tx byte rate (1m window) per fastd peer on ffhl_mesh_vpn* interfaces.
      - record: fastd_peer_traffic_sum
        expr: 'sum by (key, name) (rate(fastd_peer_traffic{iface=~"ffhl_mesh_vpn.*", kind="bytes", type=~"rx|tx"}[1m]))'

      # Number of link_tq series per link type, ignoring "undefined" links.
      - record: ffhl_mesh_links
        expr: 'count by (link_type) (link_tq{link_type!="undefined"})'

      # Ratio of online mesh nodes to all mesh nodes as reported by the hopglass job.
      - record: ffhl_nodes_online_percentage
        expr: 'meshnodes_online_total{job="hopglass"} / meshnodes_total{job="hopglass"}'

      # Average link_tq per link type, ignoring "undefined" links.
      - record: ffhl_mesh_avg_link_quality
        expr: 'avg by (link_type)(link_tq{link_type!="undefined"})'

      # 24h average of total_clients from the hopglass job.
      - record: ffhl_mesh_connected_clients_24
        expr: 'avg_over_time(total_clients{job="hopglass",instance_!="hopglass"}[24h])'

      # Count of `online` series grouped by firmware label.
      - record: ffhl_firmware_distribution
        expr: 'count by (firmware)(online{firmware!="", instance_!="hopglass"})'

      # Count of `online` series grouped by model label.
      - record: ffhl_device_distribution
        expr: 'count by (model)(online{model!="", instance_!="hopglass"})'

      # Count of `online` series grouped by gateway label.
      - record: ffhl_gateway_distribution
        expr: 'count by (gateway) (online{gateway!="", job="hopglass"})'

      # 300s-window rate of statistics_traffic, summed per `type` label.
      - record: ffhl_mesh_sum_traffic_type
        expr: 'sum by (type) (rate(statistics_traffic[300s]))'

      # 300s-window rate of statistics_traffic, summed per `mtype` label.
      - record: ffhl_mesh_sum_traffic_mtype
        expr: 'sum by (mtype) (rate(statistics_traffic[300s]))'

      # Per-node, per-mtype traffic rate for site "ffhl", multiplied by 8
      # (presumably bytes -> bits; confirm the unit of statistics_traffic).
      - record: ffhl_node_statistics_traffic
        expr: 'sum by (nodeid, mtype) (rate(statistics_traffic{site="ffhl", instance_!="hopglass"}[5m]) * 8)'
# Sample config for Prometheus.
global:
  # Scrape targets every 60 seconds.
  scrape_interval: 60s
  # Evaluate rules every 120 seconds.
  evaluation_interval: 120s
  # scrape_timeout is not set here, so the global default (10s) applies.
# Alertmanager configuration: alerts are sent to a single, statically
# configured Alertmanager target.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "localhost:9093"
# Rule files are loaded once and then evaluated periodically according to the
# global 'evaluation_interval'.
rule_files:
  - "first_rules.yml"
  # - "second_rules.yml"
# Scrape configurations, one entry per job.
# The job name is added as a label `job=<job_name>` to any timeseries scraped
# from that entry.
scrape_configs:
  - job_name: hopglass
    static_configs:
      - targets:
          - "srv02.ffhl.de:4000"

  # Targets on port 9100.
  - job_name: hosts
    static_configs:
      - targets:
          - "muehlentor.mesh.ffhl.chaotikum.org:9100"
          - "holstentor.mesh.ffhl.chaotikum.org:9100"
          - "kaisertor.mesh.ffhl.chaotikum.org:9100"
          - "huextertor.mesh.ffhl.chaotikum.org:9100"
          - "gw05.ffhl.chaotikum.org:9100"
          - "builder.luebeck.freifunk.net:9100"
          - "srv02.luebeck.freifunk.net:9100"
          - "srv03.luebeck.freifunk.net:9100"
          - "blueberry.luebeck.freifunk.net:9100"
          - "strawberry.luebeck.freifunk.net:9100"

  # Targets on port 8082.
  - job_name: powerdns
    static_configs:
      - targets:
          - "kaisertor.luebeck.freifunk.net:8082"
          - "huextertor.luebeck.freifunk.net:8082"
          - "holstentor.luebeck.freifunk.net:8082"
          - "muehlentor.luebeck.freifunk.net:8082"
          - "gw05.ffhl.chaotikum.org:8082"

  # Targets on port 9281.
  - job_name: fastd
    static_configs:
      - targets:
          - "kaisertor.luebeck.freifunk.net:9281"
          - "muehlentor.luebeck.freifunk.net:9281"
          - "holstentor.luebeck.freifunk.net:9281"
          - "huextertor.luebeck.freifunk.net:9281"
          - "testgw.luebeck.freifunk.net:9281"

  # Targets on port 9324.
  - job_name: bird
    static_configs:
      - targets:
          - "kaisertor.luebeck.freifunk.net:9324"
          - "huextertor.luebeck.freifunk.net:9324"
          - "holstentor.luebeck.freifunk.net:9324"
          - "muehlentor.luebeck.freifunk.net:9324"
          - "gw05.ffhl.chaotikum.org:9324"

  # ICMP probes: the listed targets are not scraped directly. The relabelling
  # below passes each one as the `target` URL parameter to /probe on
  # blackbox.luebeck.freifunk.net:9115.
  - job_name: blackbox_ping
    metrics_path: /probe
    params:
      module:
        - icmp
    static_configs:
      - targets:
          - "google.com"
          - "ipv6.google.com"
          - "1.1.1.1"
    relabel_configs:
      # Copy the configured target into the `target` URL parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the probed host as a `target` label on the resulting series.
      - source_labels: [__param_target]
        target_label: target
      # Send the actual scrape request to the blackbox host instead.
      - target_label: __address__
        replacement: "blackbox.luebeck.freifunk.net:9115"
---
# Install helper packages, then pull in the monitoring tasks.
- name: install utils
  apt:
    name:
      - gnupg2
    state: latest
    update_cache: true

# The monitoring tasks are statically imported and tagged `monitoring`.
- name: setup monitoring
  import_tasks: monitoring.yml
  tags:
    - monitoring
---
#
# install prometheus
#
- name: install prometheus
  apt:
    name: prometheus
    state: latest
    update_cache: true

# The results of both copy tasks are registered so the service is restarted
# only when a configuration file actually changed.
- name: copy prometheus config files
  copy:
    src: prometheus
    dest: /etc/
  register: prometheus_config

- name: copy defaults config
  copy:
    src: prometheus-default
    dest: /etc/default/prometheus
  register: prometheus_defaults

# Restarting unconditionally would interrupt Prometheus on every playbook run
# and always report "changed"; restart only after a config change.
- name: restart prometheus
  systemd:
    name: prometheus
    enabled: true
    state: restarted
  when: prometheus_config is changed or prometheus_defaults is changed

# No-op right after a restart; otherwise guarantees the unit is enabled and up.
- name: ensure prometheus is enabled and running
  systemd:
    name: prometheus
    enabled: true
    state: started
#
# Install Grafana
#
# Uses the apt_key module instead of piping wget into `apt-key add -` through
# the shell: the module only reports "changed" when the key is actually added,
# and it avoids the shell module's `warn` option, which newer ansible-core
# releases no longer accept.
- name: add grafana repo pubkey
  apt_key:
    url: https://packages.grafana.com/gpg.key
    state: present

- name: setup grafana repo
  copy:
    src: grafana.list
    dest: /etc/apt/sources.list.d/

- name: install grafana
  apt:
    name: grafana
    state: latest
    update_cache: true

# NOTE(review): no task visible here enables or starts the grafana service -
# confirm that this is handled elsewhere.
- name: copy grafana config
  copy:
    src: grafana.ini
    dest: /etc/grafana/
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment