Trying out Cortex horizontal scaling

Cortex supports running multiple instances, which makes flexible, large-scale deployments possible. The demo below runs three Cortex instances without configuring a replication factor (replication mainly provides HA).
HAProxy sits in front of the three instances as the load balancer for both the push and the query endpoints, and Prometheus monitors HAProxy (using the native Prometheus exporter
that shipped with HAProxy 2.0). All components the system depends on run via docker-compose; see GitHub for the full configuration.

Environment setup

  • docker-compose file
 
version: "3"
services:
    haproxy:
      image: haproxy:2.0.5-alpine
      volumes:
      - "./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg"
      ports:
      - "8404:8404"
      - "9009:9009"
    consul:
      image: consul
      ports:
      - "8500:8500"
    cortex1:
      image: cortexproject/cortex:master-7d13c2f0
      command: -config.file=/etc/single-process-config.yaml -ring.store=consul -consul.hostname=consul:8500
      ports:
      - "9001:9009"
      volumes:
      - "./single-process-config.yaml:/etc/single-process-config.yaml"
    cortex2:
      image: cortexproject/cortex:master-7d13c2f0
      command: -config.file=/etc/single-process-config.yaml -ring.store=consul -consul.hostname=consul:8500
      ports:
      - "9002:9009"
      volumes:
      - "./single-process-config.yaml:/etc/single-process-config.yaml"
    cortex3:
      image: cortexproject/cortex:master-7d13c2f0
      command: -config.file=/etc/single-process-config.yaml -ring.store=consul -consul.hostname=consul:8500
      ports:
      - "9003:9009"
      volumes:
      - "./single-process-config.yaml:/etc/single-process-config.yaml"
    grafana:
      image: grafana/grafana
      ports:
      - "3000:3000"
    node-exporter:
      image: basi/node-exporter
      ports:
      - "9100:9100"
    prometheus:
      image: prom/prometheus
      ports:
      - "9090:9090"
      volumes:
      - "./prometheus.yml:/etc/prometheus/prometheus.yml"
  • cortex configuration
    the single-process-config.yaml file
 
# Configuration for running Cortex in single-process mode.
# This should not be used in production. It is only for getting started
# and development.
# Disable the requirement that every request to Cortex has a
# X-Scope-OrgID header. `fake` will be substituted in instead.
auth_enabled: false
server:
  http_listen_port: 9009
  # Configure the server to allow messages up to 100MB.
  grpc_server_max_recv_msg_size: 104857600
  grpc_server_max_send_msg_size: 104857600
  grpc_server_max_concurrent_streams: 1000
distributor:
  shard_by_all_labels: true
  pool:
    health_check_ingesters: true
ingester_client:
  grpc_client_config:
    # Configure the client to allow messages up to 100MB.
    max_recv_msg_size: 104857600
    max_send_msg_size: 104857600
    use_gzip_compression: true
ingester:
  #chunk_idle_period: 15m
  lifecycler:
    # The address to advertise for this ingester. Will be autodiscovered by
    # looking up address on eth0 or en0; can be specified if this fails.
    # address: 127.0.0.1
    # We want to start immediately and flush on shutdown.
    join_after: 0
    claim_on_rollout: false
    final_sleep: 0s
    num_tokens: 512
    # Use an in memory ring store, so we don't need to launch a Consul.
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
# Use local storage - BoltDB for the index, and the filesystem
# for the chunks.
schema:
  configs:
  - from: 2019-03-25
    store: boltdb
    object_store: filesystem
    schema: v10
    index:
      prefix: index_
      period: 168h
storage:
  boltdb:
    directory: /tmp/cortex/index
  filesystem:
    directory: /tmp/cortex/chunks
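
One thing worth calling out: this file configures an inmemory ring kvstore, but the `-ring.store=consul -consul.hostname=consul:8500` flags passed in the compose file override it, so all three instances register in a single ring stored in Consul. A quick way to confirm, assuming Cortex's default Consul key prefix of `collectors/` (an assumption; adjust if you changed `-consul.prefix`):

# list the ring key Cortex writes to Consul (the value is base64 encoded)
curl -s http://localhost:8500/v1/kv/collectors/ring
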
  • prometheus configuration
    the prometheus.yml file; the main piece is the remote write URL, plus a few scrape jobs for the metrics we monitor
 
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).
remote_write:
- url: http://haproxy:9009/api/prom/push
# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'node-exporter'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['node-exporter:9100']
  - job_name: 'haproxy-exporter'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['haproxy:8404']
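
With remote_write flowing through HAProxy, the same endpoint also serves reads; Cortex of this vintage exposes a Prometheus-compatible query API under the `/api/prom` prefix (an assumption based on the getting-started docs; newer builds moved to a different prefix). A quick sanity check once data is flowing:

# query back a metric that Prometheus pushed through remote_write
curl -s 'http://localhost:9009/api/prom/api/v1/query?query=up'
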
  • haproxy configuration
    mainly load balancing across the backend Cortex services on port 9009
 
global
    # master-worker required for `program` section
    # enable here or start with -Ws
    master-worker
    mworker-max-reloads 3
    # enable core dumps
    set-dumpable
    user root
    stats socket /run/haproxy.sock mode 600 level admin
    group root
    log stdout local0
defaults
    mode http
    log global
    timeout client 5s
    timeout server 5s
    timeout connect 5s
    option redispatch
    option httplog
resolvers dns
    parse-resolv-conf
    resolve_retries 3
    timeout resolve 1s
    timeout retry 1s
    hold other 30s
    hold refused 30s
    hold nx 30s
    hold timeout 30s
    hold valid 10s
    hold obsolete 30s
userlist api 
  user admin password $5$aVnIFECJ$2QYP64eTTXZ1grSjwwdoQxK/AP8kcOflEO1Q5fc.5aA
frontend stats
    bind *:8404
    # Enable Prometheus Exporter
    http-request use-service prometheus-exporter if { path /metrics }
    stats enable
    stats uri /stats
    stats refresh 10s
frontend fe_main
    bind :9009 
    log-format "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r cpu_calls:%[cpu_calls] cpu_ns_tot:%[cpu_ns_tot] cpu_ns_avg:%[cpu_ns_avg] lat_ns_tot:%[lat_ns_tot] lat_ns_avg:%[lat_ns_avg]"
    default_backend be_main
backend be_main
    # Enable Power of Two Random Choices Algorithm
    balance random(2)
    # Enable Layer 7 retries
    retry-on all-retryable-errors
    retries 3
    server cortex1 cortex1:9009 check inter 2s
    server cortex2 cortex2:9009 check inter 2s
    server cortex3 cortex3:9009 check inter 2s
backend be_503
    errorfile 503 /usr/local/etc/haproxy/errors/503.http
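
With this configuration, HAProxy's built-in Prometheus exporter (new in 2.0) answers on port 8404, which is exactly what the `haproxy-exporter` scrape job above targets, and the human-readable stats page lives at `/stats`. A quick check:

# Prometheus-format metrics straight from HAProxy
curl -s http://localhost:8404/metrics | head
# the stats UI (refreshed every 10s per the config; open in a browser)
curl -s http://localhost:8404/stats | head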

Startup && results

  • start the stack
docker-compose up -d
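If everything came up, listing the services should show haproxy, consul, the three cortex instances, grafana, node-exporter, and prometheus all in the Up state:

docker-compose ps
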
  • prometheus results

(screenshots: Prometheus UI)

  • haproxy monitoring

(screenshots: HAProxy stats and metrics dashboards)

  • configure grafana

This mainly involves adding the Prometheus datasource and dashboards.
For the datasource, note that the address is the Cortex endpoint load balanced through HAProxy, as in the sketch below.
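
The datasource can also be created through Grafana's HTTP API instead of the UI. A minimal sketch, assuming default admin:admin credentials and the legacy `/api/prom` query prefix (both assumptions, adjust to your setup):

curl -s -u admin:admin -H 'Content-Type: application/json' \
  -X POST http://localhost:3000/api/datasources \
  -d '{"name":"cortex","type":"prometheus","url":"http://haproxy:9009/api/prom","access":"proxy"}'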

(screenshot: datasource configuration)

For dashboards, you can refer to the project and directly import the node exporter and HAProxy metrics dashboards.

  • grafana results

(screenshot: Grafana dashboards)

  • cortex ring status

Directly on a single instance:
(screenshot: ring page)

Through HAProxy:
(screenshot: ring page)

Notes

The above is a simple configuration. In practice we can pass -distributor.replication-factor=3 when starting Cortex to get HA for the ingested data. This is also a fairly minimal
exercise; a real deployment would additionally need a proper backend store for the data rather than the local filesystem.
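
As a sketch, enabling a replication factor of 3 would just mean adding the flag to each cortex service's `command` in the compose file (hypothetical, and it only makes sense with at least three ingesters in the ring):

-config.file=/etc/single-process-config.yaml -ring.store=consul -consul.hostname=consul:8500 -distributor.replication-factor=3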

References

https://github.com/cortexproject/cortex/blob/master/docs/getting_started.md
https://github.com/rongfengliang/cortex-docker-compose-running