What the es.py script is for

As an ops engineer, do you often find yourself installing middleware, databases, and other applications? Here 金老师 shares a script he wrote himself that automates the installation of an ES cluster, which should help improve everyone's efficiency.

Below is the full Python code of es.py:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import argparse
import socket
import subprocess
from time import sleep
from tqdm import tqdm

def run_command(command, check=True, capture_output=False):
    """Run a shell command and optionally capture its output."""
    result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    if check and result.returncode != 0:
        print(f"执行命令出错: {command}\n{result.stderr.strip()}")
        exit(1)
    if capture_output:
        return result.stdout.strip()
    return result

def check_root():
    """Ensure the script is run as root."""
    if os.geteuid() != 0:
        print("需要 root 用户执行.")
        exit(1)

def create_directories():
    """Create necessary directories for Elasticsearch."""
    if os.path.exists("/data/es"):
        run_command("rm -rf /data/es/data/* /data/es/logs/*", check=False)
    else:
        os.makedirs("/data/es/data", exist_ok=True)
        os.makedirs("/data/es/logs", exist_ok=True)
    print("已创建 Elasticsearch 数据和日志目录。")

def download_and_install_packages():
    """Download and install Elasticsearch and Kibana packages."""
    urls = [
        "https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.3.1-x86_64.rpm",
        "https://artifacts.elastic.co/downloads/kibana/kibana-8.3.1-x86_64.rpm"
    ]
    for url in tqdm(urls, desc="下载软件包"):
        run_command(f"wget -P /tmp {url}")
    run_command("yum localinstall /tmp/elasticsearch-8.3.1-x86_64.rpm -y")
    run_command("yum localinstall /tmp/kibana-8.3.1-x86_64.rpm -y")
    print("下载并安装了 Elasticsearch 和 Kibana。")

def adjust_system_parameters():
    """Adjust system parameters for Elasticsearch performance."""
    limits_conf_changed = False
    with open("/etc/security/limits.conf", "r") as f:
        limits_conf = f.read()

    if "elasticsearch soft memlock unlimited" not in limits_conf:
        with open("/etc/security/limits.conf", "a") as f:
            f.write("elasticsearch soft memlock unlimited\n")
        limits_conf_changed = True
    if "elasticsearch hard memlock unlimited" not in limits_conf:
        with open("/etc/security/limits.conf", "a") as f:
            f.write("elasticsearch hard memlock unlimited\n")
        limits_conf_changed = True

    if limits_conf_changed:
        print("已调整 Elasticsearch 的系统限制。")

    if int(subprocess.getoutput("ulimit -n")) < 655360:
        with open("/etc/security/limits.conf", "a") as f:
            f.write("""
* soft nofile 102400
* hard nofile 102400
* soft nproc 102400
* hard nproc 102400
elasticsearch soft memlock unlimited
elasticsearch hard memlock unlimited
""")
        print("已更新系统限制配置。")

    # grep returns an empty string when the setting is missing, so "not" means "still needs to be added".
    need_swappiness = not subprocess.getoutput("grep 'vm.swappiness = 0' /etc/sysctl.conf")
    need_max_map_count = not subprocess.getoutput("grep 'vm.max_map_count = 655360' /etc/sysctl.conf")

    if need_swappiness:
        with open("/etc/sysctl.conf", "a") as f:
            f.write("vm.swappiness = 0\n")
    if need_max_map_count:
        with open("/etc/sysctl.conf", "a") as f:
            f.write("vm.max_map_count = 655360\n")

    if need_swappiness or need_max_map_count:
        run_command("swapoff -a")
        run_command("sysctl -p")
        print("已调整 sysctl 参数。")

def adjust_jvm_heap_size():
    """Adjust JVM heap size based on total memory."""
    mem_total_kib = int(subprocess.getoutput("grep MemTotal /proc/meminfo | awk '{print $2}'"))
    mem_total_gib = mem_total_kib / 1024 / 1024
    heap_size = max(1, int(mem_total_gib / 5))  # never below 1g, otherwise -Xms0g would be written on small hosts

    with open("/etc/elasticsearch/jvm.options", "r") as file:
        lines = file.readlines()

    with open("/etc/elasticsearch/jvm.options", "w") as file:
        for line in lines:
            if line.startswith("-Xmx") or line.startswith("-Xms"):
                continue
            if "-XX:+UseG1GC" in line:
                continue
            file.write(line)
        file.write(f"-Xmx{heap_size}g\n")
        file.write(f"-Xms{heap_size}g\n")
        file.write("-XX:+UseG1GC\n")

    print(f"JVM 堆大小已调整为 {heap_size}g。")

def adjust_elasticsearch_service():
    """Adjust Elasticsearch service configuration."""
    with open("/usr/lib/systemd/system/elasticsearch.service", "r") as file:
        lines = file.readlines()
    with open("/usr/lib/systemd/system/elasticsearch.service", "w") as file:
        for line in lines:
            if line.strip() == "[Service]":
                file.write(line)
                file.write("LimitMEMLOCK=infinity\n")
            else:
                file.write(line)
    print("已调整 Elasticsearch 服务。")

def clean_old_certificates():
    """Clean up old certificates and keystore files."""
    cert_dir = "/etc/elasticsearch/certs"
    if os.path.exists(cert_dir):
        run_command(f"rm -rf {cert_dir}/*", check=False)
    run_command(f"rm -f /etc/elasticsearch/elasticsearch.keystore", check=False)
    print("已删除旧证书和 keystore 文件。")

def generate_self_signed_certificates():
    """Generate self-signed SSL certificates for Elasticsearch."""
    cert_dir = "/etc/elasticsearch/certs"
    os.makedirs(cert_dir, exist_ok=True)

    # Generate Transport certificate
    run_command(f"openssl req -x509 -nodes -newkey rsa:4096 -keyout {cert_dir}/transport.key -out {cert_dir}/transport.crt -subj '/CN=elasticsearch-transport' -days 365")
    run_command(f"openssl pkcs12 -export -in {cert_dir}/transport.crt -inkey {cert_dir}/transport.key -out {cert_dir}/transport.p12 -name transport -passout pass:changeit")

    print("已生成自签名证书。")

def setup_keystore():
    """Set up Elasticsearch keystore and add sensitive information."""
    run_command("/usr/share/elasticsearch/bin/elasticsearch-keystore create", check=False)
    run_command(f"echo 'changeit' | /usr/share/elasticsearch/bin/elasticsearch-keystore add xpack.security.transport.ssl.keystore.secure_password --stdin")
    run_command(f"echo 'changeit' | /usr/share/elasticsearch/bin/elasticsearch-keystore add xpack.security.transport.ssl.truststore.secure_password --stdin")
    print("已设置 Elasticsearch Keystore。")

def generate_elasticsearch_config(ip_list, es_http_port, es_tcp_port, cluster_name, current_ip):
    """Generate the Elasticsearch configuration file."""
    initial_master_nodes = ",".join([f"\"{ip}\"" for ip in ip_list])
    seed_hosts = ",".join([f"\"{ip}:{es_tcp_port}\"" for ip in ip_list])

    # Determine the index for the current node's IP
    node_index = ip_list.index(current_ip) + 1  # 1-based index for rack_id

    config = f"""
path.data: /data/es/data
path.logs: /data/es/logs
cluster.name: {cluster_name}
node.name: {socket.gethostname()}
network.host: 0.0.0.0
network.publish_host: {current_ip}
http.port: {es_http_port}
transport.port: {es_tcp_port}
bootstrap.memory_lock: true
http.cors.enabled: true
http.cors.allow-origin: "*"
http.cors.allow-credentials: true
ingest.geoip.downloader.enabled: false
xpack.security.enabled: true
xpack.security.enrollment.enabled: true
xpack.security.http.ssl:
  enabled: false  # 确保 HTTP 层 SSL 被禁用
xpack.security.transport.ssl:
  enabled: true
  verification_mode: none
  keystore.path: certs/transport.p12
  truststore.path: certs/transport.p12
cluster.initial_master_nodes: [{initial_master_nodes}]
discovery.seed_hosts: [{seed_hosts}]
indices.memory.index_buffer_size: 20%
indices.recovery.max_bytes_per_sec: 1g
node.attr.rack_id: rack{node_index}
cluster.routing.allocation.awareness.attributes: rack_id
cluster.routing.allocation.same_shard.host: true
cluster.fault_detection.leader_check.interval: 20s
discovery.cluster_formation_warning_timeout: 30s
cluster.publish.timeout: 90s
action.destructive_requires_name: true
"""
    with open("/etc/elasticsearch/elasticsearch.yml", "w") as f:
        f.write(config)
    print("已生成 Elasticsearch 配置。")

def configure_slowlog(current_ip, es_http_port):
    """Configure slowlog settings for Elasticsearch."""
    slowlog_settings = """
{
    "index.search.slowlog.threshold.query.warn": "2s",
    "index.search.slowlog.threshold.query.info": "2s",
    "index.search.slowlog.threshold.query.debug": "1s",
    "index.search.slowlog.threshold.query.trace": "400ms",
    "index.search.slowlog.threshold.fetch.warn": "1s",
    "index.search.slowlog.threshold.fetch.info": "800ms",
    "index.search.slowlog.threshold.fetch.debug": "500ms",
    "index.search.slowlog.threshold.fetch.trace": "200ms",
    "index.indexing.slowlog.threshold.index.warn": "5s",
    "index.indexing.slowlog.threshold.index.info": "2s",
    "index.indexing.slowlog.threshold.index.debug": "1s",
    "index.indexing.slowlog.threshold.index.trace": "400ms"
}
"""
    run_command(f"curl -X PUT 'http://{current_ip}:{es_http_port}/_all/_settings' -H 'Content-Type: application/json' -d '{slowlog_settings}'")
    print("已配置慢查询日志设置。")

def change_permissions():
    """Change permissions for Elasticsearch directories."""
    run_command("sudo chown -R elasticsearch:elasticsearch /data/es")
    run_command("sudo chown -R elasticsearch:elasticsearch /etc/elasticsearch")
    print("已更改 Elasticsearch 目录的权限。")

def start_elasticsearch():
    """Start the Elasticsearch service."""
    run_command("sudo systemctl daemon-reload")
    run_command("sudo systemctl enable elasticsearch.service")
    run_command("systemctl start elasticsearch.service")
    print("已启动 Elasticsearch。")

def configure_kibana(kibana_ip, es_http_port, kibana_port):
    """Generate the Kibana configuration file."""
    config = f"""
server.port: {kibana_port}
server.host: "{kibana_ip}"
elasticsearch.hosts: ["http://{kibana_ip}:{es_http_port}"]
elasticsearch.username: "my_admin"
elasticsearch.password: "my_admin"
elasticsearch.requestTimeout: 180000
i18n.locale: "zh-CN"
"""
    with open("/etc/kibana/kibana.yml", "w") as f:
        f.write(config)
    print("已配置 Kibana。")

def start_kibana():
    """Start the Kibana service."""
    run_command("sudo systemctl daemon-reload")
    run_command("sudo systemctl enable kibana.service")
    run_command("systemctl start kibana.service")
    print("已启动 Kibana。")

def create_kibana_user():
    """Create a Kibana user in Elasticsearch."""
    run_command("/usr/share/elasticsearch/bin/elasticsearch-users useradd my_admin -p my_admin -r kibana_system")
    print("已创建 Kibana 用户 my_admin。")

def check_elasticsearch_port(port):
    """Check if the Elasticsearch port is open."""
    result = subprocess.run(["ss", "-tnlp"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    if str(port) in result.stdout:
        print(f"Elasticsearch 服务在端口 {port} 上运行。")
    else:
        print(f"Elasticsearch 服务未在端口 {port} 上运行,请检查 Elasticsearch 是否已启动。")
        exit(1)

def main(ip_list, ports, cluster_name):
    es_http_port, es_tcp_port, kibana_port = ports
    first_node = ip_list[0]
    current_ip = socket.gethostbyname(socket.gethostname())
    if current_ip not in ip_list:
        print(f"当前主机解析到的 IP {current_ip} 不在 -i 传入的 IP 列表中,请检查参数或 /etc/hosts 配置。")
        exit(1)

    check_root()

    create_directories()
    download_and_install_packages()

    adjust_system_parameters()
    adjust_jvm_heap_size()
    adjust_elasticsearch_service()

    clean_old_certificates()
    generate_self_signed_certificates()
    setup_keystore()
    generate_elasticsearch_config(ip_list, es_http_port, es_tcp_port, cluster_name, current_ip)

    change_permissions()
    start_elasticsearch()

    sleep(10)  # 等待 Elasticsearch 启动

    check_elasticsearch_port(es_http_port)
    configure_slowlog(current_ip, es_http_port)

    if current_ip == first_node:
        create_kibana_user()
        configure_kibana(current_ip, es_http_port, kibana_port)
        start_kibana()

    print("Elasticsearch 和 Kibana 安装并配置成功。")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="安装 Elasticsearch 和 Kibana")
    parser.add_argument("-i", "--ips", required=True, help="逗号分隔的 IP 地址列表")
    parser.add_argument("-p", "--ports", required=True, help="逗号分隔的 HTTP, TCP 和 Kibana 端口列表")
    parser.add_argument("-c", "--cluster", required=True, help="集群名称")
    args = parser.parse_args()

    ip_list = args.ips.split(",")
    ports = list(map(int, args.ports.split(",")))
    cluster_name = args.cluster

    main(ip_list, ports, cluster_name)
The script's main functions include:
  1. Privilege check: ensure the script is run as root.
  2. Directory creation: create the directories Elasticsearch needs for data and logs.
  3. Package download and installation: download the Elasticsearch and Kibana RPM packages from the given URLs and install them with yum.
  4. System parameter tuning: adjust limits.conf and sysctl.conf to optimize Elasticsearch performance.
  5. JVM heap sizing: size the JVM heap automatically based on the server's physical memory (see the sketch after this list).
  6. Elasticsearch service configuration: adjust the systemd unit for Elasticsearch so the service starts and runs correctly.
  7. Certificate management: remove old certificates and keystore files and generate new self-signed certificates.
  8. Elasticsearch configuration generation: generate a suitable elasticsearch.yml from the supplied IP list.
  9. Slowlog configuration: configure Elasticsearch slow query logging.
  10. Permission adjustment: make sure the Elasticsearch directories have the correct ownership.
  11. Elasticsearch startup: start the Elasticsearch service and verify that it is running.
  12. Kibana configuration and startup: configure and start Kibana on the first node only, and create the user Kibana uses to talk to Elasticsearch.
  13. Final confirmation: confirm that installation and configuration succeeded once all steps are complete.
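As a quick illustration of step 5, the short sketch below mirrors the heap-sizing rule used in adjust_jvm_heap_size() above (MemTotal read from /proc/meminfo, converted to GiB and divided by 5); the MemTotal value here is just a made-up example.

mem_total_kib = 32_780_000                    # made-up example; the script reads this from /proc/meminfo
mem_total_gib = mem_total_kib / 1024 / 1024   # ~31.3 GiB
heap_size = max(1, int(mem_total_gib / 5))    # one fifth of total memory, never below 1g
print(f"-Xms{heap_size}g -Xmx{heap_size}g")   # prints: -Xms6g -Xmx6g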
How to use it:
  • Prerequisite: the tqdm module must be installed on the server. If the server has no Internet access, download the tqdm package in advance and install it locally (see the offline-install sketch after the command below).
    If the server can access the Internet, install tqdm directly with the following command:
pip3 install tqdm
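If the server cannot reach the Internet, one possible workaround (a sketch, assuming you have another machine with Internet access and a compatible Python version) is to download the package there and copy the directory over:

pip3 download tqdm -d ./tqdm_pkgs                        # on a machine with Internet access
pip3 install --no-index --find-links=./tqdm_pkgs tqdm    # on the target server, after copying ./tqdm_pkgs over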

Running the script: save the script as es.py and run it with a command like:

python3 es.py -i 172.16.128.58,172.16.128.59,172.16.128.60 -p 9200,9300,5601 -c yumgirl
  • Where (a parsing illustration follows this list):
  • -i specifies the IP addresses of the nodes in the cluster.
  • -p specifies the Elasticsearch HTTP port, the transport (TCP) port, and the Kibana port, in that order.
  • -c specifies the name of the Elasticsearch cluster.
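For reference, this is how es.py splits the example arguments above before calling main() (the values shown are just those from the sample command):

ip_list = "172.16.128.58,172.16.128.59,172.16.128.60".split(",")
es_http_port, es_tcp_port, kibana_port = map(int, "9200,9300,5601".split(","))
cluster_name = "yumgirl"
print(ip_list)                                   # ['172.16.128.58', '172.16.128.59', '172.16.128.60']
print(es_http_port, es_tcp_port, kibana_port)    # 9200 9300 5601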
Notes:
  • Environment consistency: make sure all nodes have a consistent environment, especially the OS version and Python version, before running this script.
  • Monitoring and logs: the script prints installation and configuration messages as it runs; after it finishes, check the Elasticsearch and Kibana logs to confirm there are no errors (example commands follow this list).
  • Ongoing maintenance: once the installation succeeds, check the cluster's health regularly and tune the configuration to match actual usage.
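For example, after the script finishes you can check service status and recent logs on each node with standard systemd tooling (a sketch; the last path assumes the path.logs directory set by the script, where the main log file is named after the cluster):

systemctl status elasticsearch kibana
journalctl -u elasticsearch --no-pager -n 100
tail -n 100 /data/es/logs/<cluster_name>.log    # e.g. yumgirl.log for the sample cluster name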
Resetting the elastic user's password

Run the following command on one of your Elasticsearch nodes:


/usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
  • After running the command you will be asked to confirm the reset. Once confirmed, a new password is generated automatically and printed to the console.
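If you would rather choose the password yourself instead of accepting a generated one, the same tool also offers an interactive mode in 8.x (to the best of my knowledge):

/usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic -i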
Checking the Elasticsearch cluster's health with the new password

After recording and storing the new password safely, you can check the health of the Elasticsearch cluster with the following command:


curl -u elastic:<your-new-password> -X GET "http://localhost:9200/_cat/health?v"
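If you prefer to do the same check from Python (for example, inside a monitoring script), here is a minimal standard-library equivalent of the curl call above; replace the placeholder password with the one printed by elasticsearch-reset-password:

import base64
import urllib.request

user, password = "elastic", "<your-new-password>"   # placeholder: use the password from elasticsearch-reset-password
url = "http://localhost:9200/_cat/health?v"

request = urllib.request.Request(url)
token = base64.b64encode(f"{user}:{password}".encode()).decode()
request.add_header("Authorization", f"Basic {token}")
with urllib.request.urlopen(request) as response:
    print(response.read().decode())                 # same tabular output as the curl command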

If you run into any problems or need further help, feel free to contact me!