1. Prerequisites
(1) MySQL database
(2) JDK 1.8 or later
(3) ZooKeeper
(4) DolphinScheduler installation package (from the DolphinScheduler official website)
(5) MySQL JDBC driver (download mysql-connector-java-5.1.49-bin.jar from the MySQL website and place it in DolphinScheduler's lib directory)
Install the five prerequisites above first; the DolphinScheduler installation steps below assume they are in place.
2. Extract the DolphinScheduler package and configure environment variables
## Upload the installation package to the local directory
[root@hadoop local] tar -zxvf apache-dolphinscheduler-incubating-1.3.4-dolphinscheduler-bin.tar.gz -C /usr/local/
[root@hadoop local] mv apache-dolphinscheduler-incubating-1.3.4-dolphinscheduler-bin/ dolphinscheduler
## Place mysql-connector-java-5.1.49-bin.jar in DolphinScheduler's lib directory
## Configure environment variables
[root@hadoop dolphinscheduler] vim /etc/profile
## Add the following lines
export DOLP_HOME=/usr/local/dolphinscheduler
export PATH=$PATH:$DOLP_HOME/bin
[root@hadoop dolphinscheduler] source /etc/profile
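## A quick check (paths as configured above) to confirm the variable is visible in the current shell:
[root@hadoop dolphinscheduler] echo $DOLP_HOME
/usr/local/dolphinscheduler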
3. Create the deployment user (dolphinscheduler is used as the example)
# Log in as root to create the user
useradd dolphinscheduler
# Set a password for the user
echo "dolphinscheduler" | passwd --stdin dolphinscheduler
# For a distributed deployment, grant the user passwordless sudo
sed -i '$adolphinscheduler ALL=(ALL) NOPASSWD: ALL' /etc/sudoers
sed -i 's/Defaults requiretty/#Defaults requiretty/g' /etc/sudoers
# Change ownership so the deployment user can operate on the extracted dolphinscheduler directory
chown -R dolphinscheduler:dolphinscheduler dolphinscheduler
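# Optional check: listing the directory should now show dolphinscheduler as owner and group
ls -ld /usr/local/dolphinscheduler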
4. Configure passwordless SSH for the deployment user
su dolphinscheduler
# Generate a key pair (public and private key); press Enter three times to accept the defaults
ssh-keygen -t rsa
# View the public key
cat ~/.ssh/id_rsa.pub
# Write the public key into ~/.ssh/authorized_keys
cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
# Note: once configured correctly, the dolphinscheduler user should be able to run ssh localhost without being asked for a password (please test this)
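# For example, the following should print the hostname without a password prompt if passwordless SSH is working:
ssh localhost hostname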
# The steps below set up passwordless SSH across multiple machines for a cluster deployment; skip them on a single node
# Append each worker node's public key to the master node's authorized_keys (run on the master; the worker's password is required, and review the file afterwards)
ssh <worker-node-ip> cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
cat ~/.ssh/authorized_keys
# Copy the combined keys from the master node back to each worker node
scp ~/.ssh/authorized_keys dolphinscheduler@<worker-node-ip>:~/.ssh/authorized_keys
# Log in to each node to confirm no password is required
ssh dolphinscheduler@172.xx.xx.xxx
ssh dolphinscheduler@172.xx.xx.xxx
5. MySQL configuration
[root@hadoop dolphinscheduler] mysql -uroot -p<password>
mysql> CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
Query OK, 1 row affected (0.01 sec)
mysql> CREATE USER 'dolphinscheduler'@'%' IDENTIFIED BY 'dolphinscheduler';
Query OK, 0 rows affected (0.01 sec)
## On MySQL 8.0, drop the trailing IDENTIFIED BY 'dolphinscheduler' clause
mysql> GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%' IDENTIFIED BY 'dolphinscheduler';
ERROR 1819 (HY000): Your password does not satisfy the current policy requirements
## Lower the password policy to 0 (LOW), which only checks that the password is at least 8 characters long
mysql> set global validate_password_policy=0;
Query OK, 0 rows affected (0.00 sec)
## On MySQL 8.0, drop the trailing IDENTIFIED BY 'dolphinscheduler' clause
mysql> GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%' IDENTIFIED BY 'dolphinscheduler';
Query OK, 0 rows affected, 1 warning (0.01 sec)
mysql> GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'localhost' IDENTIFIED BY 'dolphinscheduler';
Query OK, 0 rows affected, 1 warning (0.01 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.01 sec)
mysql> exit;
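## Optional sanity check: reconnect as the new account (credentials as created above) to confirm the grants work
[root@hadoop dolphinscheduler] mysql -udolphinscheduler -pdolphinscheduler -e "SHOW DATABASES;"
## The dolphinscheduler database should appear in the output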
6. Configure the DolphinScheduler datasource
[root@hadoop /] vi /usr/local/dolphinscheduler/conf/datasource.properties
## Comment out the default PostgreSQL datasource settings and add the MySQL ones
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.datasource.url=jdbc:mysql://***.***.**.**:3306/dolphinscheduler?useUnicode=true&characterEncoding=utf-8&allowMultiQueries=true&autoReconnect=true&useSSL=false
spring.datasource.username=dolphinscheduler
spring.datasource.password=dolphinscheduler
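## The datasource will not load without the driver from step 1, so it is worth confirming the jar is in place:
[root@hadoop /] ls /usr/local/dolphinscheduler/lib/ | grep mysql-connector
mysql-connector-java-5.1.49-bin.jar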
7. Initialize the DolphinScheduler metadata in MySQL
[root@hadoop /] cd /usr/local/dolphinscheduler/script
[root@hadoop script] ll
total 36
-rw-rw-r-- 1 1004 1004 1577 Dec 16 2020 create-dolphinscheduler.sh
-rw-rw-r-- 1 1004 1004 4703 Dec 16 2020 dolphinscheduler-daemon.sh
-rw-rw-r-- 1 1004 1004 1807 Dec 16 2020 monitor-server.sh
-rw-rw-r-- 1 1004 1004 1701 Dec 16 2020 remove-zk-node.sh
-rw-rw-r-- 1 1004 1004 2247 Dec 16 2020 scp-hosts.sh
-rw-rw-r-- 1 1004 1004 2020 Dec 16 2020 start-all.sh
-rw-rw-r-- 1 1004 1004 2015 Dec 16 2020 stop-all.sh
-rw-rw-r-- 1 1004 1004 1578 Dec 16 2020 upgrade-dolphinscheduler.sh
[root@hadoop script] chmod +x *
[root@hadoop script] ll
total 36
-rwxrwxr-x 1 1004 1004 1577 Dec 16 2020 create-dolphinscheduler.sh
-rwxrwxr-x 1 1004 1004 4703 Dec 16 2020 dolphinscheduler-daemon.sh
-rwxrwxr-x 1 1004 1004 1807 Dec 16 2020 monitor-server.sh
-rwxrwxr-x 1 1004 1004 1701 Dec 16 2020 remove-zk-node.sh
-rwxrwxr-x 1 1004 1004 2247 Dec 16 2020 scp-hosts.sh
-rwxrwxr-x 1 1004 1004 2020 Dec 16 2020 start-all.sh
-rwxrwxr-x 1 1004 1004 2015 Dec 16 2020 stop-all.sh
-rwxrwxr-x 1 1004 1004 1578 Dec 16 2020 upgrade-dolphinscheduler.sh
[root@hadoop script] ./create-dolphinscheduler.sh
...
12:42:19.951 [main] INFO org.apache.dolphinscheduler.dao.upgrade.shell.CreateDolphinScheduler - upgrade DolphinScheduler finished
12:42:19.951 [main] INFO org.apache.dolphinscheduler.dao.upgrade.shell.CreateDolphinScheduler - create DolphinScheduler success
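## Optional check that the metadata schema was created (credentials as configured above):
[root@hadoop script] mysql -udolphinscheduler -pdolphinscheduler -e "SHOW TABLES;" dolphinscheduler | head
## A list of t_ds_* tables should be printed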
8. Create the JDK symlink
## DolphinScheduler invokes /usr/bin/java by default, so link the installed JDK's java binary to that path
[root@hadoop dolphinscheduler] ln -s /usr/local/jdk1.8.0_311/bin/java /usr/bin/java
## If ln reports that the file already exists, remove /usr/bin/java first and run the command again
[root@hadoop dolphinscheduler] rm -rf /usr/bin/java
[root@hadoop dolphinscheduler] ln -s /usr/local/jdk1.8.0_311/bin/java /usr/bin/java
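## Quick check that the link resolves to a runnable JDK (version string depends on the JDK installed):
[root@hadoop dolphinscheduler] /usr/bin/java -version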
9. Edit the installation config file
[root@hadoop config]# pwd
/usr/local/dolphinscheduler/conf/config
[root@hadoop_zxy config]# ls
install_config.conf
[root@hadoop_zxy config]# vi install_config.conf
Modify install_config.conf as follows:
# Database type
dbtype="mysql"
# Database host and port
dbhost="***.***.**.**:3306"
# Database username
username="dolphinscheduler"
# Database name
dbname="dolphinscheduler"
# Database password (must match the account created in step 5)
password="dolphinscheduler"
# ZooKeeper quorum addresses (a pseudo-distributed ZooKeeper is used here)
zkQuorum="***.***.**.**:2181,***.***.**.**:2182,***.***.**.**:2183"
# DolphinScheduler install path (a new, custom path, not the directory the package was extracted to)
installPath="/zxy/apps/dolphinscheduler-1.3.4/dolphinscheduler"
# User that performs the deployment
deployUser="dolphinscheduler"
# Mail server settings; configure the items below as needed
mailServerHost="smtp.exmail.qq.com"
# Mail server port
mailServerPort="25"
# Mail sender address
mailSender="xxxxxxxxxx"
# Mail account username (SMTP login user)
mailUser="xxxxxxxxxx"
# Mail account password
mailPassword="xxxxxxxxxx"
# Set to true if the mail server uses STARTTLS, otherwise false
starttlsEnable="true"
# Set to true if the mail server uses SSL, otherwise false; starttlsEnable and sslEnable must not both be true
sslEnable="false"
# Mail server host trusted for SSL
sslTrust="smtp.exmail.qq.com"
# Where job resource files (SQL scripts, etc.) are uploaded: HDFS, S3, or NONE. On a single node that should use the local filesystem, set this to HDFS,
# because the HDFS client also supports the local filesystem; choose NONE if the resource upload feature is not needed. Note: using the local filesystem does not require deploying Hadoop.
resourceStorageType="NONE"
# If uploaded resources are stored on Hadoop and the NameNode is HA, copy core-site.xml and hdfs-site.xml into the conf directory under the install path
# and configure the NameNode cluster name; if the NameNode is not HA, simply replace mycluster with the actual IP or hostname
defaultFS="hdfs://mycluster:8020"
s3Endpoint="http://192.168.xx.xx:9010"
s3AccessKey="xxxxxxxxxx"
s3SecretKey="xxxxxxxxxx"
# If Yarn is not used, keep the defaults below; if the ResourceManager is HA, set this to the active/standby ResourceManager IPs or hostnames,
# e.g. "192.168.xx.xx,192.168.xx.xx"; for a single ResourceManager, set yarnHaIps=""
yarnHaIps="192.168.xx.xx,192.168.xx.xx"
# Keep the default if the ResourceManager is HA or Yarn is not used; for a single ResourceManager, set its actual hostname or IP
singleYarnIp="yarnIp1"
# Root path for resource uploads, supporting HDFS and S3; since the HDFS client can also use the local filesystem, make sure this local directory exists and is readable and writable
resourceUploadPath="/zxy/apps/dolphinscheduler-1.3.4/data/dolphinscheduler"
# User with permission to create resourceUploadPath
hdfsRootUser="hdfs"
kerberosStartUp="false"
krb5ConfPath="$installPath/conf/krb5.conf"
keytabUserName="hdfs-mycluster@ESZ.COM"
keytabPath="$installPath/conf/hdfs.headless.keytab"
# DolphinScheduler API server port
apiServerPort="12345"
# Host list: a single IP for a single-node install, or multiple servers separated by ','
ips="***.***.**.**"
# SSH port, default 22
sshPort="22"
# Server(s) on which the master runs; a single IP here, or choose the master host(s) in a multi-server cluster
masters="***.***.**.**"
# Server(s) on which the workers are deployed; 'default' is the worker group name
workers="***.***.**.**:default"
# Server on which the alert server is deployed
alertServer="***.***.**.**"
# Server(s) on which the backend API server is deployed
apiServers="***.***.**.**"
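Before running the installer, a quick grep (purely a convenience check) helps confirm the key settings were saved as intended:
[root@hadoop_zxy config]# grep -E "^(dbtype|dbhost|username|dbname|deployUser|installPath|ips|masters|workers)=" install_config.conf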
10. Configure runtime parameters
dolphinscheduler_env.sh
# Edit the dolphinscheduler_env.sh environment file under dolphinscheduler/conf/env
vi /usr/local/dolphinscheduler/conf/env/dolphinscheduler_env.sh
#export HADOOP_HOME=/opt/cloudera/parcels/CDH/lib/hadoop
#export HADOOP_CONF_DIR=/etc/hadoop/conf
#export SPARK_HOME1=/opt/cloudera/parcels/CDH/lib/spark
#export SPARK_HOME2=/opt/soft/spark2
#export PYTHON_HOME=/usr/bin/python
export JAVA_HOME=/usr/local/jdk1.8.0_311
#export HIVE_HOME=/opt/cloudera/parcels/CDH/lib/hive
#export FLINK_HOME=/opt/soft/flink
#export DATAX_HOME=/opt/soft/datax/bin/datax.py
#export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH
export PATH=$JAVA_HOME/bin:$PATH
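# To verify the file resolves to a working JDK (JAVA_HOME as set above), source it and run java:
source /usr/local/dolphinscheduler/conf/env/dolphinscheduler_env.sh
$JAVA_HOME/bin/java -version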
11. Start ZooKeeper
cd /usr/local/zookeeper/bin
./zkServer.sh start
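# zkServer.sh can also confirm the instance came up (Mode: standalone, or leader/follower in a quorum):
./zkServer.sh status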
12. Deploy and start DolphinScheduler
## Start DS [sh install.sh]
## Stop DS [sh stop-all.sh]
[root@hadoop dolphinscheduler]# sh install.sh
[root@hadoop dolphinscheduler]# jps
6081 Jps
5763 ApiApplicationServer
5078 QuorumPeerMain
4983 QuorumPeerMain
5607 MasterServer
5643 WorkerServer
5679 LoggerServer
5183 QuorumPeerMain
13. Log in to the web UI
http://192.168.xx.xx:12345/dolphinscheduler
Default username: admin
Default password: dolphinscheduler123
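If the login page does not load, a quick curl from the server (URL as above) shows whether the API server is listening; any HTTP response, even a redirect, means the service is up:
curl -I http://192.168.xx.xx:12345/dolphinscheduler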
14. Start/stop operations
# Stop all cluster services with one command
sh ./bin/stop-all.sh
# Start all cluster services with one command
sh ./bin/start-all.sh
# Start/stop the Master
sh ./bin/dolphinscheduler-daemon.sh stop master-server
sh ./bin/dolphinscheduler-daemon.sh start master-server
# Start/stop the Worker
sh ./bin/dolphinscheduler-daemon.sh start worker-server
sh ./bin/dolphinscheduler-daemon.sh stop worker-server
# Start/stop the API server
sh ./bin/dolphinscheduler-daemon.sh start api-server
sh ./bin/dolphinscheduler-daemon.sh stop api-server
# Start/stop the Logger
sh ./bin/dolphinscheduler-daemon.sh start logger-server
sh ./bin/dolphinscheduler-daemon.sh stop logger-server
# Start/stop the Alert server
sh ./bin/dolphinscheduler-daemon.sh start alert-server
sh ./bin/dolphinscheduler-daemon.sh stop alert-server
# Start/stop the Python Gateway
sh ./bin/dolphinscheduler-daemon.sh start python-gateway-server
sh ./bin/dolphinscheduler-daemon.sh stop python-gateway-server
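# After restarting a single daemon, jps is a simple way to confirm the corresponding process is back, e.g.:
jps | grep -E "MasterServer|WorkerServer|ApiApplicationServer|LoggerServer|AlertServer"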
15. DolphinScheduler system setup
After logging in, click Tenant Manage and create a tenant. Set both the tenant code and tenant name to the OS user that runs DolphinScheduler on the server. Selecting the default queue is fine (you can also configure your own), since the queue has already been set in DolphinScheduler's configuration file.