安装参考地址:http://briansnelson.com/How_to_install_Sphinx_on_Centos_Server
yum install sphinx
如果失败的话使用下面的方式安装
wget http://sphinxsearch.com/files/sphinx-2.2.9-1.rhel6.x86_64.rpm
yum localinstall sphinx-2.2.9-1.rhel6.x86_64.rpm
安装完了出现下面的内容
Sphinx installed!
Now create a full-text index, start the search daemon, and you're all set.
To manage indexes:
editor /etc/sphinx/sphinx.conf
To rebuild all disk indexes:
sudo -u sphinx indexer --all --rotate
To start/stop search daemon:
service searchd start/stop
To query search daemon using MySQL client:
mysql -h 0 -P 9306
mysql> SELECT * FROM test1 WHERE MATCH('test');
See the manual at /usr/share/doc/sphinx-2.0.9 for details.
For commercial support please contact Sphinx Technologies Inc at
http://sphinxsearch.com/contacts.html
Verifying : postgresql-libs-8.4.20-2.el6_6.x86_64 1/2
Verifying : sphinx-2.0.9-1.rhel6.x86_64 2/2
Installed:
sphinx.x86_64 0:2.0.9-1.rhel6
Dependency Installed:
postgresql-libs.x86_64 0:8.4.20-2.el6_6
Complete!
安装完了之后, 现在要创建一个全文索引, 并且开启搜索的守护进程, 然后要设置
启动服务
service searchd start
or
/etc/init.d/searchd start
设置开机自启动
chkconfig searchd on
chkconfig --list searchd
配置实例, 先新建一个mysql的数据表
blog.sphinx_article
CREATE TABLE `sphinx_article` (
`id` int(11) UNSIGNED NOT NULL AUTO_INCREMENT,
`title` varchar(255) NULL,
`cat_id` tinyint(3) UNSIGNED NULL,
`member_id` int(11) UNSIGNED NULL,
`content` longtext NULL,
`created` int(11) UNSIGNED NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB;
配置文件
vim /etc/sphinx/sphinx.conf
~~~
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#
##索引源##
source article_src
{
type = mysql ##数据源类型
sql_host = rdsnnamnbnnamnbprivate.mysql.rds.aliyuncs.com ##mysql主机
sql_user = maxwelldu ##mysql 用户名
sql_pass = yu13jiu14 ##mysql密码
sql_db = blog ##mysql数据库名
sql_port = 3306 ##mysql端口
sql_query_pre = SET NAMES UTF8 ##mysql检索编码
sql_query = SELECT id, title, cat_id, member_id, content, created FROM sphinx_article ##获取数据的sql
##过滤或条件查询的属性##
sql_attr_uint = cat_id
sql_attr_uint = member_id
sql_attr_timestamp = created
sql_query_info = SELECT * FROM sphinx_article WHERE id=$id
}
##索引
index article
{
source = article_src ##声明索引源
path = /var/lib/sphinx/article ##索引文件存放路径及索引的文件名
docinfo = extern ##文档信息存储方式
mlock = 0 ##缓存数据内存锁定
morphology = none ##形态学(对中文无效)
min_word_len = 1 ##索引的词最小长度
charset_type = utf-8 ##数据编码
min_prefix_len = 0
min_infix_len = 1
ngram_len = 1
}
index testrt
{
type = rt
rt_mem_limit = 32M
path = /var/lib/sphinx/testrt
charset_type = utf-8
rt_field = title
rt_field = content
rt_attr_uint = gid
}
indexer
{
mem_limit = 128M
}
searchd
{
listen = 9312
listen = 9306:mysql41
log = /var/log/sphinx/searchd.log
query_log = /var/log/sphinx/query.log
read_timeout = 5
max_children = 30
pid_file = /var/run/sphinx/searchd.pid
max_matches = 1000
seamless_rotate = 1
preopen_indexes = 1
unlink_old = 1
workers = threads # for RT to work
binlog_path = /var/lib/sphinx/
}
~~~
建立索引文件
indexer -c /etc/sphinx/sphinx.conf article
在CLI上测试
search -c /etc/sphinx/sphinx.conf asdf
用php测试
<?php
$keyword = $_GET['keyword'];
$s = new SphinxClient;
$s->setServer("localhost", 9312);
$s->setMatchMode(SPH_MATCH_EXTENDED);
$s->setMaxQueryTime(3);
$result = $s->query($keyword, "article");
echo '<pre>';
print_r($result);
echo '</pre>';
?>
访问测试地址: http://123.56.135.230/sphinx.php?keyword=dfsa
如果还没有安装 sphinx client 和 php 扩展的话, 先安装扩展
参考地址: http://linux008.blog.51cto.com/2837805/622171
安装sphinx php扩展
cd /usr/local/src/
wget http://sphinxsearch.com/files/sphinx-2.2.9-release.tar.gz
tar zxf sphinx-2.2.9-release.tar.gz
cd sphinx-2.2.9-release/api/libsphinxclient/
安装sphinx扩展
cd /usr/local/src
wget http://sphinxsearch.com/files/sphinx-2.2.9-release.tar.gz
tar zxf sphinx-2.2.9-release.tar.gz
cd sphinx-2.2.9-release/api/libsphinxclient/
vim sphinxclient.c
./configure --prefix=/usr/local/sphinxclient
make
make install
cd /tmp/
wget http://pecl.php.net/get/sphinx-1.0.4.tgz
tar zxf sphinx-1.0.4.tgz
cd sphinx-1.0.4
/alidata/server/php/bin/phpize
./configure --with-php-config=/alidata/server/php/bin/php-config --with-sphinx=/usr/local/sphinxclient
make && make install
报错信息:
/tmp/sphinx-1.0.4/sphinx.c: In function ‘php_sphinx_client_read_property’:
/tmp/sphinx-1.0.4/sphinx.c:105: error: too few arguments to function ‘std_hnd->read_property’
/tmp/sphinx-1.0.4/sphinx.c: In function ‘zim_SphinxClient_setRankingMode’:
/tmp/sphinx-1.0.4/sphinx.c:767: error: too few arguments to function ‘sphinx_set_ranking_mode’
/tmp/sphinx-1.0.4/sphinx.c: In function ‘zm_startup_sphinx’:
/tmp/sphinx-1.0.4/sphinx.c:1786: warning: assignment from incompatible pointer type
make: *** [sphinx.lo] Error 1
retval = std_hnd->read_property(object, member, type TSRMLS_CC);
将这个函数最后添加一个参数NULL
retval = std_hnd->read_property(object, member, type TSRMLS_CC, NULL);
res = sphinx_set_ranking_mode(c->sphinx, (int)ranker);
最后加一个参数NULL
res = sphinx_set_ranking_mode(c->sphinx, (int)ranker, NULL);
make && make install
扩展安装好了之后修改php.ini, 然后重启php-fpm即可
vim /alidata/server/php/etc/php.ini
最后添加一行
extension=sphinx.so
重启
/etc/init.d/php-fpm restart
测试
http://123.56.135.230/sphinx.php?keyword=dfsa
cd /tmp
wget http://www.coreseek.cn/uploads/csft/3.2/coreseek-3.2.14.tar.gz
tar zxf coreseek-3.2.14.tar.gz
cd coreseek-3.2.14
安装mmseg中文分词
cd mmseg-3.2.14
./bootstrap #输出的warning信息可以忽略,如果出现error则需要解决
./configure --prefix=/usr/local/mmseg3
make && make install
cd ..
##安装coreseek
cd csft-3.2.14
sh buildconf.sh #输出的warning信息可以忽略,如果出现error则需要解决
./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql ##如果提示mysql问题,可以查看MySQL数据源安装说明
vim src/Makefile
LIBS = -lodbc -lm -lz -lexpat -L/usr/local/lib -lrt -lpthread
修改为
LIBS = -lodbc -lm -lz -lexpat -liconv -L/usr/local/lib -lrt -lpthread
make install
cd ..
配置mysql数据源
cd /usr/local/coreseek
cp /etc/sphinx/sphinx.conf etc/csft.conf
vim etc/csft.conf
~~~
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#
##索引源##
source article_src
{
type = mysql ##数据源类型
sql_host = rdsnnamnbnnamnbprivate.mysql.rds.aliyuncs.com ##mysql主机
sql_user = maxwelldu ##mysql 用户名
sql_pass = yu13jiu14 ##mysql密码
sql_db = blog ##mysql数据库名
sql_port = 3306 ##mysql端口
sql_query_pre = SET NAMES UTF8 ##mysql检索编码
sql_query = SELECT id, title, cat_id, member_id, content, created FROM sphinx_article ##获取数据的sql
##过滤或条件查询的属性##
sql_attr_uint = cat_id
sql_attr_uint = member_id
sql_attr_timestamp = created
sql_query_info = SELECT * FROM sphinx_article WHERE id=$id
sql_query_info_pre = SET NAMES utf8
}
##索引
index article
{
source = article_src ##声明索引源
path = /usr/local/coreseek/var/data/article ##索引文件存放路径及索引的文件名
docinfo = extern ##文档信息存储方式
mlock = 0 ##缓存数据内存锁定
morphology = none ##形态学(对中文无效)
min_word_len = 1 ##索引的词最小长度
charset_type = zh_cn.utf-8 ##数据编码
charset_dictpath = /usr/local/mmseg3/etc/
}
indexer
{
mem_limit = 128M
}
searchd
{
listen = 9312
read_timeout = 5
max_children = 30
max_matches = 1000
seamless_rotate = 0
preopen_indexes = 0
unlink_old = 1
pid_file = /usr/local/coreseek/var/log/searchd_article.pid
log = /usr/local/coreseek/var/log/searchd_article.log
query_log = /usr/local/coreseek/var/log/query_article.log
}
~~~
创建索引和测试全文检索
测试indexer索引
/usr/local/coreseek/bin/indexer -c etc/csft.conf
测试sphinx索引文件
/usr/local/coreseek/bin/indexer -c etc/csft.conf --all
使用search程序测试全文检索(注意是search不是searchd)
/usr/local/coreseek/bin/search -c etc/csft.conf -a asdf北京
停止sphinx的searchd服务, 开启coreseek的searchd服务
service searchd stop
关闭sphinx的自启动服务
chkconfig searchd off
开启coreseek的自启动服务
echo "/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf" >> /etc/rc.d/rc.local
使用PHP API调用coreseek
复制api/sphinxapi.php到项目中, 然后写测试程序, (需要先将sphinx的php扩展关闭, 修改php.ini, 将sphinx.so注释, 然后重启php-fpm)
先建立一张增量索引记录表pre_coreseek(id,maxid)
CREATE TABLE `pre_coreseek` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`maxid` int(11) unsigned NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
修改配置
vim /usr/local/coreseek/etc/csft.conf
~~~
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#
##索引源##
source article_src
{
type = mysql ##数据源类型
sql_host = rdsnnamnbnnamnbprivate.mysql.rds.aliyuncs.com ##mysql主机
sql_user = maxwelldu ##mysql 用户名
sql_pass = yu13jiu14 ##mysql密码
sql_db = blog ##mysql数据库名
sql_port = 3306 ##mysql端口
sql_query_pre = SET NAMES UTF8 ##mysql检索编码
sql_query_pre = REPLACE INTO pre_coreseek SELECT 1,MAX(id) from sphinx_article
sql_query = SELECT id, title, cat_id, member_id, content, created FROM sphinx_article WHERE id <= (SELECT maxid FROM pre_coreseek WHERE id=1) ##获取数据的sql
##过滤或条件查询的属性##
sql_attr_uint = cat_id
sql_attr_uint = member_id
sql_attr_timestamp = created
sql_query_info = SELECT * FROM sphinx_article WHERE id=$id
sql_query_info_pre = SET NAMES utf8
}
source increment : article_src
{
sql_query_pre = SET NAMES utf8
sql_query = SELECT id, title, cat_id, member_id, content, created FROM sphinx_article WHERE id > (SELECT maxid FROM pre_coreseek WHERE id=1) ##获取数据的sql
}
##索引
index article
{
source = article_src ##声明索引源
path = /usr/local/coreseek/var/data/article ##索引文件存放路径及索引的文件名
docinfo = extern ##文档信息存储方式
mlock = 0 ##缓存数据内存锁定
morphology = none ##形态学(对中文无效)
min_word_len = 1 ##索引的词最小长度
charset_type = zh_cn.utf-8 ##数据编码
charset_dictpath = /usr/local/mmseg3/etc/
}
index increment : article
{
source = increment
path = /usr/local/coreseek/var/data/increment
}
indexer
{
mem_limit = 128M
}
indexer
{
mem_limit = 128M
}
searchd
{
listen = 9312
read_timeout = 5
max_children = 30
max_matches = 1000
seamless_rotate = 0
preopen_indexes = 0
unlink_old = 1
pid_file = /usr/local/coreseek/var/log/searchd_article.pid
log = /usr/local/coreseek/var/log/searchd_article.log
query_log = /usr/local/coreseek/var/log/query_article.log
}
~~~
停止searchd服务
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf --stop
第一次启动建立全索引
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --all
启动searchd 后台模式,启动前一定先建立好全索引,不然启动失败或增量索引部分会索引不到
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf ###启动
添加一些数据 , 尝试检索, 发现索引不到
增量索引
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf increment --rotate
合并索引
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --merge article increment --merge-dst-range deleted 0 0 --rotate
到现在为止可以检索到了
可以写个定时cron每隔30分钟作一次增量索引
可以写个定时cron每隔1天作一次全索引以确保数据的一致性
做实验的时候每隔一分钟做一次增量索引
*/1 * * * * /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf increment --rotate
*/1 * * * * /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --merge article increment --merge-dst-range deleted 0 0 --rotate
每天做一次全索引, 确保数据的一致性
0 0 * * * /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --all