

   /usr/local/coreseek/bin/indexer --config /usr/local/coreseek/etc/zl_sphinx.conf  --all



    /usr/local/coreseek/bin/indexer --config /usr/local/coreseek/etc/zl_sphinx.conf  delta --rotate

3. 合并索引:加上 --merge-dst-range deleted 0 0,合并时只保留主索引中 deleted 属性为 0 的文档,防止多个关键字指向同一个文档。

# Merge the `delta` index into the main `zhl` index and rotate the result into a
# running searchd. --merge-dst-range deleted 0 0 filters the destination (zhl)
# documents on the `deleted` attribute while merging.
# NOTE(review): the `zhl` source shown in this file declares no `deleted`
# attribute; the filter needs one to exist - confirm against the real schema.
/usr/local/coreseek/bin/indexer --merge zhl delta --config /usr/local/coreseek/etc/zl_sphinx.conf --rotate --merge-dst-range deleted 0 0


# Main data source: feeds the `zhl` index from the MySQL `documents` table.
# Each run records the current MAX(id) in sph_counter, so the main index covers
# ids up to that mark and the delta source covers everything after it.
source zhl
{
        type                    = mysql
        sql_host                = localhost
        sql_user                = test
        sql_pass                =
        sql_db                  = test
        sql_port                = 3306  # optional, default is 3306

        # Pre-queries run in the order listed, before the main fetch query.
        sql_query_pre           = SET NAMES utf8
        #sql_query_pre          = SET SESSION query_cache_type=OFF
        # Bookkeeping table: one row (counter_id = 1) holding the highest id
        # covered by the main index.
        sql_query_pre           = CREATE TABLE IF NOT EXISTS sph_counter (counter_id integer primary key not null, max_doc_id integer not null)
        # Snapshot the current maximum id before fetching.
        sql_query_pre           = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM documents

        # Ranged fetch: returns the overall id bounds; the indexer walks them in
        # steps and substitutes $start / $end into sql_query.
        sql_query_range         = select 1,max(id) from documents

        # main document fetch query
        # mandatory, integer document ID field MUST be the first selected column
        # The final condition caps the fetch at the snapshot taken above, so rows
        # inserted while indexing runs are left for the delta source.
        # Each continued line ends with " \" (space before the backslash) so the
        # last token of one line can never fuse with the first token of the next.
        sql_query               = \
                SELECT id, group_id, score, UNIX_TIMESTAMP(date_added) AS created_time, title, content, author \
                FROM documents where id >= $start and id<=$end and id <=(select max_doc_id from sph_counter where counter_id=1)

        # Attribute declarations for columns returned by sql_query.
        # NOTE(review): columns declared with sql_attr_string are stored as
        # attributes only; if title/author must also be full-text searchable,
        # sql_field_string is the directive for that - confirm intent.
        sql_attr_uint           = group_id
        sql_attr_timestamp      = created_time
        sql_attr_string         = author
        sql_attr_string         = title
        sql_attr_float          = score

        ####sql_ranged_throttle = 0
        # document info query, ONLY for CLI search (ie. testing and debugging)
        # optional, default is empty
        # must contain $id macro and must fetch the document by that id
        ####sql_query_info              = SELECT * FROM documents WHERE id=$id
}
# Delta data source: inherits every setting from `zhl` and overrides only the
# queries, so it fetches just the rows added after the last recorded max_doc_id.
source delta : zhl
{
        # Declaring sql_query_pre here replaces the whole inherited list of
        # pre-queries, so a delta run does NOT move sph_counter before fetching.
        sql_query_pre   = set names utf8

        # Same columns as the main query, restricted to ids above the stored mark.
        sql_query       = SELECT id,group_id, score, UNIX_TIMESTAMP(date_added) as created_time,title,content,author FROM documents where \
                                 id>=$start and id <= $end and id > (SELECT max_doc_id FROM sph_counter WHERE counter_id=1)

        # After the delta index is built, advance the mark to the current MAX(id).
        # Consequence: the next delta build starts after this point, so this delta
        # has to be merged into `zhl` before `delta` is rebuilt, otherwise the
        # documents it holds drop out of both indexes.
        sql_query_post_index  =replace into sph_counter select 1, max(id)  from documents
}
# Main index, built from the `zhl` source.
index zhl
{
        source                  = zhl
        path                    = /usr/local/coreseek/var/data/zhl
}

# Delta index, built from the `delta` source. The indexer commands in this
# document operate on an index named `delta`, so it must be defined.
# It inherits every other setting from `zhl`; source and path must differ.
# NOTE(review): the path below follows the zhl path by convention - confirm.
index delta : zhl
{
        source                  = delta
        path                    = /usr/local/coreseek/var/data/delta
}

# mem_limit is an indexer-level directive (memory budget for an indexing run),
# so it belongs in the `indexer` section rather than inside an `index` section.
indexer
{
        mem_limit               = 128M
}
## searchd settings
searchd
{
        # NOTE(review): the listen value is empty in this document. Stock
        # configurations use e.g. 9312 (native API) and/or 9306:mysql41
        # (SphinxQL) - fill in the intended address/port.
        listen                  =
        log                     = /usr/local/coreseek/var/log/zhl_sphinx_searchd.log
        query_log               = /usr/local/coreseek/var/log/zhl_sphinx_query.log
        # client read timeout, in seconds
        read_timeout            = 5
        ##client_timeout                = 300
        # maximum number of child processes (concurrent searches)
        max_children            = 30
        pid_file                = /usr/local/coreseek/var/log/zhl_sphinx_searchd.pid
        max_matches             = 1000
        # seamless rotation: queries stay available at all times, served from
        # either the old index or the new one
        seamless_rotate         = 1
        # whether to forcibly preopen all index files on startup (0 = no)
        preopen_indexes         = 0
        # delete the .old index copies once a rotation has succeeded
        unlink_old              = 1
        # maximum allowed network packet size
        max_packet_size         = 8M
        # maximum allowed number of filters per query
        max_filters             = 256
        # maximum allowed number of values per filter
        max_filter_values       = 4096
        # maximum allowed number of queries per batch
        max_batch_queries       = 32
        workers                 = threads # for RT to work
        # SphinxQL compatibility mode (legacy columns and their names)
        # optional, default is 0 (SQL compliant syntax and result sets)
        compat_sphinxql_magics  = 1
}

注意:上一篇《自定义中文分词(一)》中没有说明分词词典的编码,这次测试中又犯了同样的编码错误:自定义中文分词时,没有把词典设置为 UTF-8 编码。另外,如果配置文件中的 charset_type 设置为 utf8,那么在指定分词词典路径之后,查询得到的结果是有误的。所以在自定义分词的情况下一定要注意编码:词典要使用 UTF-8 编码,同时 charset_type 需要设置为 zh_cn.utf-8。希望大家注意。