sphinx原理
- 先创建数据源。
- 根据数据源创建索引,使用分词技术。
- PHP 把查询关键词发送给 Sphinx 服务器,Sphinx 根据关键词在索引中查找,得到匹配记录在 MySQL 表中的 id,并把这些 id 返回给 PHP 查询端。
- php根据返回的id,查询mysql服务器。
安装gcc和gcc-c++
# Install the C/C++ toolchain needed to compile Sphinx from source.
yum install gcc gcc-c++ libstdc++-devel
安装sphinx-for-chinese
下载并放在/usr/local目录下并解压
# Unpack the source tarball that was placed in /usr/local.
cd /usr/local
tar -zxvf sphinx-for-chinese.tar.gz
# ./configure must be run from inside the extracted source tree.
# NOTE(review): the directory name below is assumed; use whatever name the tarball actually unpacks to.
cd sphinx-for-chinese
# Install under /usr/local/sphinx-for-chinese with MySQL data-source support.
./configure --prefix=/usr/local/sphinx-for-chinese --with-mysql
make
make install
生成字典
下载 xdict_1.1.tar.gz 并解压,将其中的 xdict_1.1.txt 文件放到 /usr/local/sphinx-for-chinese 目录下
cd /usr/local/sphinx-for-chinese
# Compile the plain-text word list into the dictionary file etc/xdict.
# The binary lives under the --prefix used at configure time (/usr/local/sphinx-for-chinese).
/usr/local/sphinx-for-chinese/bin/mkdict xdict_1.1.txt etc/xdict
若 /usr/local/sphinx-for-chinese/etc/ 文件夹中生成了 xdict 文件,代表字典生成成功
重建索引
创建一个 sphinx.conf 文件放到 /usr/local/sphinx-for-chinese/etc 目录下,并在其中配置好相关信息
# Data source: rows are pulled from the MySQL table `tiku`.
source main{
type = mysql
sql_host = 127.0.0.1
sql_user = ********** # database user name
sql_pass = ********** # database password
sql_db = ********** # database to use
sql_port = 3306
sql_sock=/tmp/mysql.sock # MySQL socket file
sql_query_pre = SET NAMES utf8
# First column (id) is the document ID; ti is the full-text field.
sql_query = \
SELECT id,ti FROM tiku
}
# Inherits every setting from `main`; not referenced by any index below.
source src1throttled : main{
}
# Full-text index over `main`, segmented with the Chinese dictionary built by mkdict.
index tiku{
source=main
path=/usr/local/sphinx-for-chinese/var/data/tiku
charset_type = utf-8
chinese_dictionary = /usr/local/sphinx-for-chinese/etc/xdict
}
indexer{
mem_limit = 256M
}
searchd
{
# 9312: native SphinxAPI protocol (used by sphinxapi.php); 9306: MySQL wire protocol (SphinxQL).
listen = 9312
listen = 9306:mysql41
log = /usr/local/sphinx-for-chinese/var/log/searchd.log
query_log = /usr/local/sphinx-for-chinese/var/log/query.log
read_timeout = 5
max_children = 30
# Keep the pid file next to the logs under var/log; the original pointed at <prefix>/log instead.
pid_file = /usr/local/sphinx-for-chinese/var/log/searchd.pid
seamless_rotate= 1
preopen_indexes= 1
unlink_old = 1
workers = threads # for RT to work
# Fixed typo in the directory name (was "sphinx-fo-rchinese").
binlog_path = /usr/local/sphinx-for-chinese/var/data
}
配置说明:
# sphinx基本配置
# Data source
source goods_src
{
# Database type
type = mysql
# MySQL host
sql_host = localhost
# MySQL user name
sql_user = sphinxuser
# MySQL password
sql_pass = sphinxpass
# MySQL database
sql_db = sphinx
# MySQL port (open it if a firewall restricts access)
sql_port= 3306
# MySQL socket file (default /tmp/mysql.sock; set it explicitly if yours differs)
sql_sock = /tmp/mysql.sock
# Connection charset used when fetching rows (if the database is not utf8, searches will likely find nothing)
sql_query_pre = SET NAMES UTF8
# SQL statement that fetches the rows to index; the first column is the document ID
sql_query = SELECT goods_id,goods_id AS goods_id_new,goods_name,goods_color,goods_name AS goods_name_search,goods_color AS goods_color_search From goods_test
# Attributes used for filtering / conditional queries. Columns declared below are returned with
# the search results; columns NOT declared below are the full-text fields that are searched
# (e.g. goods_color_search and goods_name_search in the SQL statement above).
# Unsigned integer attribute
# goods_id is the primary key; declaring it here makes indexing fail with
# "attribute 'goods_id' not found", so the goods_id_new alias is used as a workaround.
sql_attr_uint = goods_id_new
# String attributes
sql_attr_string = goods_name
sql_attr_string = goods_color
# Query used for testing from the command-line interface (CLI); usually not needed.
# Sphinx substitutes the $id macro with the document ID.
#sql_query_info = SELECT * FROM goods_test WHERE goods_id = $id
}
# Index
index goods
{
# Data source this index is built from
source = goods_src
# Where the index files are stored
path = /usr/local/sphinx-for-chinese-1.10.1/var/data/goods
# Attribute storage mode (default is extern)
docinfo = extern
# Lock cached index data in memory (0 = off)
mlock = 0
# Morphology preprocessors (has no effect on Chinese)
morphology = none
# Minimum indexed word length
min_word_len = 1
# Data encoding (must be utf-8 to index Chinese)
charset_type = utf-8
# Chinese word-segmentation dictionary
chinese_dictionary = /usr/local/sphinx-for-chinese-1.10.1/etc/xdict_1.1
# Minimum prefix length to index
min_prefix_len = 0
# Minimum infix length to index
min_infix_len = 1
# N-gram length for non-alphabetic data (for CJK indexing)
# NOTE(review): Sphinx documents ngram_len as used together with ngram_chars, which is not set here - confirm this line has any effect.
ngram_len = 1
# Whether to strip HTML markup from the incoming text at indexing time (0 = off)
html_strip = 0
}
# Indexer settings
indexer
{
# Memory limit: default 32M, maximum 2047M, recommended between 256M and 1024M
mem_limit = 256M
}
# Settings for the Sphinx search daemon (searchd)
searchd
{
# Address/port to listen on; any of the following forms works, default is port 9312 on this host
# listen = 127.0.0.1
# listen = 192.168.0.1:9312
# listen = 9312
# listen = /var/run/searchd.sock
# Path of the searchd log
log = /usr/local/sphinx-for-chinese-1.10.1/var/log/searchd.log
# Path of the query log
query_log = /usr/local/sphinx-for-chinese-1.10.1/var/log/query.log
# Read timeout
read_timeout = 5
# Client request timeout
client_timeout = 300
# Maximum number of concurrently running searchd children
max_children = 30
# PID file
pid_file = /usr/local/sphinx-for-chinese-1.10.1/var/log/searchd.pid
# Maximum number of matches returned for a query
max_matches = 1000
# Whether to use seamless rotation (needed when doing incremental indexing)
seamless_rotate = 1
# Whether to preopen all index files at startup
preopen_indexes = 0
# Whether to remove the old index files after a rotation
unlink_old = 1
# MVA updates pool size (default 1M)
mva_updates_pool = 1M
# Maximum allowed network packet size (default 8M)
max_packet_size = 8M
# Maximum number of filters allowed per query (default 256)
max_filters = 256
# Maximum number of values allowed per filter (default 4096)
max_filter_values = 4096
# Maximum number of queries per batch (default 32)
max_batch_queries = 32
}
# Sphinx配置文件结束
# Rebuild every index defined in sphinx.conf (--all); --rotate writes new index files and
# signals a running searchd to switch over to them.
/usr/local/sphinx-for-chinese/bin/indexer -c /usr/local/sphinx-for-chinese/etc/sphinx.conf --all --rotate
启动
# Start the searchd daemon using the given configuration file.
/usr/local/sphinx-for-chinese/bin/searchd -c /usr/local/sphinx-for-chinese/etc/sphinx.conf
外部API调用(PHP调用)
两种方法,一种安装扩展,另一种引入api文件夹下的 sphinxapi.php 文件即可
<?php
/**
 * Keyword search endpoint.
 *
 * Reads the keyword from the `w` query-string parameter, asks searchd for
 * matching document IDs, then loads the `ti` column of the best match from
 * the MySQL table `tiku` and prints it. Whenever no answer can be produced
 * (missing keyword, Sphinx failure, no match, MySQL failure, missing row)
 * the "not found" message is printed instead.
 */
error_reporting(0);
header("Content-type: text/html; charset=utf-8");
require_once 'sphinxapi.php';

$notFound = "没搜到答案鸭,去掉标点选项字母等,使用关键词再试一下吧!";

// Accept only a non-empty string keyword; `w` may be absent or an array (?w[]=x).
$keyword = (isset($_GET['w']) && is_string($_GET['w'])) ? trim($_GET['w']) : '';

$answer = null;
if ($keyword !== '') {
    $sphinx = new SphinxClient();
    $sphinx->SetServer('localhost', 9312);
    // Match mode: SPH_MATCH_ANY matches any of the words; SPH_MATCH_ALL requires all of them.
    $sphinx->SetMatchMode(SPH_MATCH_ANY);
    // '*' searches across all indexes. Query() returns false on failure.
    $res = $sphinx->Query($keyword, '*');

    if (is_array($res) && !empty($res['matches'])) {
        // Match keys are document IDs; take the first (best-ranked) one.
        $ids = array_keys($res['matches']);
        $docId = (int)$ids[0];

        $con = mysqli_connect("localhost","*************","************","*************");
        if ($con) {
            // Match the charset declared in the Content-type header and used when indexing.
            mysqli_set_charset($con, 'utf8');
            // Prepared statement instead of interpolating the ID into the SQL text.
            $stmt = mysqli_prepare($con, "SELECT ti FROM tiku WHERE id = ?");
            if ($stmt) {
                mysqli_stmt_bind_param($stmt, 'i', $docId);
                if (mysqli_stmt_execute($stmt)) {
                    mysqli_stmt_bind_result($stmt, $ti);
                    if (mysqli_stmt_fetch($stmt)) {
                        $answer = $ti;
                    }
                }
                mysqli_stmt_close($stmt);
            }
            mysqli_close($con);
        }
    }
}

// NOTE(review): the stored text is echoed unescaped into an HTML response - confirm `ti` is trusted markup.
echo ($answer !== null) ? $answer : $notFound;
?>
添加一个定时任务(将下面的脚本保存为 shell 文件,并通过 crontab 定时执行)
#!/bin/bash
# Scheduled job: rebuild all Sphinx indexes and make sure searchd is serving them.
PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:~/bin
export PATH

# Installation prefix passed to ./configure --prefix when Sphinx was built.
SPHINX_HOME=/usr/local/sphinx-for-chinese
SPHINX_CONF="$SPHINX_HOME/etc/sphinx.conf"

if pgrep -x searchd >/dev/null 2>&1; then
    # A running searchd keeps the live index files locked, so a plain reindex fails.
    # --rotate builds new files and signals searchd to swap them in, with no restart needed.
    "$SPHINX_HOME/bin/indexer" -c "$SPHINX_CONF" --all --rotate
    status=$?
else
    # searchd is not running: build the indexes in place, then start the daemon.
    "$SPHINX_HOME/bin/indexer" -c "$SPHINX_CONF" --all \
        && "$SPHINX_HOME/bin/searchd" -c "$SPHINX_CONF"
    status=$?
fi

echo "----------------------------------------------------------------------------"
endDate=$(date +"%Y-%m-%d %H:%M:%S")
# Report success only when indexing (and, if needed, the searchd start) actually succeeded.
if [ "$status" -eq 0 ]; then
    echo "★[$endDate] Successful"
else
    echo "★[$endDate] Failed (exit code $status)"
fi
echo "----------------------------------------------------------------------------"
exit "$status"