sphinx做搜题的引擎

sphinx原理

  • 先创建数据源。
  • 根据数据源创建索引,使用分词技术。
  • PHP 把查询的关键词发送给 Sphinx 服务器,Sphinx 根据关键词在索引中查找到对应 MySQL 表记录的 id,并把 id 返回给 PHP 查询端。
  • php根据返回的id,查询mysql服务器。

安装gcc和gcc-c++

# Install the C/C++ toolchain needed to compile Sphinx from source.
yum install gcc gcc-c++ libstdc++-devel

安装sphinx-for-chinese

下载sphinx-for-chinese.tar.gz

下载并放在/usr/local目录下并解压

# Unpack the source archive that was placed in /usr/local.
cd /usr/local
tar -zxvf sphinx-for-chinese.tar.gz
# configure/make must be run from inside the extracted source directory.
# NOTE(review): the directory name depends on the downloaded release
# (it may carry a version suffix) - adjust it to what tar actually created.
cd sphinx-for-chinese
# Build with MySQL support and install under /usr/local/sphinx-for-chinese.
./configure  --prefix=/usr/local/sphinx-for-chinese  --with-mysql
make
make install

生成字典

将 xdict_1.1.tar.gz(需另行下载)解压后得到的 xdict_1.1.txt 文件放到 /usr/local/sphinx-for-chinese 目录下

# Compile the plain-text word list into the binary dictionary etc/xdict.
# The paths use the same prefix that was passed to ./configure --prefix.
cd /usr/local/sphinx-for-chinese
/usr/local/sphinx-for-chinese/bin/mkdict  xdict_1.1.txt  etc/xdict

若 /usr/local/sphinx-for-chinese/etc/ 文件夹中生成了 xdict 文件,则代表成功

重建索引

创建一个 sphinx.conf 文件放到 /usr/local/sphinx-for-chinese/etc 目录下,在其中配置好相关信息

# Data source: reads the question bank from the MySQL table `tiku`.
source main{
    type            = mysql
    sql_host        = 127.0.0.1
    sql_user        = **********       # database user name
    sql_pass        = **********   # database password
    sql_db          = **********       # database to index
    sql_port        = 3306
    sql_sock=/tmp/mysql.sock       # MySQL socket file
    sql_query_pre   = SET NAMES utf8
    sql_query       = \
	SELECT id,ti FROM tiku
}


# Inherits every setting from `main`; not referenced by any index below.
source src1throttled : main{

}


# Full-text index over `main`, segmented with the Chinese dictionary built by mkdict.
index tiku{
source=main
path=/usr/local/sphinx-for-chinese/var/data/tiku
charset_type    = utf-8
chinese_dictionary = /usr/local/sphinx-for-chinese/etc/xdict
}

indexer{
mem_limit = 256M
}
searchd
{
listen = 9312
listen = 9306:mysql41
log = /usr/local/sphinx-for-chinese/var/log/searchd.log
query_log = /usr/local/sphinx-for-chinese/var/log/query.log
read_timeout = 5
max_children = 30
# Keep the pid file next to the logs under var/log (was <prefix>/log).
pid_file = /usr/local/sphinx-for-chinese/var/log/searchd.pid
seamless_rotate= 1
preopen_indexes= 1
unlink_old = 1
workers = threads # for RT to work
# Fixed directory-name typo (was "sphinx-fo-rchinese").
binlog_path = /usr/local/sphinx-for-chinese/var/data

}

配置说明:

# Basic Sphinx configuration
# Data source
source goods_src 
{ 
    # Database type
    type = mysql 
    # MySQL host
    sql_host = localhost 
    # MySQL user name
    sql_user = sphinxuser 
    # MySQL password
    sql_pass = sphinxpass 
    # MySQL database
    sql_db = sphinx 
    # MySQL port (open it in the firewall if access is restricted)
    sql_port= 3306 
    # MySQL socket file (default is /tmp/mysql.sock; set it explicitly if yours differs)
    sql_sock = /tmp/mysql.sock 
    # Connection charset for MySQL (data that is not utf8 will most likely not be searchable)
    sql_query_pre = SET NAMES UTF8 
    # SQL statement that fetches the rows to index
    sql_query = SELECT goods_id,goods_id AS goods_id_new,goods_name,goods_color,goods_name AS goods_name_search,goods_color AS goods_color_search From goods_test 
    # Attributes used for filtering / conditional queries. Columns declared below are returned in the search results;
    # columns NOT declared below are the full-text searched fields (e.g. goods_color_search and goods_name_search in the SQL above)
    # Unsigned integer attribute
    # goods_id is the primary key; declaring it here makes index building fail with "attribute 'goods_id' not found", so the alias goods_id_new is used as a workaround
    sql_attr_uint = goods_id_new 
    # String attributes
    sql_attr_string = goods_name 
    sql_attr_string = goods_color 
    # Query used for testing from the command-line (CLI) tool (usually not needed)
    #sql_query_info = SELECT * FROM goods_test Where goods_id = $goods_id; 
} 
# Index
index goods 
{ 
    # Source this index is built from
    source = goods_src 
    # Where the index files are stored
    path = /usr/local/sphinx-for-chinese-1.10.1/var/data/goods
    # Document attribute storage mode (default is extern)
    docinfo = extern 
    # Whether to lock cached data in memory
    mlock = 0 
    # Morphology processing (has no effect on Chinese)
    morphology = none 
    # Minimum indexed word length
    min_word_len = 1 
    # Character encoding (must be utf-8 to index Chinese)
    charset_type = utf-8 
    # Chinese word-segmentation dictionary
    chinese_dictionary = /usr/local/sphinx-for-chinese-1.10.1/etc/xdict_1.1 
    # Minimum indexed prefix length
    min_prefix_len = 0 
    # Minimum indexed infix length
    min_infix_len = 1 
    # N-gram length for non-alphabetic data (for CJK indexing)
    ngram_len = 1 
    # Whether to strip HTML tags from the content
    html_strip = 0 
} 
# Indexer settings
indexer 
{ 
    # Memory limit: default 32M, maximum 2047M, 256M to 1024M recommended
    mem_limit = 256M 
} 
# Settings for the searchd daemon
searchd 
{ 
    # Listen address/port; any of the forms below works, default is port 9312 on this host
    # listen = 127.0.0.1 
    # listen = 192.168.0.1:9312 
    # listen = 9312 
    # listen = /var/run/searchd.sock 
    # searchd log file path
    log = /usr/local/sphinx-for-chinese-1.10.1/var/log/searchd.log 
    # Query log path
    query_log = /usr/local/sphinx-for-chinese-1.10.1/var/log/query.log 
    # Read timeout
    read_timeout = 5 
    # Client request timeout
    client_timeout = 300 
    # Maximum number of searchd child processes
    max_children = 30 
    # PID file
    pid_file = /usr/local/sphinx-for-chinese-1.10.1/var/log/searchd.pid 
    # Maximum number of matches returned per query
    max_matches = 1000 
    # Whether to enable seamless rotation (needed for incremental indexing)
    seamless_rotate = 1 
    # Whether to pre-open all index files at startup
    preopen_indexes = 0 
    # Whether to unlink (delete) old index files
    unlink_old = 1 
    # MVA updates pool size (default 1M)
    mva_updates_pool = 1M 
    # Maximum allowed network packet size (default 8M)
    max_packet_size = 8M 
    # Maximum number of filters per query (default 256)
    max_filters = 256 
    # Maximum number of values per filter (default 4096)
    max_filter_values = 4096 
    # Maximum number of queries per batch (default 32)
    max_batch_queries = 32 
} 
# End of the Sphinx configuration file
# Build every index defined in sphinx.conf (--all); --rotate is the indexer
# option used to hand freshly built index files over to a running searchd.
/usr/local/sphinx-for-chinese/bin/indexer  -c  /usr/local/sphinx-for-chinese/etc/sphinx.conf  --all  --rotate

启动

# Start the searchd daemon with the configuration file created above.
/usr/local/sphinx-for-chinese/bin/searchd  -c  /usr/local/sphinx-for-chinese/etc/sphinx.conf

外部API调用(PHP调用)

两种方法,一种安装扩展,另一种引入api文件夹下的 sphinxapi.php 文件即可

 <?php
/**
 * Question search endpoint.
 *
 * Reads the keyword from the `w` query parameter, asks searchd for matching
 * document ids, then loads the text of the first match from the MySQL table
 * `tiku` and prints it. Prints a "nothing found" hint when there is no usable
 * keyword, no match, or no matching row.
 */

error_reporting(0);
header("Content-type: text/html; charset=utf-8");

require_once 'sphinxapi.php';

// Message shown whenever no answer can be returned.
$notFound = "没搜到答案鸭,去掉标点选项字母等,使用关键词再试一下吧!";

// The parameter may be missing or an array (?w[]=x); only a non-empty string is a valid keyword.
$keyword = (isset($_GET['w']) && is_string($_GET['w'])) ? trim($_GET['w']) : '';
if ($keyword === '') {
    echo $notFound;
    exit;
}

$sphinx = new SphinxClient();
$sphinx->SetServer('localhost', 9312);
// Match mode: SPH_MATCH_ANY matches any of the query words (SPH_MATCH_ALL would require all of them).
$sphinx->SetMatchMode(SPH_MATCH_ANY);

// '*' searches across all indexes. Query() returns false on failure, and
// 'matches' is absent when nothing matched, so both cases are guarded.
$res = $sphinx->Query($keyword, '*');
$matches = (is_array($res) && !empty($res['matches'])) ? $res['matches'] : array();

if (!$matches) {
    echo $notFound;
    exit;
}

// Matches are keyed by document id; take the first one and force it to an integer.
$docIds = array_keys($matches);
$docId  = (int) $docIds[0];

$con = mysqli_connect("localhost","*************","************","*************");
if (!$con) {
    http_response_code(500);
    echo "Database connection failed.";
    exit;
}
// The indexed data is read as utf8 (SET NAMES utf8 in sphinx.conf); use the same charset here.
mysqli_set_charset($con, 'utf8');

// A prepared statement keeps the id out of the SQL text.
$answer = null;
$stmt = mysqli_prepare($con, "select ti from tiku where id = ?");
if ($stmt) {
    mysqli_stmt_bind_param($stmt, 'i', $docId);
    if (mysqli_stmt_execute($stmt)) {
        mysqli_stmt_bind_result($stmt, $ti);
        if (mysqli_stmt_fetch($stmt)) {
            $answer = $ti;
        }
    }
    mysqli_stmt_close($stmt);
}
mysqli_close($con);

// The index can be newer or older than the table, so the row may be missing.
echo ($answer !== null) ? $answer : $notFound;

?>

添加一个定时任务

#!/bin/bash
# Cron job: rebuild all Sphinx indexes and make sure searchd serves the new data.
PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:~/bin
export PATH

# Same prefix that was passed to ./configure --prefix during installation.
SPHINX_HOME=/usr/local/sphinx-for-chinese
SPHINX_CONF="$SPHINX_HOME/etc/sphinx.conf"

ok=1
if "$SPHINX_HOME/bin/searchd" -c "$SPHINX_CONF" --status >/dev/null 2>&1; then
    # searchd is running and holds the live index files, so a plain rebuild
    # would be refused. --rotate builds new files and signals searchd to swap
    # them in (seamless_rotate = 1), so no stop/start is needed.
    "$SPHINX_HOME/bin/indexer" -c "$SPHINX_CONF" --all --rotate || ok=0
else
    # searchd is not running: rebuild in place, then start the daemon.
    "$SPHINX_HOME/bin/indexer" -c "$SPHINX_CONF" --all || ok=0
    "$SPHINX_HOME/bin/searchd" -c "$SPHINX_CONF" || ok=0
fi

echo "----------------------------------------------------------------------------"
endDate=$(date +"%Y-%m-%d %H:%M:%S")
if [ "$ok" -eq 1 ]; then
    echo "★[$endDate] Successful"
else
    echo "★[$endDate] Failed"
fi
echo "----------------------------------------------------------------------------"
# Non-zero exit status lets cron/monitoring notice a failed rebuild.
[ "$ok" -eq 1 ]
发布了8 篇原创文章 · 获赞 12 · 访问量 6040

猜你喜欢

转载自blog.csdn.net/weixin_43631579/article/details/105240003