1. Introduction
In day-to-day development, fuzzy queries are used all the time. A fuzzy search normally matches on a keyword, for example name like '%123%'. But what if the user does not remember the keyword and only knows the first letter of each character? For example, how can a row that would match name like '%我是中国人%' ("I am Chinese", pinyin: wo shi zhong guo ren) be found by searching for the initials wszgr?
2. Create a custom function
The built-in LIKE operator alone cannot handle this scenario, so a custom function is needed: it converts the Chinese characters stored in a column into their pinyin initials, and the fuzzy search is then run against those letters.
Create a custom function CnFirstChar to convert Chinese strings into first letters.
-- CnFirstChar(s): reduce a string to a sequence of "initials" for fuzzy search.
--
-- Each character of s is converted to GB18030 and handled as follows:
--   * one-byte character with byte value < 127: appended upper-cased;
--   * two-byte character whose 16-bit code (byte0 * 256 + byte1) falls in one
--     of the pinyin ranges below: its lower-case pinyin initial is appended
--     (there are no ranges for i, u or v);
--   * anything else (two-byte codes outside the ranges, longer encodings,
--     a lone byte >= 127): nothing is appended.
--
-- Note that the result mixes cases: ASCII input comes back upper-case while
-- Hanzi initials are lower-case.
--
-- Returns '' for NULL or empty input.
-- Declared IMMUTABLE, as in the original definition, so it remains usable in
-- expression indexes.
CREATE OR REPLACE FUNCTION CnFirstChar(s character varying)
RETURNS character varying AS
$BODY$
DECLARE
    result character varying := '';
    ch     character varying;  -- current character of s
    gb     bytea;              -- ch encoded as GB18030
    code   integer;            -- 16-bit code of a two-byte character
BEGIN
    -- coalesce keeps the historical behaviour of returning '' for NULL input
    -- (a NULL loop bound would raise an error).
    FOR i IN 1..coalesce(length(s), 0) LOOP
        ch := substr(s, i, 1);
        gb := convert_to(ch, 'GB18030')::bytea;

        IF get_byte(gb, 0) < 127 THEN
            result := result || upper(ch);
        ELSIF length(gb) = 2 THEN
            code := get_byte(gb, 0) * 256 + get_byte(gb, 1);
            -- Hanzi in the GBK encoding are sorted by pinyin, so every initial
            -- owns one contiguous code range. (Original author's note: probing
            -- the ranges in order of how many characters they hold would
            -- probably be faster on average.)
            result := result || CASE
                WHEN code BETWEEN 45217 AND 45252 THEN 'a'
                WHEN code BETWEEN 45253 AND 45760 THEN 'b'
                WHEN code BETWEEN 45761 AND 46317 THEN 'c'
                WHEN code BETWEEN 46318 AND 46825 THEN 'd'
                WHEN code BETWEEN 46826 AND 47009 THEN 'e'
                WHEN code BETWEEN 47010 AND 47296 THEN 'f'
                WHEN code BETWEEN 47297 AND 47613 THEN 'g'
                WHEN code BETWEEN 47614 AND 48118 THEN 'h'
                WHEN code BETWEEN 48119 AND 49061 THEN 'j'
                WHEN code BETWEEN 49062 AND 49323 THEN 'k'
                WHEN code BETWEEN 49324 AND 49895 THEN 'l'
                WHEN code BETWEEN 49896 AND 50370 THEN 'm'
                WHEN code BETWEEN 50371 AND 50613 THEN 'n'
                WHEN code BETWEEN 50614 AND 50621 THEN 'o'
                WHEN code BETWEEN 50622 AND 50905 THEN 'p'
                WHEN code BETWEEN 50906 AND 51386 THEN 'q'
                WHEN code BETWEEN 51387 AND 51445 THEN 'r'
                -- Upper bound is 52217, not 52216: the old bound left a
                -- one-code gap before 't', so that character was dropped.
                WHEN code BETWEEN 51446 AND 52217 THEN 's'
                WHEN code BETWEEN 52218 AND 52697 THEN 't'
                WHEN code BETWEEN 52698 AND 52979 THEN 'w'
                WHEN code BETWEEN 52980 AND 53688 THEN 'x'
                WHEN code BETWEEN 53689 AND 54480 THEN 'y'
                WHEN code BETWEEN 54481 AND 55289 THEN 'z'
                ELSE ''  -- two-byte code outside every range: contributes nothing
            END;
        END IF;
    END LOOP;

    RETURN result;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE;
3. Using the custom function in a SQL fuzzy search
Once the custom function above has been created in the PostgreSQL database, it can be called directly in SQL queries.
-- "user" is a reserved word in PostgreSQL: written unquoted, FROM user is
-- parsed as the current-user function rather than a table, so the table name
-- must be double-quoted.
-- CnFirstChar emits lower-case initials for Hanzi, hence the lower-case pattern.
SELECT *
FROM "user"
WHERE CnFirstChar(user_name) LIKE '%wszgr%';