今天接到一个新的业务需求,客户需要将指定的中文汉字转换成拼音(含:简拼、首全拼、尾全拼)。
1. 创建基础数据表
-- ---------------------------- -- Table structure for bst_wbjq -- ---------------------------- DROP TABLE IF EXISTS `bst_wbjq`; CREATE TABLE `bst_wbjq` ( `CHARACTOR` varchar(200) NOT NULL, `word` varchar(100) NOT NULL, `CODE` varchar(100) DEFAULT NULL, `STROKE` varchar(200) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- ---------------------------- -- Table structure for tbl_pinyin -- ---------------------------- DROP TABLE IF EXISTS `tbl_pinyin`; CREATE TABLE `tbl_pinyin` ( `SN` bigint(20) NOT NULL, `WORD` varchar(200) NOT NULL, `PY` varchar(200) NOT NULL, `PYLEVEL` int(11) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=utf8; |
|
2. 插入基础数据记录
-- ---------------------------- -- Records of bst_wbjq -- ---------------------------- truncate table bst_wbjq; INSERT INTO `bst_wbjq` VALUES ('禅', 'C', 'P', '^K'); INSERT INTO `bst_wbjq` VALUES ('讶', 'Y', 'Y', '2T'); INSERT INTO `bst_wbjq` VALUES ('焉', 'Y', 'G', 'Pa'); INSERT INTO `bst_wbjq` VALUES ('阉', 'Y', 'U', 'V2'); INSERT INTO `bst_wbjq` VALUES ('烟', 'Y', 'O', 'Ng'); INSERT INTO `bst_wbjq` VALUES ('淹', 'Y', 'I', 'V^'); INSERT INTO `bst_wbjq` VALUES ('圊', 'Q', 'L', 'Rz'); INSERT INTO `bst_wbjq` VALUES ('圉', 'Y', 'L', 'S?'); INSERT INTO `bst_wbjq` VALUES ('帔', 'P', 'M', ';~'); .... .... commit;
-- ---------------------------- -- Records of tbl_pinyin -- ---------------------------- truncate table tbl_pinyin; INSERT INTO `tbl_pinyin` VALUES ('33641', '胫', 'jing4', '0'); INSERT INTO `tbl_pinyin` VALUES ('30749', '箅', 'bi4', '0'); INSERT INTO `tbl_pinyin` VALUES ('30750', '箢', 'yuan1', '0'); INSERT INTO `tbl_pinyin` VALUES ('30751', '篁', 'huang2', '0'); INSERT INTO `tbl_pinyin` VALUES ('30752', '篦', 'bi4', '0'); INSERT INTO `tbl_pinyin` VALUES ('30753', '篾', 'mie4', '0'); INSERT INTO `tbl_pinyin` VALUES ('30754', '簋', 'gui3', '0'); INSERT INTO `tbl_pinyin` VALUES ('30755', '簪', 'zan1', '0'); INSERT INTO `tbl_pinyin` VALUES ('30756', '籀', 'zhou4', '0'); INSERT INTO `tbl_pinyin` VALUES ('30757', '舄', 'xi4', '0'); INSERT INTO `tbl_pinyin` VALUES ('30758', '舢', 'shan1', '0'); INSERT INTO `tbl_pinyin` VALUES ('30759', '舨', 'ban3', '0'); .... .... commit; |
3. 创建汉字转拼音函数(存储过程、函数)
3.1. 创建存储过程:PRC_GET_PYM
-- ---------------------------- -- procedure structure for PRC_GET_PYM -- ---------------------------- delimiter $$ drop procedure if exists PRC_GET_PYM; $$ create procedure PRC_GET_PYM(IN V_NAME varchar(256),OUT V_PYM varchar(256)) begin declare i int default 1; declare j int default 0; declare V_PINYIN_TEMP VARCHAR(70); declare V_NAME_TEMP varchar(200); declare V_NAME_SIN varchar(10); declare V_PINYIN_SIN varchar(10); declare v_counter1 int(8);
#替换各种特殊符号 select replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(V_NAME, 'A','A'),'B','B'),'C','C'),'D','D'),'E','E'),'F','F'),'G','G'),'H','H'), 'I','I'),'J','J'),'K','K'),'L','L'),'M','M'),'N','N'),'O','O'),'P','P'), 'Q','Q'),'R','R'),'S','S'),'T','T'),'U','U'),'V','V'),'W','W'),'X','X'), 'Y','Y'),'Z','Z'),'+',''),'-',''),'*',''),'/',''), '[',''),']',''),'{',''),'}',''),'(',''),')',''), '<',''),'>',''),'《',''),'》',''),'(',''),')',''),'"',''), '“',''),'”',''),'.',''),'。',''),'-',''),'-',''),'/',''), '/',''),' ',''),' ',''),'1','一'),'2','二'),'3','三'), '4','四'),'5','五'),'6','六'),'7','七'),'8','八'),'9','九'),'0','零') R3 into V_NAME_TEMP from dual;
#循环获得字符串拼音码 myloop:loop if V_NAME is null then leave myloop; end if;
select substr(V_NAME_TEMP, i, 1) into V_NAME_SIN from dual; set i=i+1; if V_NAME_SIN <> ' ' then select count(*) into v_counter1 from bst_wbjq where bst_wbjq.charactor=v_name_sin; if v_counter1 > 0 then select WORD into V_PINYIN_SIN from bst_wbjq where bst_wbjq.CHARACTOR=V_NAME_SIN limit 1; select concat_ws('',V_PINYIN_TEMP,V_PINYIN_SIN) into V_PINYIN_TEMP from dual; end if; end if; select char_length(V_NAME) into j from dual; if i > j then leave myloop; end if; end loop; #截取32位长度字符 if char_length(V_PINYIN_TEMP) > 32 then select substr(V_PINYIN_TEMP, 1, 32) into V_PYM from dual; else select V_PINYIN_TEMP into V_PYM from dual; end if; end; $$ delimiter ; |
3.2. 创建存储过程:SP_PINYIN
-- ---------------------------- -- procedure structure for SP_PINYIN -- ---------------------------- delimiter $$ drop procedure if exists SP_PINYIN; $$ create procedure SP_PINYIN(IN hanzi varchar(256),OUT pinyin varchar(256)) begin declare aword varchar(200); declare aresult varchar(200); declare temp1 varchar(20); declare len int default 0; declare point int default 1; declare charword varchar(20); declare charlen int default 1; #定义游标标志变量 declare done int default false; #定义游标 declare cur_pinyin cursor for select PY from TBL_PINYIN where word=substr(aword, point, charlen); #指定游标循环结束时的返回值 declare continue HANDLER for not found set done=true; select ltrim(rtrim(hanzi)) into aword from dual; select char_length(aword) into len from dual; #<<LABEL1>> while point <= len do select '' into temp1 from dual; select substr(aword, point, 1) into charword from dual; if (charword is not null and charword != ' ') then select concat_ws(' ',aresult,charword) into aresult from dual; else select 2 into charlen from dual; end if; #打开游标 open cur_pinyin; #开始循环处理游标里的数据 read_loop:loop #获得游标当前指向的一条数据 fetch cur_pinyin into temp1; #判断游标的循环是否结束 if done then leave read_loop; end if; end loop; #结束游标循环 #关闭游标 close cur_pinyin; if (point = 1) then set aresult = temp1; else select concat_ws(' ',aresult,temp1) into aresult from dual; end if; select point+charlen into point from dual; end while; #输出结果 select aresult into pinyin from dual; end; $$ delimiter ; |
3.3. 创建函数:to_pinyin
-- ---------------------------- -- function structure for to_pinyin -- ---------------------------- delimiter $$ drop function if exists to_pinyin; $$ create function to_pinyin(v_hanzi varchar(256),v_type int) returns varchar(256) begin declare strTemp VARCHAR(200); declare strResult VARCHAR(200); declare strHanzi VARCHAR(200); declare strTemp1 VARCHAR(200); declare v_subb VARCHAR(100); declare V_NAME_TEMP VARCHAR(200); declare v_pinyin VARCHAR(200); #替换各种特殊符号 select replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( replace(replace(replace(replace(replace(replace(replace(replace(v_hanzi, 'A','A'),'B','B'),'C','C'),'D','D'),'E','E'),'F','F'),'G','G'),'H','H'), 'I','I'),'J','J'),'K','K'),'L','L'),'M','M'),'N','N'),'O','O'),'P','P'), 'Q','Q'),'R','R'),'S','S'),'T','T'),'U','U'),'V','V'),'W','W'),'X','X'), 'Y','Y'),'Z','Z'),'+',''),'-',''),'*',''),'/',''), '[',''),']',''),'{',''),'}',''),'(',''),')',''), '<',''),'>',''),'《',''),'》',''),'(',''),')',''),'"',''), '“',''),'”',''),'.',''),'。',''),'-',''),'-',''),'/',''), '/',''),' ',''),' ',''),'1','一'),'2','二'),'3','三'), '4','四'),'5','五'),'6','六'),'7','七'),'8','八'),'9','九'),'0','零') R3 into V_NAME_TEMP from dual; if v_type = '1' then #简拼 set @V_NAME_TEMP=V_NAME_TEMP; set @strResult=null; call Prc_Get_Pym(@V_NAME_TEMP, @strResult); elseif v_type = '2' then #尾全拼 #判断结尾字符是否是中文 select ltrim(rtrim(substr(V_NAME_TEMP, char_length(V_NAME_TEMP), char_length(V_NAME_TEMP)))) into v_subb from dual; if v_subb is null then #如果不是中文则直接生成开口码 set @V_NAME_TEMP=V_NAME_TEMP; set @strResult=null; call Prc_Get_Pym(@V_NAME_TEMP, @strResult); else select substr(V_NAME_TEMP, 1, char_length(V_NAME_TEMP)-1) into strHanzi from dual; set @strHanzi=strHanzi; set @strTemp1=null; call Prc_Get_Pym(@strHanzi, @strTemp1); select substr(V_NAME_TEMP, char_length(V_NAME_TEMP), char_length(V_NAME_TEMP)) into strHanzi from dual; set @strHanzi=strHanzi; set @strTemp=null; call Sp_Pinyin(@strHanzi, @strTemp); select substr(@strTemp, 1, char_length(@strTemp) - 1) into @strResult from dual; select concat_ws('',@strTemp1,@strResult) into @strResult from dual; end if; elseif v_type = '3' then #首全拼 #判断开头字符是否是中文 select ltrim(rtrim(substr(V_NAME_TEMP, 1, 1))) into v_subb from dual; if v_subb is null then #如果不是中文则直接生成开口码 set @V_NAME_TEMP=V_NAME_TEMP; set @strResult=null; call Prc_Get_Pym(@V_NAME_TEMP, @strResult); else select substr(V_NAME_TEMP, 2, char_length(V_NAME_TEMP)) into strHanzi from dual; set @strHanzi=strHanzi; set @strResult=null; call Prc_Get_Pym(@strHanzi, @strResult); select substr(V_NAME_TEMP, 1, 1) into strHanzi from dual; set @strHanzi=strHanzi; set @strTemp=null; call Sp_Pinyin(@strHanzi, @strTemp); select concat_ws('',substr(@strTemp, 1, char_length(@strTemp) - 1),@strResult) into @strResult from dual; end if; end if; set v_pinyin=UPPER(@strResult); return v_pinyin; end; $$ delimiter ; |
4. 使用方法/案例