import math
from typing import Callable, List, Tuple

# Characters that mark a word boundary inside a candidate string.
_SEPARATORS = ("_", " ", "-")


def _fuzzy_match_score(query: str, target: str) -> int:
    """Score *query* as a case-insensitive subsequence of *target*.

    The target is scanned left to right and each query character is
    greedily matched against the first target character that equals it
    (ignoring case).  Every matched character contributes:

    * +10 base points;
    * +50 if it is the first query character and sits at target index 0,
      or +20 if it is the first query character anywhere else;
    * +25 if it starts a word: the previous target character is ``_``,
      a space or ``-``, or the previous character is lowercase and this
      one is uppercase (camelCase);
    * +25 if it directly follows the previous match, plus another +10
      when that previous match was not at target index 0;
    * otherwise a penalty of ``min(int(gap ** 1.5), 15)``, where ``gap``
      is the number of target characters skipped since the previous match.

    When the whole query was matched, the raw score is scaled by a length
    factor ``0.5 + 0.5 * len(query) / len(target)`` and then by a position
    factor ``0.7 + 0.3 * (1 - end / len(target))``, where ``end`` is the
    index just past the last matched character.  Each scaling step
    truncates to ``int``.

    :param query: text typed by the user
    :param target: candidate string to score
    :return: 0 if *query* is not a subsequence of *target*, otherwise a
        score of at least 1 (an empty query therefore scores 1)
    """
    # Lowercase character by character rather than the whole string, so
    # indices stay aligned with the original-case target; the camelCase
    # check below needs the original case to be able to fire at all.
    query_chars = [ch.lower() for ch in query]
    q_len, t_len = len(query_chars), len(target)

    score = 0
    last_match = -1
    qi = ti = 0
    while qi < q_len and ti < t_len:
        current = target[ti]
        if query_chars[qi] == current.lower():
            score += 10  # base points for any matched character

            # Position bonus for the first query character.  (qi never
            # exceeds ti, so a match at ti == 0 always has qi == 0.)
            if qi == 0:
                score += 50 if ti == 0 else 20

            # Word-boundary bonus, evaluated on the original-case target.
            if ti > 0:
                previous = target[ti - 1]
                if previous in _SEPARATORS or (
                    previous.islower() and current.isupper()
                ):
                    score += 25

            # Reward consecutive matches, penalise skipped characters.
            if last_match >= 0:
                gap = ti - last_match - 1
                if gap == 0:
                    score += 25
                    if last_match > 0:
                        score += 10
                else:
                    score -= min(int(math.pow(gap, 1.5)), 15)

            last_match = ti
            qi += 1
        ti += 1

    if qi < q_len:
        return 0  # some query characters were never found
    if t_len == 0:
        # Only reachable with an empty query and an empty target; score it
        # like any other empty-query match instead of dividing by zero.
        return 1

    # Length factor: the more of the target the query covers, the better.
    length_ratio = q_len / t_len
    score = int(score * (0.5 + 0.5 * length_ratio))

    # Position factor: matches that finish earlier in the target rank higher.
    position_ratio = 1.0 - ti / t_len
    score = int(score * (0.7 + 0.3 * position_ratio))

    return max(score, 1)


def fuzzy_match_and_score(
    count: int,
    query: str,
    pred: Callable[[int], str],
) -> List[Tuple[int, int]]:
    """Fuzzy-match *query* against indexed candidates and rank the hits.

    :param count: number of candidate strings
    :param query: query string
    :param pred: callable that returns the candidate string for an index
        in ``range(count)``
    :return: ``(index, score)`` pairs for every candidate with a non-zero
        score, sorted by score descending, ties broken by index ascending
    """
    results = [
        (i, score)
        for i in range(count)
        if (score := _fuzzy_match_score(query, pred(i)))
    ]
    # Highest score first; equal scores keep ascending index order.
    results.sort(key=lambda item: (-item[1], item[0]))
    return results


# Usage example

# Candidate data source
candidates = [
    "MB_YESNO",
    "MB_ICONQUESTION",
    "MB_ABORTRETRYIGNORE",
    "IDOK",
    "IDCANCEL",
    "MESSAGEBOX_YESNO",
]

# Query string
query = "m_no"

# Run the fuzzy match
results = fuzzy_match_and_score(
    count=len(candidates),
    query=query,
    pred=lambda i: candidates[i],
)

print(f"总共匹配到 {len(results)} 项:")
for index, score in results:
    print(f"\t{candidates[index]}(得分: {score})")

# Worked example: query "m_no" against target "MB_YESNO"
#
#   length factor   = query length / target length
#   position factor = 1.0 - (index just past the last match / target length)
#   final score     = int(int(raw * (0.5 + 0.5 * length factor))
#                         * (0.7 + 0.3 * position factor))
#
#   query char | target index | points
#   m          | 0            | base +10, first char at target start +50
#   _          | 2            | base +10, skipped 1 character -1
#   n          | 6            | base +10, skipped 3 characters -5
#   o          | 7            | base +10, consecutive +25, extra +10
#
#   raw score       : 10+50+10-1+10-5+10+25+10 = 119
#   length factor   : 4 / 8 = 0.5          -> int(119 * 0.75) = 89
#   position factor : 1.0 - (8 / 8) = 0.0  -> int(89 * 0.7)   = 62
#
# Result for the candidates above: [(0, 62), (2, 47), (5, 45), (1, 31)]
# 本文由青舟模板网发布,如若转载,请注明出处:http://qingzo.com/jishu/20250504150105.html