# Python 文本字符串模糊匹配算法 + 打分机制代码分享

import math
from typing import Callable, List, Tuple
 
 
def fuzzy_match_and_score(
        count: int,
        query: str,
        pred: Callable[[int], str]
) -> List[Tuple[int, int]]:
    """Fuzzy-match ``query`` against indexed candidates and rank the hits.

    A candidate matches when every character of ``query`` occurs in it in
    order (case-insensitively), not necessarily contiguously.

    :param count: number of candidate strings; indices ``0..count-1`` are
        passed to ``pred``
    :param query: the query string
    :param pred: callable returning the candidate string for an index
    :return: ``(index, score)`` pairs for matching candidates only, sorted
        by score descending, ties broken by index ascending
    """

    separators = ('_', ' ', '-')

    def fuzzy_match_score(q: str, t: str) -> int:
        """Score how well ``q`` fuzzy-matches ``t``.

        Returns 0 when ``q`` is not a subsequence of ``t``; otherwise a
        score of at least 1 (an empty query matches anything with score 1).
        """
        # Lower-case character by character so the folded sequences keep
        # the same length and indices as the originals; ``str.lower()`` on
        # the whole string may change its length for some characters.
        q_chars = [ch.lower() for ch in q]
        t_chars = [ch.lower() for ch in t]
        q_len, t_len = len(q_chars), len(t_chars)

        if t_len == 0:
            # Avoid dividing by zero below. An empty query still matches
            # (as it does for non-empty targets); anything else cannot.
            return 1 if q_len == 0 else 0

        score = 0
        last_match = -1
        qi = ti = 0

        while qi < q_len and ti < t_len:
            if q_chars[qi] == t_chars[ti]:
                score += 10  # base points for every matched character

                # Position bonuses.
                if ti == 0:
                    # Match on the target's first character; worth more
                    # when it is also the query's first character.
                    score += 50 if qi == 0 else 30
                elif qi == 0:
                    score += 20  # query's first character found later on

                # Word-boundary bonus: after a separator, or at a
                # lower->upper camelCase transition. Case is inspected on
                # the ORIGINAL target, since the folded copy has no
                # upper-case characters left.
                if ti > 0 and (
                        t[ti - 1] in separators or
                        (t[ti - 1].islower() and t[ti].isupper())
                ):
                    score += 25

                # Adjacency bonus / gap penalty relative to previous match.
                if last_match >= 0:
                    gap = ti - last_match - 1
                    if gap == 0:
                        score += 25  # directly follows the previous match
                        # Extra points unless the previous match was the
                        # target's very first character.
                        if last_match > 0:
                            score += 10
                    else:
                        # Penalty grows as gap**1.5 but is capped at 15.
                        score -= min(int(math.pow(gap, 1.5)), 15)

                last_match = ti
                qi += 1
            ti += 1

        if qi != q_len:
            return 0  # ran out of target before consuming the query

        # Length factor in [0.5, 1.0]: favours targets close in length
        # to the query.
        length_ratio = q_len / t_len
        score = int(score * (0.5 + 0.5 * length_ratio))

        # Position factor in [0.7, 1.0]: favours matches that finish
        # early in the target (``ti`` is one past the last match).
        position_ratio = 1.0 - (ti / t_len)
        score = int(score * (0.7 + 0.3 * position_ratio))

        # A successful match never scores below 1, so 0 means "no match".
        return max(score, 1)

    results = [
        (i, score)
        for i in range(count)
        if (score := fuzzy_match_score(query, pred(i)))
    ]

    # Highest score first; equal scores keep ascending index order.
    results.sort(key=lambda item: (-item[1], item[0]))
    return results
 
 
# Usage example
if __name__ == "__main__":
    # Candidate data source
    candidates = [
        "MB_YESNO",
        "MB_ICONQUESTION",
        "MB_ABORTRETRYIGNORE",
        "IDOK",
        "IDCANCEL",
        "MESSAGEBOX_YESNO",
    ]

    # Query string
    query = "m_no"

    # Run the fuzzy match; candidates are looked up by index.
    results = fuzzy_match_and_score(
        count=len(candidates),
        query=query,
        pred=lambda i: candidates[i]
    )
    print(f"总共匹配到 {len(results)} 项：")
    for index, score in results:
        print(f"\t{candidates[index]}（得分: {score}）")

    # Worked example: query "m_no" against "MB_YESNO".
    #
    #   length factor   = query length / target length
    #   position factor = 1.0 - (index just past the last match / target length)
    #   final score     = int(int(raw * (0.5 + 0.5 * length factor))
    #                         * (0.7 + 0.3 * position factor))
    #
    #   step  char  target index  contribution
    #   1     m     0             base +10, first char of both strings +50
    #   2     _     2             base +10, skipped 1 char: -1
    #   3     n     6             base +10, skipped 3 chars: -5
    #   4     o     7             base +10, consecutive +25, extra +10
    #
    #   raw total       = 10+50+10-1+10-5+10+25+10 = 119
    #   length factor   = 4 / 8 = 0.5
    #   position factor = 1.0 - (8 / 8) = 0.0
    #   final score     = int(int(119 * 0.75) * 0.7) = 62
    #
    # Value of ``results`` for the data above:
    #   [(0, 62), (2, 47), (5, 45), (1, 31)]
# 本文由青舟模板网发布，如若转载，请注明出处：http://qingzo.com/jishu/20250504150105.html