设为首页收藏本站
查看: 140|回复: 0

[PHP] Rmm分词算法代码片段-PHP源码

[复制链接]

论坛元老

Rank: 6

积分
34274
主题
17031
UID
1347
M币
67
贡献
17176

  • 发表于 2018-7-16 21:17:00 | 显示全部楼层 |阅读模式
    /**
     * Segments text into words ("RMM" segmentation, per the post title).
     *
     * The source string is revised, split on spaces, and the resulting tokens
     * are walked from last to first; each processed token is prepended to
     * $this->ResultString, separated by $this->SplitChar. ResultString is not
     * cleared here, so output accumulates onto whatever it already holds.
     *
     * NOTE(review): two conditions in the published listing were destroyed by
     * HTML tag stripping (everything between "<" and ">" was removed). They
     * are reconstructed below and marked individually; confirm both against
     * the original library source.
     *
     * NOTE(review): the POSIX ereg()/ereg_replace() calls (removed in PHP 7.0)
     * are replaced by preg_match()/substr(). This assumes $this->CommonUnit and
     * $this->EspecialChar contain no "/" character and are valid PCRE as well
     * as valid POSIX ERE - TODO confirm where those properties are defined.
     *
     * @param string $str Optional text; when non-empty it is trimmed and passed
     *                    to SetSource() before segmentation.
     * @return string $this->ResultString after processing, or "" when
     *                $this->SourceString is empty.
     */
    function SplitRMM($str = "")
    {
        if ($str != "") {
            $this->SetSource(trim($str));
        }
        if ($this->SourceString == "") {
            return "";
        }

        // Coarse pre-split of the text.
        $this->SourceString = $this->ReviseString($this->SourceString);

        // Separate the text into space-delimited tokens.
        $spwords = explode(" ", $this->SourceString);
        $spLen = count($spwords);
        $spc = $this->SplitChar;

        // Loop-invariant patterns. The "D" modifier makes "$" match only at the
        // very end of the subject, which is what POSIX "$" did under ereg().
        $unitPattern = '/^' . $this->CommonUnit . '/';
        $tailPattern = '/' . $this->EspecialChar . '$/D';

        for ($i = $spLen - 1; $i >= 0; $i--) {
            if (trim($spwords[$i]) == "") {
                continue;
            }

            if ($this->NotGBK($spwords[$i])) {
                if (preg_match('/[^0-9.+-]/', $spwords[$i])) {
                    // Token contains something other than digits, ".", "+" or "-".
                    $this->ResultString = $spwords[$i] . $spc . $this->ResultString;
                } else {
                    // Purely numeric-looking token: look at the first word already
                    // in the result (text up to the first space, "" if none).
                    $spacePos = strpos($this->ResultString, " ");
                    $nextword = ($spacePos === false)
                        ? ""
                        : substr($this->ResultString, 0, $spacePos);

                    if (preg_match($unitPattern, $nextword)) {
                        // Next word starts with a common unit: join without separator.
                        $this->ResultString = $spwords[$i] . $this->ResultString;
                    } else {
                        $this->ResultString = $spwords[$i] . $spc . $this->ResultString;
                    }
                }
                continue;
            }

            // First two bytes of the token, read as one 16-bit code.
            $c = $spwords[$i][0] . $spwords[$i][1];
            $n = hexdec(bin2hex($c));

            // NOTE(review): $c is two bytes, so this literal only matches when the
            // file is saved in a double-byte encoding - confirm the file encoding.
            if ($c == "《") {
                // Book title.
                $this->ResultString = $spwords[$i] . $spc . $this->ResultString;
            } else if ($n > 0xA13F && $n < 0xAA40) {
                // NOTE(review): the upper bound was lost in the listing; 0xAA40 is
                // a reconstruction - confirm against the original source.
                $this->ResultString = $spwords[$i] . $spc . $this->ResultString;
            } else {
                // Normal short phrase.
                // NOTE(review): the comparison operator was lost in the listing;
                // "<=" is a reconstruction - confirm against the original source.
                if (strlen($spwords[$i]) <= $this->SplitLen) {
                    // If the token ends with a special split word, detach that word.
                    if (preg_match($tailPattern, $spwords[$i], $regs)) {
                        // The match is anchored at the end, so cutting its byte
                        // length off the tail removes exactly the matched text.
                        $head = substr($spwords[$i], 0, strlen($spwords[$i]) - strlen($regs[0]));
                        $spwords[$i] = $head . $spc . $regs[0];
                    }

                    // Does the token start with a common unit?
                    if (!preg_match($unitPattern, $spwords[$i]) || $i == 0) {
                        $this->ResultString = $spwords[$i] . $spc . $this->ResultString;
                    } else {
                        // Attach the unit to the preceding token and consume that
                        // token so the loop does not process it again.
                        $this->ResultString = $spwords[$i - 1] . $spwords[$i] . $spc . $this->ResultString;
                        $i--;
                    }
                } else {
                    $this->ResultString = $this->RunRMM($spwords[$i]) . $spc . $this->ResultString;
                }
            }
        }

        return $this->ResultString;
    }
    [/td] [/tr] [/table]
    回复

    使用道具 举报

    您需要登录后才可以回帖 登录 | 立即注册

    本版积分规则

    在我站开通SVIP可同时获得17个站点VIP资源 立即登录 立即注册
    快速回复 返回顶部 返回列表