形態素解析

Yahoo! の形態素解析 Web API (jlp.yahooapis.jp の MAService) を呼び出すだけの、なんちゃって形態素解析クラスです。

<?php
/**
 * Minimal client for the morphological-analysis web service at
 * jlp.yahooapis.jp (MAService V1).
 *
 * Usage: set an application ID with setYahooApplicationID(), then call
 * parse() with the sentence to analyse.
 *
 * NOTE(review): the endpoint is plain HTTP and version V1 — confirm it is
 * still served before relying on this class.
 */
class MorphologicalAnalysis
{
  /** Application ID sent as the `appid` query parameter; null until set. */
  private $yahooApplicationId_ = null;

  /**
   * Stores the application ID used by subsequent parse() calls.
   *
   * @param string $id Application ID.
   */
  public function setYahooApplicationID($id)
  {
    $this->yahooApplicationId_ = $id;
  }

  /**
   * Sends $text to the web service and returns the analysed words.
   *
   * @param string $text Sentence to analyse.
   * @return array|false List of words, each an associative array keyed by
   *                     the response element names (the request asks for
   *                     surface, reading, pos, baseform and feature), or
   *                     false when the request fails or the response cannot
   *                     be interpreted.
   */
  public function parse($text)
  {
    // Both user-supplied values are URL-encoded so that characters such as
    // '&' or '#' cannot corrupt the query string.
    $url = 'http://jlp.yahooapis.jp/MAService/V1/parse'
      . '?appid=' . urlencode((string) $this->yahooApplicationId_)
      . '&results=ma'
      . '&response=surface,reading,pos,baseform,feature'
      . '&sentence=' . urlencode($text);

    $contents = file_get_contents($url);
    // file_get_contents() returns false on failure, and loadXML() rejects an
    // empty string, so bail out before touching the XML parser.
    if ($contents === false || $contents === '') {
      return false;
    }

    $document = new DOMDocument();
    if (!$document->loadXML($contents)) {
      return false;
    }

    // Exactly one <ma_result> element is expected in a valid response.
    $maResults = $document->getElementsByTagName('ma_result');
    if ($maResults->length != 1) {
      return false;
    }

    return $this->__parseMaResult($maResults->item(0));
  }

  /**
   * Converts a <ma_result> element into a list of word arrays.
   *
   * The double-underscore name is kept for backward compatibility even
   * though PHP reserves that prefix for magic methods.
   *
   * @param DOMNode $maResult The <ma_result> element.
   * @return array|false List of words, or false when the number of words
   *                     found differs from the <total_count> value.
   */
  public function __parseMaResult($maResult)
  {
    $count = 0;
    $list = array();

    foreach ($maResult->childNodes as $item) {
      // Skip whitespace text nodes and comments between elements.
      if ($item->nodeType !== XML_ELEMENT_NODE) {
        continue;
      }

      if ($item->nodeName == 'total_count') {
        $count = (int) trim($item->textContent);
        continue;
      }

      if ($item->nodeName != 'word_list') {
        continue;
      }

      foreach ($item->childNodes as $word) {
        if ($word->nodeType !== XML_ELEMENT_NODE) {
          continue;
        }

        // One entry per word: child element name => its text content.
        $entry = array();
        foreach ($word->childNodes as $node) {
          if ($node->nodeType !== XML_ELEMENT_NODE) {
            continue;
          }
          $entry[$node->nodeName] = $node->textContent;
        }
        $list[] = $entry;
      }
    }

    // Sanity check: the service's reported total must match what was parsed.
    if (count($list) !== $count) {
      return false;
    }

    return $list;
  }

}
?>

こんな風に使えます。

<?php
// Load the class definition (assumes it was saved as MorphologicalAnalysis.php).
require_once('MorphologicalAnalysis.php');

$analyzer = new MorphologicalAnalysis();
// Replace the placeholder with your own application ID. It is written as a
// string literal because a bare word would be read as an undefined constant.
$analyzer->setYahooApplicationID('アプリケーションID');
$result = $analyzer->parse('庭には二羽ニワトリがいる。');
if ($result === false) {
  // parse() returns false when the response could not be interpreted;
  // print_r(false) would print nothing, so report the failure explicitly.
  echo "Morphological analysis failed.\n";
} else {
  print_r ($result);
}
?>

その結果は次のとおりです。

Array
(
    [0] => Array
        (
            [surface] => 庭
            [reading] => にわ
            [pos] => 名詞
            [baseform] => 庭
            [feature] => 名詞,名詞,*,庭,にわ,庭
        )

    [1] => Array
        (
            [surface] => に
            [reading] => に
            [pos] => 助詞
            [baseform] => に
            [feature] => 助詞,格助詞,*,に,に,に
        )

    [2] => Array
        (
            [surface] => は
            [reading] => は
            [pos] => 助詞
            [baseform] => は
            [feature] => 助詞,係助詞,*,は,は,は
        )

    [3] => Array
        (
            [surface] => 二
            [reading] => 2
            [pos] => 名詞
            [baseform] => 2
            [feature] => 名詞,数詞,*,二,2,2
        )

    [4] => Array
        (
            [surface] => 羽
            [reading] => わ
            [pos] => 接尾辞
            [baseform] => 羽
            [feature] => 接尾辞,助数,*,羽,わ,羽
        )

    [5] => Array
        (
            [surface] => ニワトリ
            [reading] => にわとり
            [pos] => 名詞
            [baseform] => ニワトリ
            [feature] => 名詞,名詞,*,ニワトリ,にわとり,ニワトリ
        )

    [6] => Array
        (
            [surface] => が
            [reading] => が
            [pos] => 助詞
            [baseform] => が
            [feature] => 助詞,格助詞,*,が,が,が
        )

    [7] => Array
        (
            [surface] => いる
            [reading] => いる
            [pos] => 動詞
            [baseform] => いる
            [feature] => 動詞,一段,基本形,いる,いる,いる
        )

    [8] => Array
        (
            [surface] => 。
            [reading] => 。
            [pos] => 特殊
            [baseform] => 。
            [feature] => 特殊,句点,*,。,。,。
        )

)