形態素解析
Yahoo! の形態素解析 Web API を呼び出すだけの、なんちゃって形態素解析クラスです。
<?php
/**
 * Minimal client for the Yahoo! morphological-analysis web service
 * (MAService V1 `parse` endpoint).
 *
 * Usage: set the application ID with setYahooApplicationID(), then call
 * parse() with the sentence to analyse. Failures are reported by returning
 * false rather than by throwing.
 */
class MorphologicalAnalysis
{
    /** @var string|null Application ID sent as the `appid` query parameter. */
    private $yahooApplicationId_ = null;

    /**
     * Stores the application ID used by subsequent parse() calls.
     *
     * @param string $id Application ID.
     * @return void
     */
    public function setYahooApplicationID($id)
    {
        $this->yahooApplicationId_ = $id;
    }

    /**
     * Sends $text to the web service and returns the analysed words.
     *
     * @param string $text Sentence to analyse.
     * @return array|false List of words, each an associative array keyed by
     *                     the response field name (surface, reading, pos,
     *                     baseform, feature are the fields requested), or
     *                     false when no application ID is set, the request
     *                     fails, the response is not well-formed XML, it does
     *                     not contain exactly one `ma_result` element, or the
     *                     word count disagrees with `total_count`.
     */
    public function parse($text)
    {
        // Without an application ID the request cannot succeed; skip the round trip.
        if ($this->yahooApplicationId_ === null || $this->yahooApplicationId_ === '') {
            return false;
        }

        // Both caller-supplied values are URL-encoded so that reserved
        // characters cannot corrupt the query string.
        $url = 'http://jlp.yahooapis.jp/MAService/V1/parse'
            . '?appid=' . urlencode((string) $this->yahooApplicationId_)
            . '&results=ma'
            . '&response=surface,reading,pos,baseform,feature'
            . '&sentence=' . urlencode((string) $text);

        $contents = file_get_contents($url);
        // file_get_contents() returns false on failure; an empty body cannot
        // be parsed either (DOMDocument::loadXML() rejects an empty string).
        if ($contents === false || $contents === '') {
            return false;
        }

        $document = new DOMDocument();
        if ($document->loadXML($contents) === false) {
            return false;
        }

        $maResults = $document->getElementsByTagName('ma_result');
        if ($maResults->length !== 1) {
            return false;
        }

        return $this->__parseMaResult($maResults->item(0));
    }

    /**
     * Converts a `ma_result` element into a list of word records.
     *
     * Internal helper of parse(). The name and public visibility are kept
     * unchanged so that any existing external caller keeps working.
     *
     * @param DOMNode $maResult The `ma_result` element.
     * @return array|false One associative array per `word_list` child element,
     *                     or false when the number of collected words differs
     *                     from the value of `total_count`.
     */
    public function __parseMaResult($maResult)
    {
        $count = 0;
        $list = array();

        foreach ($maResult->childNodes as $item) {
            // Only elements carry data; whitespace text nodes and comments in
            // an indented response must not be mistaken for records.
            if ($item->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            if ($item->nodeName === 'total_count') {
                $count = (int) trim($item->textContent);
            } elseif ($item->nodeName === 'word_list') {
                foreach ($item->childNodes as $word) {
                    if ($word->nodeType === XML_ELEMENT_NODE) {
                        $list[] = $this->collectFields($word);
                    }
                }
            }
        }

        // Sanity check: the declared total must match what was collected.
        if (count($list) !== $count) {
            return false;
        }

        return $list;
    }

    /**
     * Maps each child element of a word node to `name => text content`.
     *
     * @param DOMNode $word A child element of `word_list`.
     * @return array Associative array of field name to field text.
     */
    private function collectFields($word)
    {
        $entry = array();

        foreach ($word->childNodes as $field) {
            if ($field->nodeType === XML_ELEMENT_NODE) {
                $entry[$field->nodeName] = $field->textContent;
            }
        }

        return $entry;
    }
}
?>
こんな風に使えます。
<?php
// Usage example: analyse one sentence and dump the resulting word list.
require_once('MorphologicalAnalysis.php');

$analyzer = new MorphologicalAnalysis();

// Placeholder: replace the string below with your own application ID.
// It is quoted because a bare, undefined constant is a fatal error on PHP 8.
$analyzer->setYahooApplicationID('アプリケーションID');

$result = $analyzer->parse('庭には二羽ニワトリがいる。');

// parse() returns false on failure; print_r(false) would print nothing.
if ($result === false) {
    echo "Morphological analysis failed.\n";
} else {
    print_r($result);
}
?>
その結果は次のとおりです。
Array ( [0] => Array ( [surface] => 庭 [reading] => にわ [pos] => 名詞 [baseform] => 庭 [feature] => 名詞,名詞,*,庭,にわ,庭 ) [1] => Array ( [surface] => に [reading] => に [pos] => 助詞 [baseform] => に [feature] => 助詞,格助詞,*,に,に,に ) [2] => Array ( [surface] => は [reading] => は [pos] => 助詞 [baseform] => は [feature] => 助詞,係助詞,*,は,は,は ) [3] => Array ( [surface] => 二 [reading] => 2 [pos] => 名詞 [baseform] => 2 [feature] => 名詞,数詞,*,二,2,2 ) [4] => Array ( [surface] => 羽 [reading] => わ [pos] => 接尾辞 [baseform] => 羽 [feature] => 接尾辞,助数,*,羽,わ,羽 ) [5] => Array ( [surface] => ニワトリ [reading] => にわとり [pos] => 名詞 [baseform] => ニワトリ [feature] => 名詞,名詞,*,ニワトリ,にわとり,ニワトリ ) [6] => Array ( [surface] => が [reading] => が [pos] => 助詞 [baseform] => が [feature] => 助詞,格助詞,*,が,が,が ) [7] => Array ( [surface] => いる [reading] => いる [pos] => 動詞 [baseform] => いる [feature] => 動詞,一段,基本形,いる,いる,いる ) [8] => Array ( [surface] => 。 [reading] => 。 [pos] => 特殊 [baseform] => 。 [feature] => 特殊,句点,*,。,。,。 ) )