集合知プログラミング 2.3.2をperl化してみた(ピアソン相関係数)

成績の大盤振る舞いの誤差を考慮してくれる、ピアソン相関係数
pythonでかかれてたのでperlでry

#!/usr/bin/perl -w

use warnings;
use strict;
use Dumpvalue;

my %prefs = (
  'Lisa Rose' => {
    'Lady in the Water'  => 2.5,
    'Snakes on a Plane'  => 3.5,
    'Just My Luck'       => 3.0,
    'Superman Returns'   => 3.5,
    'You, Me and Dupree' => 2.5,
    'The Night Listner'  => 3.0,
  },
  'Gene Seymour' => {
    'Lady in the Water'  => 3.0,
    'Snakes on a Plane'  => 3.5,
    'Just My Luck'       => 1.5,
    'Superman Returns'   => 5.0,
    'You, Me and Dupree' => 3.5,
    'The Night Listner'  => 3.0,
  },
  'Michael Pillips' => {
    'Lady in the Water'  => 2.5,
    'Snakes on a Plane'  => 3.0,
    'Superman Returns'   => 3.5,
    'The Night Listner'  => 4.0,
  },
  'Claudia Puig' => {
    'Snakes on a Plane'  => 3.5,
    'Just My Luck'       => 3.0,
    'The Night Listner'  => 4.5,
    'Superman Returns'   => 4.0,
    'You, Me and Dupree' => 2.5,
  },
  'Mick LaSalle' => {
    'Lady in the Water'  => 3.0,
    'Snakes on a Plane'  => 4.0,
    'Just My Luck'       => 2.0,
    'Superman Returns'   => 3.0,
    'The Night Listner'  => 3.0,
    'You, Me and Dupree' => 2.0,
  },
  'Jack Matthew' => {
    'Lady in the Water'  => 3.0,
    'Snakes on a Plane'  => 4.0,
    'The Night Listner'  => 3.0,
    'Superman Returns'   => 5.0,
    'You, Me and Dupree' => 3.5,
  },
  'Toby' => {
    'Snakes on a Plane'  => 4.5,
    'You, Me and Dupree' => 1.0,
    'Superman Returns'   => 4.0,
  },
);

# pearson
sub sim_pearson {
  my($user1, $user2) = @_;
  my %si = ();
  
  # 両者が互いに評価しているアイテムのリストを取得
  foreach my $key(keys(%{$prefs{$user1}})) {
    if ($prefs{$user2}{$key}) {
      $si{$key} = 1;
    }
  }
  # 要素の数
  my $n = keys %si;
  if ($n == 0) {
    die('not mutched.');
  }
  
  my($sum1,$sum2,$sum1Sq,$sum2Sq,$pSum) = (0,0,0,0,0);
  foreach my $key(keys(%si)) {
    # 全ての嗜好を合計
    $sum1   += $prefs{$user1}{$key};
    $sum2   += $prefs{$user2}{$key};
    # 平方を合計
    $sum1Sq += $prefs{$user1}{$key} * $prefs{$user1}{$key};
    $sum2Sq += $prefs{$user2}{$key} * $prefs{$user2}{$key};
    # 積を合計
    $pSum += $prefs{$user1}{$key} * $prefs{$user2}{$key};
  }
  
  # ピアソンによるスコア計算
  my $num = $pSum - ($sum1 * $sum2 / $n);
  my $den = sqrt(($sum1Sq - ($sum1 * $sum1) / $n) * ($sum2Sq - ($sum2 * $sum2) / $n));
  if ($den == 0) {
    die('calc result is 0');
  }
  my $r = $num / $den;
  return $r;
}

print sim_pearson('Lisa Rose', 'Gene Seymour');

1;

ユークリッド距離みたいに直感的じゃないけど、
読んでいくとなんとなく理解できるアルゴリズムです。


・出力結果

0.39605901719067