集合知プログラミング 2.3.4をperl化してみた(評価者のランキング)

#!/usr/bin/perl -w

use warnings;
use strict;
use Dumpvalue;

require 'pearson.pl';

my %prefs = (
  'Lisa Rose' => {
    'Lady in the Water'  => 2.5,
    'Snakes on a Plane'  => 3.5,
    'Just My Luck'       => 3.0,
    'Superman Returns'   => 3.5,
    'You, Me and Dupree' => 2.5,
    'The Night Listner'  => 3.0,
  },
  'Gene Seymour' => {
    'Lady in the Water'  => 3.0,
    'Snakes on a Plane'  => 3.5,
    'Just My Luck'       => 1.5,
    'Superman Returns'   => 5.0,
    'You, Me and Dupree' => 3.5,
    'The Night Listner'  => 3.0,
  },
  'Michael Pillips' => {
    'Lady in the Water'  => 2.5,
    'Snakes on a Plane'  => 3.0,
    'Superman Returns'   => 3.5,
    'The Night Listner'  => 4.0,
  },
  'Claudia Puig' => {
    'Snakes on a Plane'  => 3.5,
    'Just My Luck'       => 3.0,
    'The Night Listner'  => 4.5,
    'Superman Returns'   => 4.0,
    'You, Me and Dupree' => 2.5,
  },
  'Mick LaSalle' => {
    'Lady in the Water'  => 3.0,
    'Snakes on a Plane'  => 4.0,
    'Just My Luck'       => 2.0,
    'Superman Returns'   => 3.0,
    'The Night Listner'  => 3.0,
    'You, Me and Dupree' => 2.0,
  },
  'Jack Matthew' => {
    'Lady in the Water'  => 3.0,
    'Snakes on a Plane'  => 4.0,
    'The Night Listner'  => 3.0,
    'Superman Returns'   => 5.0,
    'You, Me and Dupree' => 3.5,
  },
  'Toby' => {
    'Snakes on a Plane'  => 4.5,
    'You, Me and Dupree' => 1.0,
    'Superman Returns'   => 4.0,
  },
);

# ディクショナリprefsからpersonにもっともマッチするものたちを返す
# 結果の数(3)を類似性関数(sim_pearson)はオプションのパラメータ
sub topMatches {
  my($person, $n, $sim) = @_;
  
  my @scores = ();
  foreach my $other(keys(%prefs)) {
    if ($other ne $person) {
      push(@scores, [$sim->($person, $other), $other]);
    }
  }
  
  # スコアが高い順にソート
  @scores = sort { $b->[0] <=> $a->[0] } @scores;
  
  return \@scores;
}

# 評価者をランキング
Dumpvalue->new->dumpValue(topMatches('Toby', 3, \&sim_pearson));

1;

前の記事のピアソンの相関関数を利用して計算してます。


・出力結果

[yuki@sorauta corrective]$ perl topMatches.pl
0  ARRAY(0xaa6b240)
   0  0.99124070716193
   1  'Lisa Rose'
1  ARRAY(0xaa755d0)
   0  0.924473451641905
   1  'Mick LaSalle'
2  ARRAY(0xaa6b390)
   0  0.893405147441565
   1  'Claudia Puig'
3  ARRAY(0xaa75590)
   0  0.66284898035987
   1  'Jack Matthew'
4  ARRAY(0xaa6a6f0)
   0  0.381246425831512
   1  'Gene Seymour'
5  ARRAY(0xaa6b3c0)
   0  '-1'
   1  'Michael Pillips'