У меня работает (хотя и не без странностей) вот такой скрипт:
#!/usr/bin/perl -w
#
# This perl script is intended to perform movie data lookups based on
# the popular russian www.kinopoisk.ru website
#
# For more information on MythVideo's external movie lookup mechanism, see
# the README file in this directory.
#
# Author: Tim Harvey (tharvey AT alumni.calpoly DOT edu)
# Modified: Sergei Gurjev, Andrei Rjeousski
# v1.1
# - Fix for changed website
#
# $Id: kinopoisk.pl 25 2009-07-13 21:00:20Z tipok $
# $Date: 2009-07-14 00:00:20 +0300 (Вто, 14 Июл 2009) $
#
# Directory where downloaded posters are written, and the URL prefix under
# which that same directory is published (getMoviePoster prints
# $http_remote_dir . <id>.<ext> as the poster location).
my $http_local_dir = "/var/lib/mythtv/videoimages/";
my $http_remote_dir = "http://localhost/mythvideoimages/";
use HTML::Entities;
use HTML::Strip;
# One shared tag stripper, reused for every HTML fragment in getMovieData.
my $hs = HTML::Strip->new();
use URI::Escape;
use Switch;
use LWP::UserAgent; # libwww-perl providing HTML get-post actions
# One shared user agent; the default headers below are sent with every request.
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)");
$ua->default_header('Host' => "www.kinopoisk.ru");
$ua->default_header('Accept' => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
$ua->default_header('Accept-Language' => "ru-ru,ru;q=0.5");
$ua->default_header('Connection' => "close");
# Package variables filled in by getopts() in the main program.
use vars qw($opt_h $opt_r $opt_d $opt_i $opt_v $opt_D $opt_M $opt_P);
use Getopt::Std;
use Text::Iconv;
# $converter turns fetched pages (cp1251) into UTF-8;
# $back_converter turns the UTF-8 search query into cp1251 for the request URL.
$converter = Text::Iconv->new("cp1251", "utf8");
$back_converter = Text::Iconv->new("utf8", "cp1251");
# Banner fields printed by version().
$title = "KINOPOISK Query";
$version = '$Rev: 25 $';
$author = "Sergei Gurjev, Tim Harvey, Andrei Rjeousski";
# NOTE(review): @countries is not referenced anywhere else in the visible script.
my @countries = qw(UA RU BY);
#binmode(STDOUT, ":utf8");
# Print the command-line synopsis to STDOUT and terminate with exit(-1).
sub usage {
    print <<"END_OF_USAGE";
usage: $0 -hdrviMPD [parameters]
 -h help
 -d debug
 -r dump raw query result data only
 -v display version
 -i display info

 -M [options] <query> get movie list
 some known options are:
 clean=[yes|no] clean input data (remove unneeded text) default: yes
 Note: multiple options must be separated by ','
 -P <movieid> get movie poster
 -D <movieid> get movie data
END_OF_USAGE
    exit(-1);
}
# Print a single banner line built from the script-level $title, $version
# and $author variables.
sub version {
    print $title . " (" . $version . ") by " . $author . "\n";
}
# Print a single line describing which website this grabber queries.
sub info {
    my $description = "Performs queries using the www.kinopoisk.ru website.";
    print $description, "\n";
}
# Show the full help: banner line, description line, then the usage text.
# usage() ends the program, so this sub does not return.
sub help {
    for my $step (\&version, \&info, \&usage) {
        $step->();
    }
}
# Return a copy of $str with leading and trailing whitespace removed.
#
# Parameters:
#   $str - the string to clean up; an undefined value is returned unchanged.
#
# The patterns must use the \s whitespace class. Written without the
# backslash they matched runs of the literal letter "s" and silently ate
# characters from values such as "ssh" or "glass".
sub trim {
    my ($str) = @_;
    return $str unless defined $str;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    return $str;
}
# Return the part of $data that lies between the first occurrence of $beg
# and the next occurrence of $end after it. Both markers are located on
# lower-cased copies, so the search ignores case wherever lc() changes the
# text. HTML entities in the extracted text are decoded
# (see http://www.w3.org/TR/html4/charset.html#h-5.3.1).
# Returns "" when either marker cannot be found.
sub parseBetween {
    my ($data, $beg, $end) = @_;

    my $haystack = lc($data);

    my $beg_at = index($haystack, lc($beg));
    return "" if $beg_at == -1;

    my $from = $beg_at + length($beg);
    my $to   = index($haystack, lc($end), $from);
    return "" if $to == -1;

    my $result = substr($data, $from, $to - $from);
    decode_entities($result);
    return $result;
}
# Strip markup from one HTML fragment with the shared HTML::Strip object and
# reset the parser afterwards, so that an unterminated tag in one fragment
# cannot affect how the next fragment is parsed.
sub _kpStripHtml {
    my ($html) = @_;
    my $text = $hs->parse($html);
    $hs->eof;
    return defined $text ? $text : "";
}

# Extract the text of one labelled row of the film page.
#   $page  - the whole page
#   $label - marker that precedes the value, e.g. ">год<"
#   $end   - marker that follows the value
# The label ends with the '<' of the following tag, so that character is put
# back before the fragment is handed to the tag stripper.
sub _kpField {
    my ($page, $label, $end) = @_;
    my $fragment = parseBetween($page, $label, $end);
    return trim(_kpStripHtml("<" . $fragment));
}

# Fetch the kinopoisk.ru page of one film and print its metadata to STDOUT.
#
# Parameters:
#   $movieid - kinopoisk film id; it is placed verbatim into the request URL
#              http://www.kinopoisk.ru/level/1/film/<id>/
#
# Output: one line each for Title, Year, Director, Plot, UserRating,
# MovieRating, Runtime, Writers, Cast, Genres and Countries (these field
# names must match what MythVideo is looking for).
# With -d, "# ..." debug lines are printed; with -r the converted page is
# dumped before the fields.
# If the HTTP request fails the program ends with exit(0); if the page cannot
# be converted the sub returns without printing any field.
#
# NOTE(review): this sub was restored from a copy that had lost every
# backslash. The escapes (\s, \d, \", \., \x..) were put back from context;
# lines marked "presumed" should be confirmed against the real page markup.
sub getMovieData {
    my ($movieid) = @_;
    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid); }

    # get the film page
    my $request = "http://www.kinopoisk.ru/level/1/film/" . $movieid . "/";
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my $req = HTTP::Request->new(GET => $request);
    my $res = $ua->request($req);

    # Check the outcome of the response
    if (!$res->is_success) {
        if (defined $opt_d) { printf("# status: '%s'\n", $res->status_line); }
        exit(0);
    }
    my $response = $converter->convert($res->content);
    return if !defined $response;
    # presumed: the source showed a blank pattern here, most likely an
    # "&nbsp;" entity that was rendered before the script was copied
    $response =~ s/&nbsp;/ /gi;
    if (defined $opt_r) { printf("%s", $response); }

    # parse title (local title, optionally followed by "/" and the original one)
    my $data = parseBetween($response, "<H1 style=\"padding:0px;margin:0px\" class=\"moviename-big\">", "</H1>");
    my $title = trim(_kpStripHtml($data));
    $data = parseBetween($response, "<span style=\"font-size:13px;color:#666\">", "</span>");
    my $title_eng = trim(_kpStripHtml($data));
    if ($title_eng ne "") {
        $title = $title . "/" . $title_eng;
    }

    # parse director
    my $director = _kpField($response, ">режиссер<", "</a><");
    $director =~ s/\s,/,/g;

    # parse year
    my $year = _kpField($response, ">год<", "</a>");

    # parse writer
    my $writer = _kpField($response, ">сценарий<", "</a><");
    $writer =~ s/\s,/,/g;

    # parse plot
    my $plot = parseBetween($response, "<span class=\"_reachbanner_\">", "</span>");
    $plot =~ s/&[a-zA-Z]{3,};/ /g;      # leftover named entities
    $plot =~ s/[\x09\n]/ /g;            # tabs and line breaks
    $plot =~ s/\s{2,}/ /g;
    $plot = trim($plot);
    $plot = _kpStripHtml($plot);
    # Bytes 0x97 / 0x85 (cp1251 dash / ellipsis) that are not the second byte
    # of a UTF-8 Cyrillic letter (lead bytes 0xD0 / 0xD1) become ASCII.
    $plot =~ s/([^\xD1\xD0]{1})\x97/$1-/g;
    $plot =~ s/([^\xD1\xD0]{1})\x85/$1.../g;
    if (!$plot) {
        $plot = "NA";
    }

    # parse KINOPOISK rating: first "digits.digits" standing alone in a tag.
    # Both halves are kept even when one of them is "0" (e.g. 7.0).
    my $userrating = "";
    if ($response =~ m/>(\d+)\.(\d+)</) {
        $userrating = "KP: " . $1 . "." . $2;
    }

    # parse IMDB rating, e.g. "6.20 (1 921)" -> "6.20"
    my $imdbrating = parseBetween($response, ">IMDB: ", "<");
    if ($imdbrating) {
        $imdbrating =~ s/([\d.]+).*/$1/s;
        $userrating .= ", " if ($userrating);
        $userrating .= "IMDB: " . $imdbrating;
    }

    # parse MPAA rating. Sample markup:
    #<a href='/level/38/film/273296/rn/R/' class='all'><img src='/images/mpaa/R.gif' height=11 alt='рейтинг R' border=0></a>
    #<a href='/level/38/film/220619/rn/PG-13/' class='all'><img src='/images/mpaa/PG-13.gif' height=11 alt='рейтинг PG-13' border=0></a>
    # Either quote style and either case of the first letter is accepted,
    # because the samples above use single quotes and a lower-case word.
    my $movierating = trim(parseBetween($response, ">рейтинг MPAA<", "</a><"));
    if ($movierating =~ m/<img.*alt=["'](?:Р|р)ейтинг (.+?)["']/) {
        $movierating = $1;
    } else {
        $movierating = "";
    }

    # parse movie length
    $data = trim(parseBetween($response, ">время<", ".</"));
    my $runtime = trim(_kpStripHtml("<" . $data));
    $runtime =~ s/ мин\.?//g;

    # parse cast: every closing link tag becomes a list separator
    my $cast = "";
    $data = parseBetween($response, ">В главных ролях:<", "</table>");
    if ($data) {
        $data =~ s{</a>}{,}gi;
        $cast = trim(_kpStripHtml("<" . $data));
    }
    $cast =~ s/\s{2,}/ /g;
    $cast =~ s/\n/ /g;
    $cast =~ s/\s,/,/g;
    # drop the "..." placeholder entry (presumed: a literal ellipsis item)
    $cast =~ s/, \.\.\.,//g;

    # parse genres
    my $lgenres = _kpField($response, ">жанр<", "</a><");
    $lgenres =~ s/\s,/,/g;

    # parse countries
    my $lcountries = _kpField($response, ">страна<", "</a><");
    $lcountries =~ s/\s,/,/g;

    # output fields (these field names must match what MythVideo is looking for)
    print "Title:$title\n";
    print "Year:$year\n";
    print "Director:$director\n";
    #bug workaround.
    # print "Plot:".pack( 'U*', unpack( 'U*', $plot))."\n";
    print "Plot:$plot\n";
    print "UserRating:$userrating\n";
    print "MovieRating:$movierating\n";
    print "Runtime:$runtime\n";
    print "Writers: $writer\n";
    print "Cast: $cast\n";
    print "Genres: $lgenres\n";
    print "Countries: $lcountries\n";
}
# Fetch one HTML page for getMoviePoster and return it converted to UTF-8.
# Prints the request (and, on failure, the status line) when -d is set and
# dumps the page when -r is set. Ends the program with exit(0) when the
# request fails. The return value is undefined when the conversion yields
# nothing.
sub _posterFetchPage {
    my ($request) = @_;
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my $res = $ua->request(HTTP::Request->new(GET => $request));
    if (!$res->is_success) {
        if (defined $opt_d) { printf("# status: '%s'\n", $res->status_line); }
        exit(0);
    }
    my $page = $converter->convert($res->content);
    if (defined $opt_r && defined $page) { printf("%s", $page); }
    return $page;
}

# Download the poster of one film into $http_local_dir and print the URL
# under which it can be fetched ($http_remote_dir . <id>.<ext>).
#
# Parameters:
#   $movieid - kinopoisk film id
#
# Steps:
#   1. read the poster list page (/level/17/film/<id>/);
#   2. pick the first poster that is taller than wide, else the first one;
#   3. read that poster's picture page and take the image address from it;
#   4. if the list page had no poster table at all, fall back to
#      /images/film/<id>.jpg;
#   5. download the image and store it as <int(id)>.<ext>.
# The sub returns without printing anything when a poster table exists but no
# usable entry or image address is found. A failed HTTP request ends the
# program with exit(0); a file that cannot be written makes it die.
#
# NOTE(review): restored from a copy that had lost every backslash; the
# marker strings (tab counts, line breaks, quotes) were rebuilt from what was
# left and should be confirmed against the real page markup.
sub getMoviePoster {
    my ($movieid) = @_;
    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid); }

    my $uri      = "";
    my $img_uri  = "";
    my $http_url = "";   # picture page, later sent as Referer

    # get the poster list page
    # http://www.kinopoisk.ru/level/1/film/252156/
    my $response = _posterFetchPage("http://www.kinopoisk.ru/level/17/film/" . $movieid . "/");
    return if !defined $response;

    my $tab = "\x09";
    my $matches = parseBetween(
        $response,
        "\n" . ($tab x 5) . "<table cellspacing=0 cellpadding=0 width=100% border=0>\n"
             . ($tab x 6) . "<tr>\n",
        "\n" . ($tab x 6) . "</tr>\n"
             . ($tab x 5) . "</table>\n"
    );

    if ($matches eq "") {
        if (defined $opt_d) { printf("# no results\n"); }
    } else {
        my $beg = "<td align=center>\n";
        my $end = "\n</td>";

        # Collect [picture id, width, height, size in Kb] for every cell.
        my @images;
        my $pos = index($matches, $beg);
        while ($pos != -1) {
            my $from = $pos + length($beg);
            my $to   = index($matches, $end, $from);
            # A cell without its end marker would otherwise restart the
            # search from the beginning and never terminate.
            last if $to == -1;
            my $entry = substr($matches, $from, $to - $from);
            $pos = index($matches, $beg, $to + 1);

            if ($entry =~ m{<a href="/picture/(\d+)/"><img src='/images/poster/sm_.+?<small><font color="#777777">(\d*)x(\d*)</font></small>.+?<small><font color="#777777">(\d*) Кб</font></small>}s) {
                # A fresh record per cell, so values never carry over from
                # the previous entry.
                push @images, [ $1, $2 || 0, $3 || 0, $4 || 0 ];
            }
        }

        # Prefer the first portrait-shaped poster; otherwise take the first.
        my $chosen = $images[0];
        for my $candidate (@images) {
            if ($candidate->[2] > $candidate->[1]) {
                $chosen = $candidate;
                last;
            }
        }
        return if !defined $chosen || $chosen->[0] eq "";

        $http_url = "http://www.kinopoisk.ru/picture/" . $chosen->[0] . "/";
        $response = _posterFetchPage($http_url);
        return if !defined $response;

        $matches = parseBetween(
            $response,
            "\n" . ($tab x 2) . "<td colspan=3 valign='top' style=\"padding-left:20px\">\n"
                 . ($tab x 3) . "<table cellpadding=0 cellspacing=0>\n"
                 . ($tab x 4) . "<tr>\n",
            "\n" . ($tab x 4) . "</tr>\n"
                 . ($tab x 8) . "\n"
                 . ($tab x 3) . "</table>" . ($tab x 3) . "\n"
                 . ($tab x 3) . "\n"
                 . ($tab x 2) . "</td>\n"
        );

        my $img_url;
        if ($matches =~ m{<td width=\d*?><a href=".*?"><img.+?src='(.+?)'.*?></a></td>}s) {
            $img_url = $1;
        }
        return if !$img_url;
        $img_uri = "http://www.kinopoisk.ru" . $img_url;

        # NOTE(review): this reports the size of the picture page, not of the
        # image, although the message says otherwise. Kept as found.
        if (defined $opt_r) { printf("%s bytes of image data", length($response)); }
    }

    # No poster table on the list page: fall back to the default film image.
    if ($img_uri eq "") {
        $img_uri = "http://www.kinopoisk.ru/images/film/" . $movieid . ".jpg";
    }
    if (defined $opt_d) { printf("# request: '%s'\n", $img_uri); }

    my $req = HTTP::Request->new(GET => $img_uri);
    $req->header('Referer' => $http_url) if $http_url ne "";
    $req->header('Host' => "www.kinopoisk.ru");
    my $res = $ua->request($req);

    # Check the outcome of the response
    if (!$res->is_success) {
        if (defined $opt_d) { printf("# status: '%s'\n", $res->status_line); }
        exit(0);
    }

    # File extension: from the Content-Type when it is a known image type,
    # otherwise from the requested address.
    my @known_types = (
        [ qr{image/gif}i      => "gif" ],
        [ qr{image/jpeg}i     => "jpg" ],
        [ qr{image/png}i      => "png" ],
        [ qr{image/x-ms-bmp}i => "bmp" ],
        [ qr{image/tiff}i     => "tif" ],
    );
    my $content_type = $res->header('Content-Type');
    $content_type = "" if !defined $content_type;
    my $img_ext;
    for my $type (@known_types) {
        if ($content_type =~ $type->[0]) {
            $img_ext = $type->[1];
            last;
        }
    }
    if (!defined $img_ext) {
        $img_ext = ($img_uri =~ m/.+\.(.+)$/) ? $1 : "";
    }

    # Image data is binary, so the handle is switched to binmode.
    my $local_file = $http_local_dir . int($movieid) . "." . $img_ext;
    open(my $out, '>', $local_file)
        or die "No directory created: $local_file: $!";
    binmode($out);
    print {$out} $res->content;
    close($out) or die "Cannot write $local_file: $!";

    $uri = $http_remote_dir . int($movieid) . "." . $img_ext;
    print "$uri\n";
}
# Search kinopoisk.ru for a film and print the candidates,
# 1 entry per line, each line as 'movieid:Movie Title'.
#
# Parameters:
#   $filename - file name or free-text query (may be URI-escaped)
#   $options  - comma separated key=value list; only "clean" is used:
#               clean=no keeps the query as given, anything else (or no
#               option) removes extension, bracketed remarks, release-group
#               tags and a trailing ", The".
#
# Behaviour:
#   - with -d, "# ..." debug lines are printed;
#   - with -r the raw result page is printed and the program exits;
#   - when the site answers with a film page instead of a result list, that
#     single film is printed and the program exits;
#   - a failed HTTP request ends the program with exit(0);
#   - returns without output when the result page holds no match section.
#
# NOTE(review): restored from a copy that had lost every backslash; escapes
# were put back from context and the lines marked "presumed" should be
# confirmed against the real page markup.
sub getMovieList {
    my ($filename, $options) = @_;
    $options = "" if !defined $options;

    # If we wanted to inspect the file for any reason we can do that now
    #
    # Convert filename into a query string
    # (use same rules that Metadata::guesTitle does)
    my @releaseGroups = ("DVDRip",
                         "CAMRip",
                         "DVDScr",
                         "HDRip",
                         "XviD",
                         "DivX",
                         "AC3",
                         "H264",
                         "x264",
                         " L2 ",
                         " TS ",
                         "ELEKTRI4KA",
                         "INTERFILM",
                         "KINODOME",
                         "HQ-ViDEO",
                         "STOP.SNYATO",
                         "BINMOVIE",
                         "PUZKARAPUZ",
                         "KiNOFACK",
                         "Epidem.ru",
                         "uniongang.ru",
                         "LostFilm.TV",
                         "torrents.ru",
                         "BiNuRaL.Ru",
                         "ShareReactor.ru",
                         "SenatorInfo.com"
                        );

    my $query = uri_unescape($filename);   # in case it was escaped

    my %op;
    foreach my $pair (split(",", $options)) {
        my ($key, $val) = split("=", $pair, 2);
        $op{$key} = $val;
    }

    if (!defined $op{'clean'} || $op{'clean'} ne "no") {
        # Strip off the file extension
        if (rindex($query, '.') != -1) {
            $query = substr($query, 0, rindex($query, '.'));
        }
        # Strip off anything following '(' - people use this for general comments
        if (rindex($query, '(') != -1) {
            $query = substr($query, 0, rindex($query, '('));
        }
        # Strip off anything following '[' - people use this for general comments
        if (rindex($query, '[') != -1) {
            $query = substr($query, 0, rindex($query, '['));
        }
        $query =~ s/[._]/ /g;

        # Strip off anything about release-groups and quality of video.
        # The names are deliberately used as patterns, not quoted: the dots
        # in e.g. "Epidem.ru" must still match after the substitution above
        # has turned every dot of the query into a space.
        foreach my $group (@releaseGroups) {
            $query =~ s/$group/ /gi;
        }

        # KINOPOISK searches do better if any trailing ,The is left off.
        # The capture is only used when this very match succeeded.
        if ($query =~ /(.*), The$/i && length($1)) {
            $query = $1;
        }
        $query =~ s/\s{2,}/ /g;
        if (defined $opt_d) { printf("# q: '%s'\n", $query); }
    }

    # prepare the url: the site expects the query in cp1251, '+' for spaces
    #http://www.kinopoisk.ru/index.php?kp_query=%C1%E5%EB%EE%E5+%F1%EE%EB%ED%F6%E5+%EF%F3%F1%F2%FB%ED%E8&x=55&y=9
    #http://www.kinopoisk.ru/index.php?kp_query=%C1%E5%EB%EE%E5+%F1%EE%EB%ED%F6%E5+%EF%F3%F1%F2%FB%ED%E8&x=0&y=0
    $query = $back_converter->convert(trim($query));
    $query = uri_escape($query);
    $query =~ s/%20/+/g;
    if (defined $opt_d) {
        printf("# query: '%s', options: '%s'\n", $query, $options);
    }

    #TODO: add query options
    # my $request = "http://www.kinopoisk.ru/index.php?kp_query=$query$options";
    my $request = "http://www.kinopoisk.ru/index.php?kp_query=$query";
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my $req = HTTP::Request->new(GET => $request);
    my $res = $ua->request($req);

    # Check the outcome of the response
    if (!$res->is_success) {
        if (defined $opt_d) { printf("# status: '%s'\n", $res->status_line); }
        exit(0);
    }
    my $response = $converter->convert($res->content);
    if (defined $opt_r) {
        print $response;
        exit(0);
    }

    # check to see if we got a results page or a movie page
    my $movienum = parseBetween($response, "src=\"/images/film/", ".jpg");
    if ($movienum) {
        if (defined $opt_d) { printf("# redirected to movie page\n"); }
        my $movietitle = parseBetween($response, "<tr><td><H1 style=\"padding:0px;margin:0px\" class=\"moviename-big\">", " </H1></td></tr>");
        my $movietitle_eng = parseBetween($response, "<span style=\"font-size:13px;color:#666\">", "</span>");
        if ($movietitle_eng) {
            print "$movienum:$movietitle / $movietitle_eng\n";
        } else {
            print "$movienum:$movietitle\n";
        }
        exit(0);
    }

    # extract possible matches: the "most likely" section followed by the
    # "similar results" section
    my $popular_results = parseBetween($response, "Скорее всего вы ищете:",
        "<tr><td colspan=2 height=5><spacer type=block height=5></td></tr>");
    my $exact_matches = parseBetween($response, "Похожие результаты:",
        "<tr><td colspan=2 height=5><spacer type=block height=5></td></tr>\x09");
    # print $popular_results."\n".$exact_matches;
    # exit;
    # my $partial_matches = parseBetween($response, "<b>Список найденных имён:</b>",
    #     "<tr><td colspan=2 align=\"right\">");

    my $data = $popular_results . $exact_matches;
    if ($data eq "") {
        if (defined $opt_d) { printf("# no results\n"); }
        return;
    }

    my $beg = "class=\"news\">";
    my $end = "</font>";
    my @movies;

    my $pos = index($data, $beg);
    while ($pos != -1) {
        my $from = $pos + length($beg);
        my $to   = index($data, $end, $from);
        # An entry without its end marker would otherwise restart the search
        # from the beginning and never terminate.
        last if $to == -1;
        my $entry = substr($data, $from, $to - $from);
        $pos = index($data, $beg, $to + 1);

        # link to the film, local title, year, then the grey original title
        if ($entry !~ m{.*<a class="all" href="(.*?)">(.*?)</a>.*?>(\d*?)</a>.*?<font color="#999999">(.*)}s) {
            if (defined $opt_d) {
                print("Unrecognized entry format ($entry)\n");
            }
            next;
        }
        my ($movieurl, $title, $year, $engl) = ($1, $2, $3, $4);

        # http://www.kinopoisk.ru/level/1/film/252156/
        next unless $movieurl =~ m{level/1/film/(\d+?)/.*};
        my $id = $1;

        # presumed: the original title is shown as "... Title"; the leading
        # dots and the blank after them are removed.
        # NOTE(review): confirm against the real result markup.
        $engl =~ s/^\s*\.+\s+//s;
        if ($engl) {
            $title = $title . "/" . $engl;
        }

        my $moviename = $title;
        if ($year ne "") {
            $moviename .= " ($year)";
        }
        push @movies, $id . ":" . $moviename;
    }

    # display array of values
    for my $movie (@movies) { print "$movie\n"; }
}
#
# Main Program
#
# parse command line arguments
# Read the single-letter switches into the $opt_* variables.
getopts('ohrdivDMP');

# -v and -i each print one line and stop with exit status 1.
if (defined $opt_v) {
    version();
    exit 1;
}
if (defined $opt_i) {
    info();
    exit 1;
}

# Full help on request, or when no argument is left after the switches.
help() if defined $opt_h || !@ARGV;

# Dispatch on the requested action; the first of -D, -P, -M wins.
if (defined $opt_D) {
    # movie id comes from the next command line argument
    $movieid = shift(@ARGV) || die "Usage : $0 -D <movieid>\n";
    getMovieData($movieid);
}
elsif (defined $opt_P) {
    # movie id comes from the next command line argument
    $movieid = shift(@ARGV) || die "Usage : $0 -P <movieid>\n";
    getMoviePoster($movieid);
}
elsif (defined $opt_M) {
    # Either "<options> <query>" or just "<query>": when only one argument
    # is present it is the query and the options are empty.
    $options = shift(@ARGV) || die "Usage : $0 -M [options] <query>\n";
    $query = shift(@ARGV);
    unless ($query) {
        ($query, $options) = ($options, "");
    }
    getMovieList($query, $options);
}
# vim: set expandtab ts=3 sw=3 :
Он не очень свежий, но работает вроде.
Насчет VDPAU. В 9.10 и соответственно в MythTV 0.22 оно должно работать из коробки.