433 lines
16 KiB
PHP
Executable File
433 lines
16 KiB
PHP
Executable File
<?php
|
|
class SimilarImages
|
|
{
|
|
const SEARCH_CACHE_DIR = "/data/search";
|
|
|
|
static public function get_services($services = null)
|
|
{
|
|
!$services && $services = "local";
|
|
|
|
if ($services == "all") {
|
|
$services = array_keys(CONFIG()->image_service_list);
|
|
} else {
|
|
$services = explode(',', $services);
|
|
}
|
|
|
|
foreach (array_keys($services) as $i) {
|
|
if ($services[$i] == "local")
|
|
$services[$i] = CONFIG()->local_image_service;
|
|
}
|
|
return $services;
|
|
}
|
|
|
|
static public function similar_images($options = [])
|
|
{
|
|
$errors = [];
|
|
$local_service = CONFIG()->local_image_service;
|
|
$services = $options['services'];
|
|
$services_by_server = [];
|
|
|
|
foreach ($services as $service) {
|
|
if (!isset(CONFIG()->image_service_list[$service]) || !($server = CONFIG()->image_service_list[$service])) {
|
|
$errors[] = ['services' => [$service], 'message' => $service . " is an unknown service"];
|
|
continue;
|
|
}
|
|
if (!isset($services_by_server[$server]))
|
|
$services_by_server[$server] = [];
|
|
$services_by_server[$server][] = $service;
|
|
}
|
|
|
|
if (!$services_by_server)
|
|
return ['posts' => new Rails\ActiveRecord\Collection(), 'posts_external' => new Rails\ActiveRecord\Collection(), 'similarity' => [], 'services' => [], 'errors' => 'No service selected/no local service'];
|
|
|
|
# If the source is a local post, read the preview and send it with the request.
|
|
if ($options['type'] == 'post') {
|
|
$source_file = $options['source']->preview_path();
|
|
} elseif ($options['type'] == 'file') {
|
|
$source_file = $options['source'];
|
|
}
|
|
|
|
$server_threads = [];
|
|
$server_responses = [];
|
|
$curl_opts = [
|
|
CURLOPT_TIMEOUT => 5,
|
|
CURLOPT_POST => true,
|
|
CURLOPT_RETURNTRANSFER => true
|
|
];
|
|
$mh = curl_multi_init();
|
|
$chk = -1;
|
|
|
|
foreach ($services_by_server as $services_list) {
|
|
$chk++;
|
|
|
|
$search_url = null;
|
|
|
|
if ($options['type'] == 'url')
|
|
$search_url = $options['source'];
|
|
if ($options['type'] == 'post' && CONFIG()->image_service_local_searches_use_urls)
|
|
$search_url = $options['source']['preview_url'];
|
|
|
|
$params = [];
|
|
if ($search_url) {
|
|
$params['url'] = $search_url;
|
|
} else {
|
|
if (function_exists('curl_file_create')) { // PHP v5.5.* fix
|
|
$params['file'] = curl_file_create($source_file);
|
|
} else {
|
|
$params['file'] = '@' . $source_file;
|
|
}
|
|
}
|
|
|
|
foreach ($services_list as $k => $s)
|
|
$params["service[$k]"] = $s;
|
|
|
|
$chn = 'ch' . $chk;
|
|
$$chn = curl_init($server);
|
|
|
|
curl_setopt_array($$chn, $curl_opts);
|
|
curl_setopt($$chn, CURLOPT_POSTFIELDS, $params);
|
|
|
|
curl_setopt($$chn, CURLOPT_CONNECTTIMEOUT, 4);
|
|
curl_setopt($$chn, CURLOPT_HTTPHEADER, ['Host: ' . parse_url($server)['host']]);
|
|
|
|
curl_multi_add_handle($mh, $$chn);
|
|
}
|
|
$ch_count = $chk;
|
|
|
|
$active = null;
|
|
|
|
do {
|
|
$ret = curl_multi_exec($mh, $active);
|
|
} while ($ret == CURLM_CALL_MULTI_PERFORM);
|
|
|
|
|
|
while ($active && $ret == CURLM_OK) {
|
|
if (curl_multi_select($mh) != -1) {
|
|
usleep(100);
|
|
}
|
|
do {
|
|
$mrc = curl_multi_exec($mh, $active);
|
|
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
|
|
}
|
|
|
|
$posts = new Rails\ActiveRecord\Collection();
|
|
$posts_external = new Rails\ActiveRecord\Collection();
|
|
$similarity = [];
|
|
$preview_url = "";
|
|
$next_id = 1;
|
|
$server_list = array_keys($services_by_server);
|
|
|
|
/**
|
|
* Is there a class for PHP that can nicely handle XML?
|
|
*/
|
|
$get_attr = function($xml, $attr) {
|
|
$obj = $xml->attributes()->$attr;
|
|
if ($obj) {
|
|
$obj = (array)$obj;
|
|
return $obj[0];
|
|
}
|
|
return null;
|
|
};
|
|
|
|
foreach(range(0, $ch_count) as $i) {
|
|
$chn = 'ch' . $i;
|
|
$server = $server_list[$i];
|
|
|
|
$resp = curl_multi_getcontent($$chn);
|
|
|
|
if (!$resp) {
|
|
$curl_err = curl_error($$chn);
|
|
if (preg_match('/^Operation timed out/', $curl_err)) {
|
|
$err_msg = 'timed out';
|
|
Rails::log()->notice(
|
|
"[SimilarImages] cURL timed out: " . $curl_err
|
|
);
|
|
} else {
|
|
$err_msg = 'empty response';
|
|
Rails::log()->warning(sprintf(
|
|
"[SimilarImages] cURL error: (%s) %s", curl_errno($$chn), $curl_err
|
|
));
|
|
}
|
|
$errors[$server] = [ 'message' => $err_msg ];
|
|
continue;
|
|
}
|
|
|
|
try {
|
|
$doc = new SimpleXMLElement($resp);
|
|
} catch (Exception $e) {
|
|
ob_start();
|
|
var_dump(curl_getinfo($$chn));
|
|
$info = ob_get_clean();
|
|
Rails::log()->error("Similar Images Error\ncURL Error: " . curl_error($$chn) . "\ncURL Info:\n" . $info);
|
|
|
|
Rails::log()->exception($e);
|
|
$errors[$server] = [ 'message' => 'parse error' ];
|
|
continue;
|
|
}
|
|
|
|
if ($doc->getName() == 'error') {
|
|
$errors[$server] = [ 'message' => $doc->message ];
|
|
continue;
|
|
} elseif ($doc->getName() != 'matches') {
|
|
$errors[$server] = [ 'message' => 'invalid response' ];
|
|
continue;
|
|
}
|
|
|
|
$threshold = !empty($options['threshold']) ? $options['threshold'] : (float)$get_attr($doc, 'threshold');
|
|
|
|
foreach ($doc->match as $element) {
|
|
$sim = (float)$get_attr($element, 'sim');
|
|
|
|
if ($sim >= $threshold and $sim > 0) {
|
|
$service = $get_attr($element, 'service');
|
|
$image = $element->post;
|
|
|
|
$id = $get_attr($image, 'id');
|
|
$md5 = $get_attr($image, 'md5');
|
|
|
|
if ($service == $local_service) {
|
|
$post = Post::where('id = ?', $id);
|
|
if ($post && is_object($options['source']) && $post->id != $options['source']->id) {
|
|
$posts[] = $post;
|
|
$similarity[spl_object_hash($post)] = $sim;
|
|
}
|
|
} elseif ($service) {
|
|
$post = new ExternalPost();
|
|
$post->id = (string)$next_id;
|
|
$next_id++;
|
|
$post->md5 = $md5;
|
|
$post->preview_url = $get_attr($element, 'preview');
|
|
if ($service == 'gelbooru.com') # hack
|
|
$post->url = "http://" . $service . "/index.php?page=post&s=view&id=" . $id;
|
|
elseif ($service == "e-shuushuu.net") # hack
|
|
$post->url = "http://" . $service . "/image/" . $id . "/";
|
|
else
|
|
$post->url = "http://" . $service . "/post/show/" . $id;
|
|
$post->sample_url = $get_attr($image, 'sample_url') ?: $post->url;
|
|
$post->service = $service;
|
|
$post->width = $get_attr($image, 'width');
|
|
$post->height = $get_attr($image, 'height');
|
|
$post->tags = $get_attr($image, 'tags') ?: '';
|
|
|
|
if (empty($options['data_search']))
|
|
$post->rating = $get_attr($image, 'rating') ?: 's';
|
|
else
|
|
$post->rating = $get_attr($image, 'rating') ?: false;
|
|
|
|
# Extra attributes.
|
|
if (!empty($options['data_search'])) {
|
|
$post->original_preview_url = $get_attr($image, 'preview_url');
|
|
$post->id = $get_attr($image, 'id');
|
|
$post->author = $get_attr($image, 'author');
|
|
$post->created_at = $get_attr($image, 'created_at');
|
|
$post->creator_id = $get_attr($image, 'creator_id');
|
|
$post->file_size = $get_attr($image, 'file_size');
|
|
$post->file_url = $get_attr($image, 'file_url');
|
|
$post->score = $get_attr($image, 'score');
|
|
$post->source = $get_attr($image, 'source');
|
|
$post->icon_path = ExternalPost::get_service_icon($service);
|
|
if (preg_match('/\.png$/', $post->file_url))
|
|
$post->has_png = true;
|
|
}
|
|
|
|
$posts_external[] = $post;
|
|
|
|
$similarity[spl_object_hash($post)] = $sim;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
$posts->sort(function($a, $b) {
|
|
$aid = spl_object_hash($a);
|
|
$bid = spl_object_hash($b);
|
|
if ($similarity[$aid] == $similarity[$bid])
|
|
return 0;
|
|
elseif ($similarity[$aid] > $similarity[$bid])
|
|
return 1;
|
|
return -1;
|
|
});
|
|
|
|
foreach ($errors as $server => $error) {
|
|
if (empty($error['services']))
|
|
$error['services'] = !empty($services_by_server[$server]) ? $services_by_server[$server] : $server;
|
|
}
|
|
$ret = ['posts' => $posts, 'posts_external' => $posts_external, 'similarity' => $similarity, 'services' => $services, 'errors' => $errors];
|
|
if ($options['type'] == 'post') {
|
|
$ret['source'] = $options['source'];
|
|
$ret['similarity'][spl_object_hash($options['source'])] = 'Original';
|
|
$ret['search_id'] = $ret['source']->id;
|
|
} else {
|
|
$post = new ExternalPost();
|
|
# $post->md5 = $md5;
|
|
$post->preview_url = $options['source_thumb'];
|
|
if (!empty($options['full_url']))
|
|
$post->url = $options['full_url'];
|
|
elseif (!empty($options['url']))
|
|
$post->url = $options['url'];
|
|
elseif (!empty($options['source_thumb']))
|
|
$post->url = $options['source_thumb'];
|
|
$post->id = 'source';
|
|
$post->rating = 'q';
|
|
$ret['search_id'] = 'source';
|
|
|
|
# Don't include the source URL if it's a data: url; it can be very large and isn't useful.
|
|
if (substr($post->url, 0, 5) == "data:")
|
|
$post->url = "";
|
|
|
|
list ($source_width, $source_height) = getimagesize($source_file);
|
|
|
|
# Since we lose access to the original image when we redirect to a saved search,
|
|
# the original dimensions can be passed as parameters so we can still display
|
|
# the original size. This can also be used by user scripts to include the
|
|
# size of the real image when a thumbnail is passed.
|
|
$post->width = !empty($options['width']) ? $options['width'] : $source_width;
|
|
$post->height = !empty($options['height']) ? $options['height'] : $source_height;
|
|
|
|
$ret['external_source'] = $post;
|
|
$ret['similarity'][spl_object_hash($post)] = "Original";
|
|
}
|
|
|
|
return $ret;
|
|
}
|
|
|
|
# Save a file locally to be searched for. Returns the path to the saved file, and
|
|
# the search ID which can be passed to find_saved_search.
|
|
#
|
|
# MyImouto: this method receives the file contents, not a path to a file.
|
|
static public function save_search($file_contents)
|
|
{
|
|
$tempfile_path_resize = $tempfile_path = $file_path = null;
|
|
try {
|
|
if (!is_dir(self::search_cache_dir()))
|
|
mkdir(self::search_cache_dir());
|
|
|
|
while (true) {
|
|
$tempfile_path = self::search_cache_dir() . "/" . uniqid('', true) . ".upload";
|
|
if (!is_file($tempfile_path))
|
|
break;
|
|
}
|
|
$fh = fopen($tempfile_path, 'a');
|
|
fclose($fh);
|
|
|
|
file_put_contents($tempfile_path, $file_contents);
|
|
|
|
# Use the resizer to validate the file and convert it to a thumbnail-size JPEG.
|
|
$imgsize = getimagesize($tempfile_path);
|
|
|
|
$exts = [
|
|
false,
|
|
'gif',
|
|
'jpg',
|
|
'png',
|
|
'swf',
|
|
'psd',
|
|
'bmp',
|
|
'tiff',
|
|
'tiff',
|
|
'jpc',
|
|
'jp2',
|
|
'jpx',
|
|
'jb2',
|
|
'swc',
|
|
'iff',
|
|
'wbmp',
|
|
'xbm'
|
|
];
|
|
|
|
if (!$imgsize || !$imgsize[2] || !isset($exts[$imgsize[2]])) {
|
|
throw new Moebooru\Exception\ResizeErrorException("Unrecognized image format");
|
|
}
|
|
|
|
$ret = [];
|
|
$ret['original_width'] = $imgsize[0];
|
|
$ret['original_height'] = $imgsize[1];
|
|
$size = Moebooru\Resizer::reduce_to(['width' => $ret['original_width'], 'height' => $ret['original_height']], ['width' => 150, 'height' => 150]);
|
|
$ext = $exts[$imgsize[2]];
|
|
|
|
$tempfile_path_resize = $tempfile_path . ".2";
|
|
Moebooru\Resizer::resize($ext, $tempfile_path, $tempfile_path_resize, $size, 95);
|
|
rename($tempfile_path_resize, $tempfile_path);
|
|
|
|
$md5 = md5_file($tempfile_path);
|
|
$id = $md5 . "." . $ext;
|
|
$file_path = self::search_cache_dir() . "/" . $id;
|
|
|
|
rename($tempfile_path, $file_path);
|
|
|
|
# Finally block
|
|
if (is_dir($tempfile_path))
|
|
rmdir($tempfile_path);
|
|
if (is_file($tempfile_path_resize))
|
|
rmdir($tempfile_path_resize);
|
|
|
|
// chmod($file_path, 0664);
|
|
} catch (Exception $e) {
|
|
# Finally block
|
|
if (is_dir($tempfile_path))
|
|
rmdir($tempfile_path);
|
|
if (is_file($tempfile_path_resize))
|
|
rmdir($tempfile_path_resize);
|
|
|
|
if (is_dir($file_path))
|
|
rmdir($file_path);
|
|
throw $e;
|
|
}
|
|
/*
|
|
TODO:
|
|
finally {
|
|
if (is_dir($tempfile_path))
|
|
rmdir($tempfile_path);
|
|
if (is_file($tempfile_path_resize))
|
|
rmdir($tempfile_path_resize);
|
|
}
|
|
*/
|
|
$ret['file_path'] = $file_path;
|
|
$ret['search_id'] = $id;
|
|
return $ret;
|
|
}
|
|
|
|
static public function valid_saved_search($id)
|
|
{
|
|
return (bool)preg_match('/\A[a-zA-Z0-9]{32}\.[a-z]+\Z/', $id);
|
|
}
|
|
|
|
# Find a saved file.
|
|
static public function find_saved_search($id)
|
|
{
|
|
if (!self::valid_saved_search($id))
|
|
return;
|
|
|
|
$file_path = self::search_cache_dir() . "/" . $id;
|
|
if (!is_file($file_path))
|
|
return;
|
|
|
|
# Touch the file to delay its deletion.
|
|
fopen($file_path, 'a');
|
|
return $file_path;
|
|
}
|
|
|
|
# Delete old searches.
|
|
static public function cull_old_searches()
|
|
{
|
|
$dh = opendir(self::search_cache_dir());
|
|
|
|
while (false !== ($path = readdir($dh))) {
|
|
if ($path == '.' || $path == '..' || !self::valid_saved_search($path))
|
|
continue;
|
|
$file = self::search_cache_dir() . '/' . $path;
|
|
$mtime = Rails\Toolbox\FileTools::modTime($file);
|
|
$age = time() - $mtime;
|
|
if ($age > 60*60*24)
|
|
unlink($file);
|
|
}
|
|
|
|
closedir($dh);
|
|
}
|
|
|
|
static public function search_cache_dir()
|
|
{
|
|
return Rails::publicPath() . self::SEARCH_CACHE_DIR;
|
|
}
|
|
}
|