b77be4a59a
- Correctly set-up cURL multi. - Removed exception thrown if one request returned empty response. Instead, the error is logged and the script continues.
428 lines
16 KiB
PHP
Executable File
428 lines
16 KiB
PHP
Executable File
<?php
|
|
/**
 * Similarity search against the configured image services, plus management
 * of the on-disk cache that holds uploaded search images.
 */
class SimilarImages
{
    /** Saved-search cache directory, relative to the application's public path. */
    const SEARCH_CACHE_DIR = "/data/search";

    /** Age in seconds after which cull_old_searches() deletes a saved search. */
    const SEARCH_MAX_AGE = 86400;

    /**
     * Expands a comma-separated service selection into a list of service names.
     *
     * An empty selection means "local"; "all" selects every key of
     * CONFIG()->image_service_list. Each "local" entry is replaced by
     * CONFIG()->local_image_service.
     *
     * @param string|null $services comma-separated names, "all", or empty
     * @return array list of service names
     */
    public static function get_services($services = null)
    {
        if (!$services) {
            $services = "local";
        }

        if ($services == "all") {
            $names = array_keys(CONFIG()->image_service_list);
        } else {
            $names = explode(',', $services);
        }

        foreach ($names as $i => $name) {
            if ($name == "local") {
                $names[$i] = CONFIG()->local_image_service;
            }
        }

        return $names;
    }

    /**
     * Queries every server that hosts one of the requested services and
     * collects the matches.
     *
     * Option keys read by this method:
     *  - 'services'    list of service names (required)
     *  - 'type'        'post', 'file' or 'url' (required)
     *  - 'source'      the post object, the image file path, or the image URL
     *  - 'threshold'   minimum similarity; defaults to the one in the response
     *  - 'data_search' when non-empty, extra attributes are copied onto results
     *  - 'source_thumb', 'full_url', 'url', 'width', 'height'
     *                  describe the source image for non-post searches
     *
     * @param array $options
     * @return array with keys 'posts', 'posts_external', 'similarity'
     *               (keyed by spl_object_hash of each result), 'services',
     *               'errors', and either 'source' or 'external_source' plus
     *               'search_id'. When no requested service is known, only the
     *               first five keys are present and 'errors' is a plain string.
     */
    public static function similar_images($options = [])
    {
        $errors             = [];
        $local_service      = CONFIG()->local_image_service;
        $services           = $options['services'];
        $services_by_server = [];
        $service_map        = CONFIG()->image_service_list;

        foreach ($services as $service) {
            if (empty($service_map[$service])) {
                $errors[] = ['services' => [$service], 'message' => $service . " is an unknown service"];
                continue;
            }
            $services_by_server[$service_map[$service]][] = $service;
        }

        if (!$services_by_server) {
            return [
                'posts'          => new Rails\ActiveRecord\Collection(),
                'posts_external' => new Rails\ActiveRecord\Collection(),
                'similarity'     => [],
                'services'       => [],
                'errors'         => 'No service selected/no local service',
            ];
        }

        # If the source is a local post, its preview is sent with the request.
        # For 'url' searches there is no local file at all.
        $source_file = null;
        if ($options['type'] == 'post') {
            $source_file = $options['source']->preview_path();
        } elseif ($options['type'] == 'file') {
            $source_file = $options['source'];
        }

        $responses = self::fetch_service_responses($services_by_server, $options, $source_file);

        $posts          = new Rails\ActiveRecord\Collection();
        $posts_external = new Rails\ActiveRecord\Collection();
        $similarity     = [];
        $next_id        = 1;
        $data_search    = !empty($options['data_search']);

        foreach ($responses as $server => $response) {
            $body = $response['body'];

            if (!$body) {
                if (preg_match('/^Operation timed out/', $response['error'])) {
                    $err_msg = 'timed out';
                    Rails::log()->notice(
                        "[SimilarImages] cURL timed out: " . $response['error']
                    );
                } else {
                    $err_msg = 'empty response';
                    Rails::log()->warning(sprintf(
                        "[SimilarImages] cURL error: (%s) %s", $response['errno'], $response['error']
                    ));
                }
                $errors[$server] = ['message' => $err_msg];
                continue;
            }

            try {
                $doc = new SimpleXMLElement($body);
            } catch (Exception $e) {
                Rails::log()->error(
                    "Similar Images Error\ncURL Error: " . $response['error']
                    . "\ncURL Info:\n" . var_export($response['info'], true)
                );
                Rails::log()->exception($e);
                $errors[$server] = ['message' => 'parse error'];
                continue;
            }

            if ($doc->getName() == 'error') {
                # Cast so the error list holds plain strings rather than XML nodes.
                $errors[$server] = ['message' => (string)$doc->message];
                continue;
            } elseif ($doc->getName() != 'matches') {
                $errors[$server] = ['message' => 'invalid response'];
                continue;
            }

            $threshold = !empty($options['threshold'])
                ? $options['threshold']
                : (float)self::xml_attr($doc, 'threshold');

            foreach ($doc->match as $element) {
                $sim = (float)self::xml_attr($element, 'sim');
                if (!($sim >= $threshold && $sim > 0)) {
                    continue;
                }

                $service = self::xml_attr($element, 'service');

                if ($service == $local_service) {
                    $id = self::xml_attr($element->post, 'id');
                    # NOTE(review): where() is used as if it returned a single
                    # record; confirm whether ->first() is needed here.
                    $post = Post::where('id = ?', $id);

                    # Skip only the post that is itself the search source. A
                    # non-object source (file/url search) can never be a local
                    # post, so every local match is kept in that case.
                    $source    = $options['source'];
                    $is_source = is_object($source) && $post && $post->id == $source->id;
                    if ($post && !$is_source) {
                        $posts[] = $post;
                        $similarity[spl_object_hash($post)] = $sim;
                    }
                } elseif ($service) {
                    $post = self::build_external_post($element, $service, $next_id, $data_search);
                    $next_id++;

                    $posts_external[] = $post;
                    $similarity[spl_object_hash($post)] = $sim;
                }
            }
        }

        # The comparator must import $similarity explicitly; without "use" it
        # would see an undefined variable and treat every pair as equal.
        # NOTE(review): direction kept as written (lower similarity sorts
        # first under usort()-style semantics); confirm against the view.
        $posts->sort(function ($a, $b) use ($similarity) {
            $sim_a = $similarity[spl_object_hash($a)];
            $sim_b = $similarity[spl_object_hash($b)];
            if ($sim_a == $sim_b) {
                return 0;
            }
            return $sim_a > $sim_b ? 1 : -1;
        });

        # Attach the affected services to per-server errors. Written through
        # the key, because the foreach value is only a copy.
        foreach ($errors as $server => $error) {
            if (empty($error['services'])) {
                $errors[$server]['services'] = !empty($services_by_server[$server])
                    ? $services_by_server[$server]
                    : $server;
            }
        }

        $ret = [
            'posts'          => $posts,
            'posts_external' => $posts_external,
            'similarity'     => $similarity,
            'services'       => $services,
            'errors'         => $errors,
        ];

        if ($options['type'] == 'post') {
            $ret['source'] = $options['source'];
            $ret['similarity'][spl_object_hash($options['source'])] = 'Original';
            $ret['search_id'] = $ret['source']->id;
            return $ret;
        }

        $post = new ExternalPost();
        $post->preview_url = isset($options['source_thumb']) ? $options['source_thumb'] : null;
        if (!empty($options['full_url'])) {
            $post->url = $options['full_url'];
        } elseif (!empty($options['url'])) {
            $post->url = $options['url'];
        } elseif (!empty($options['source_thumb'])) {
            $post->url = $options['source_thumb'];
        }
        $post->id = 'source';
        $post->rating = 'q';
        $ret['search_id'] = 'source';

        # Don't include the source URL if it's a data: url; it can be very large and isn't useful.
        if (substr((string)$post->url, 0, 5) == "data:") {
            $post->url = "";
        }

        # Only probe a file that actually exists; 'url' searches have none.
        $dimensions = ($source_file !== null && is_file($source_file))
            ? getimagesize($source_file)
            : false;
        $source_width  = $dimensions ? $dimensions[0] : null;
        $source_height = $dimensions ? $dimensions[1] : null;

        # Since we lose access to the original image when we redirect to a saved search,
        # the original dimensions can be passed as parameters so we can still display
        # the original size. This can also be used by user scripts to include the
        # size of the real image when a thumbnail is passed.
        $post->width  = !empty($options['width']) ? $options['width'] : $source_width;
        $post->height = !empty($options['height']) ? $options['height'] : $source_height;

        $ret['external_source'] = $post;
        $ret['similarity'][spl_object_hash($post)] = "Original";

        return $ret;
    }

    /**
     * Sends one POST request per server in parallel and returns the raw results.
     *
     * @param array       $services_by_server server URL => list of service names
     * @param array       $options            the options given to similar_images()
     * @param string|null $source_file        image file to upload when no URL is searched
     * @return array server URL => ['body', 'error', 'errno', 'info']
     */
    private static function fetch_service_responses($services_by_server, $options, $source_file)
    {
        $search_url = null;
        if ($options['type'] == 'url') {
            $search_url = $options['source'];
        }
        if ($options['type'] == 'post' && CONFIG()->image_service_local_searches_use_urls) {
            $search_url = $options['source']['preview_url'];
        }

        $multi     = curl_multi_init();
        $handles   = [];
        $responses = [];

        # Each request must target its own server, hence the key binding.
        foreach ($services_by_server as $server => $services_list) {
            $params = [];
            if ($search_url) {
                $params['url'] = $search_url;
            } elseif (class_exists('CURLFile')) {
                # The "@path" upload syntax is gone in PHP 7+.
                $params['file'] = new CURLFile((string)$source_file);
            } else {
                $params['file'] = '@' . $source_file;
            }
            foreach ($services_list as $k => $s) {
                $params["service[$k]"] = $s;
            }

            $handle = curl_init($server);
            if ($handle === false) {
                $responses[$server] = [
                    'body'  => false,
                    'error' => 'could not initialise cURL handle',
                    'errno' => 0,
                    'info'  => [],
                ];
                continue;
            }

            curl_setopt_array($handle, [
                CURLOPT_TIMEOUT        => 5,
                CURLOPT_CONNECTTIMEOUT => 4,
                CURLOPT_POST           => true,
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_POSTFIELDS     => $params,
                CURLOPT_HTTPHEADER     => ['Host: ' . parse_url($server, PHP_URL_HOST)],
            ]);

            curl_multi_add_handle($multi, $handle);
            $handles[$server] = $handle;
        }

        # Drive all transfers until none is active or cURL reports a failure.
        $active = null;
        do {
            do {
                $status = curl_multi_exec($multi, $active);
            } while ($status == CURLM_CALL_MULTI_PERFORM);

            # select() returning -1 means it could not wait; pause briefly
            # instead of spinning.
            if ($active && $status == CURLM_OK && curl_multi_select($multi) == -1) {
                usleep(100);
            }
        } while ($active && $status == CURLM_OK);

        foreach ($handles as $server => $handle) {
            $responses[$server] = [
                'body'  => curl_multi_getcontent($handle),
                'error' => curl_error($handle),
                'errno' => curl_errno($handle),
                'info'  => curl_getinfo($handle),
            ];
            curl_multi_remove_handle($multi, $handle);
            curl_close($handle);
        }
        curl_multi_close($multi);

        return $responses;
    }

    /**
     * Reads one attribute of an XML element.
     *
     * @param SimpleXMLElement $xml
     * @param string           $attr attribute name
     * @return string|null the attribute value, or null when it is absent
     */
    private static function xml_attr($xml, $attr)
    {
        $attributes = $xml->attributes();
        if (!$attributes) {
            return null;
        }
        $node = $attributes->$attr;
        return $node ? (string)$node : null;
    }

    /**
     * Builds an ExternalPost from one <match> element of a service response.
     *
     * @param SimpleXMLElement $element     the <match> element
     * @param string           $service     name of the service that reported the match
     * @param int              $sequence_id running number used as the post id
     * @param bool             $data_search whether to copy the extended attributes
     * @return ExternalPost
     */
    private static function build_external_post($element, $service, $sequence_id, $data_search)
    {
        $image = $element->post;
        $id    = self::xml_attr($image, 'id');

        $post = new ExternalPost();
        $post->id = (string)$sequence_id;
        $post->md5 = self::xml_attr($image, 'md5');
        $post->preview_url = self::xml_attr($element, 'preview');

        # Some services use their own URL layout for a post page.
        if ($service == 'gelbooru.com') {
            $post->url = "http://" . $service . "/index.php?page=post&s=view&id=" . $id;
        } elseif ($service == "e-shuushuu.net") {
            $post->url = "http://" . $service . "/image/" . $id . "/";
        } else {
            $post->url = "http://" . $service . "/post/show/" . $id;
        }

        $post->sample_url = self::xml_attr($image, 'sample_url') ?: $post->url;
        $post->service = $service;
        $post->width = self::xml_attr($image, 'width');
        $post->height = self::xml_attr($image, 'height');
        $post->tags = self::xml_attr($image, 'tags') ?: '';
        $post->rating = self::xml_attr($image, 'rating') ?: ($data_search ? false : 's');

        # Extra attributes.
        if ($data_search) {
            $post->original_preview_url = self::xml_attr($image, 'preview_url');
            $post->id = self::xml_attr($image, 'id');
            $post->author = self::xml_attr($image, 'author');
            $post->created_at = self::xml_attr($image, 'created_at');
            $post->creator_id = self::xml_attr($image, 'creator_id');
            $post->file_size = self::xml_attr($image, 'file_size');
            $post->file_url = self::xml_attr($image, 'file_url');
            $post->score = self::xml_attr($image, 'score');
            $post->source = self::xml_attr($image, 'source');
            $post->icon_path = ExternalPost::get_service_icon($service);
            if (preg_match('/\.png$/', (string)$post->file_url)) {
                $post->has_png = true;
            }
        }

        return $post;
    }

    /**
     * Saves an image locally so it can be searched for.
     *
     * MyImouto: this method receives the file contents, not a path to a file.
     *
     * @param string $file_contents raw bytes of the uploaded image
     * @return array 'original_width', 'original_height', 'file_path', and
     *               'search_id' (which can be passed to find_saved_search)
     * @throws Moebooru\Exception\ResizeErrorException when the image format
     *         is not recognised
     */
    public static function save_search($file_contents)
    {
        $tempfile_path_resize = $tempfile_path = $file_path = null;

        # Extension for each image type reported by getimagesize().
        $exts = [
            IMAGETYPE_GIF     => 'gif',
            IMAGETYPE_JPEG    => 'jpg',
            IMAGETYPE_PNG     => 'png',
            IMAGETYPE_SWF     => 'swf',
            IMAGETYPE_PSD     => 'psd',
            IMAGETYPE_BMP     => 'bmp',
            IMAGETYPE_TIFF_II => 'tiff',
            IMAGETYPE_TIFF_MM => 'tiff',
            IMAGETYPE_JPC     => 'jpc',
            IMAGETYPE_JP2     => 'jp2',
            IMAGETYPE_JPX     => 'jpx',
            IMAGETYPE_JB2     => 'jb2',
            IMAGETYPE_SWC     => 'swc',
            IMAGETYPE_IFF     => 'iff',
            IMAGETYPE_WBMP    => 'wbmp',
            IMAGETYPE_XBM     => 'xbm',
        ];

        try {
            $cache_dir = self::search_cache_dir();
            if (!is_dir($cache_dir)) {
                # Recursive, so missing parent directories are created too.
                mkdir($cache_dir, 0777, true);
            }

            do {
                $tempfile_path = $cache_dir . "/" . uniqid('', true) . ".upload";
            } while (is_file($tempfile_path));

            file_put_contents($tempfile_path, $file_contents);

            # getimagesize() doubles as validation: anything it cannot
            # identify is rejected before the resizer runs.
            $imgsize = getimagesize($tempfile_path);
            if (!$imgsize || !$imgsize[2] || !isset($exts[$imgsize[2]])) {
                throw new Moebooru\Exception\ResizeErrorException("Unrecognized image format");
            }

            $ret = [];
            $ret['original_width']  = $imgsize[0];
            $ret['original_height'] = $imgsize[1];
            $size = Moebooru\Resizer::reduce_to(
                ['width' => $ret['original_width'], 'height' => $ret['original_height']],
                ['width' => 150, 'height' => 150]
            );
            $ext = $exts[$imgsize[2]];

            $tempfile_path_resize = $tempfile_path . ".2";
            Moebooru\Resizer::resize($ext, $tempfile_path, $tempfile_path_resize, $size, 95);
            rename($tempfile_path_resize, $tempfile_path);

            # The search id is the md5 of the resized file plus the extension.
            $md5 = md5_file($tempfile_path);
            $id = $md5 . "." . $ext;
            $file_path = $cache_dir . "/" . $id;

            rename($tempfile_path, $file_path);

            self::remove_files([$tempfile_path, $tempfile_path_resize]);
        } catch (Exception $e) {
            self::remove_files([$tempfile_path, $tempfile_path_resize, $file_path]);
            throw $e;
        }

        $ret['file_path'] = $file_path;
        $ret['search_id'] = $id;
        return $ret;
    }

    /**
     * Deletes every listed path that is an existing regular file.
     *
     * @param array $paths file paths; null entries are ignored
     */
    private static function remove_files(array $paths)
    {
        foreach ($paths as $path) {
            if ($path !== null && is_file($path)) {
                unlink($path);
            }
        }
    }

    /**
     * Tells whether $id has the shape of a saved-search id: 32 alphanumeric
     * characters, a dot, and a lowercase extension.
     *
     * @param string $id
     * @return bool
     */
    public static function valid_saved_search($id)
    {
        # \z (not \Z) so a trailing newline cannot slip through.
        return (bool)preg_match('/\A[a-zA-Z0-9]{32}\.[a-z]+\z/', $id);
    }

    /**
     * Finds a saved file.
     *
     * @param string $id search id as returned by save_search()
     * @return string|null path of the saved file, or null when the id is
     *                     invalid or no such file exists
     */
    public static function find_saved_search($id)
    {
        if (!self::valid_saved_search($id)) {
            return null;
        }

        $file_path = self::search_cache_dir() . "/" . $id;
        if (!is_file($file_path)) {
            return null;
        }

        # Touch the file to delay its deletion.
        touch($file_path);
        return $file_path;
    }

    /**
     * Deletes saved searches older than SEARCH_MAX_AGE seconds.
     */
    public static function cull_old_searches()
    {
        $cache_dir = self::search_cache_dir();
        if (!is_dir($cache_dir)) {
            return;
        }

        $dh = opendir($cache_dir);
        if ($dh === false) {
            return;
        }

        while (false !== ($path = readdir($dh))) {
            if ($path == '.' || $path == '..' || !self::valid_saved_search($path)) {
                continue;
            }

            $file  = $cache_dir . '/' . $path;
            $mtime = Rails\Toolbox\FileTools::modTime($file);
            if (time() - $mtime > self::SEARCH_MAX_AGE) {
                unlink($file);
            }
        }

        closedir($dh);
    }

    /**
     * @return string the saved-search cache directory under Rails::publicPath()
     */
    public static function search_cache_dir()
    {
        return Rails::publicPath() . self::SEARCH_CACHE_DIR;
    }
}