remove all tesseract traces

This commit is contained in:
Zankaria 2025-03-28 15:05:01 +01:00 committed by Zankaria
parent fe301f9b9e
commit 6dc2736f67
3 changed files with 0 additions and 58 deletions

View File

@ -1022,15 +1022,6 @@
// Set this to true if you're running Linux and the `md5sum` binary can be executed.
$config['gnu_md5'] = false;
// Use Tesseract OCR to extract text from uploaded images, so that the text can be matched by spam filters.
$config['tesseract_ocr'] = false;
// Extra command-line parameters added to the `tesseract` invocation.
$config['tesseract_params'] = '';
// Preprocess command run on the image before OCR. It is an sprintf() format: %s is replaced with the
// shell-escaped image path, and the command's standard output is piped into tesseract.
$config['tesseract_preprocess_command'] = 'convert -monochrome %s -';
// Number of posts in a "View Last X Posts" page
$config['noko50_count'] = 50;
// Number of posts a thread needs before it gets a "View Last X Posts" page.

View File

@ -125,29 +125,6 @@ function download_file_from_url(HttpDriver $http, $file_url, $request_timeout, $
);
}
/**
 * Try to extract text from the given image with Tesseract OCR.
 *
 * The image is first run through the configured preprocess command; that command's
 * standard output is piped into `tesseract`, which reads from stdin and writes the
 * recognized text to stdout.
 *
 * @param array $config Instance configuration. Reads 'tesseract_preprocess_command' (an sprintf()
 *                      format receiving the shell-escaped image path) and 'tesseract_params'.
 * @param string $img_path The file path to the image.
 * @return string The trimmed command output (may be an empty string).
 * @throws RuntimeException Throws if executing tesseract fails.
 */
function ocr_image(array $config, string $img_path): string {
    // The default preprocess command is an ImageMagick b/w quantization.
    $preprocess = sprintf($config['tesseract_preprocess_command'], escapeshellarg($img_path));

    // The trailing space after the redirection is required: without it a non-empty
    // 'tesseract_params' would be glued onto "/dev/null" and change the redirect target.
    $ret = shell_exec_error(
        $preprocess
        . ' | tesseract stdin stdout 2>/dev/null '
        . $config['tesseract_params']
    );
    if ($ret === false) {
        throw new RuntimeException('Unable to run tesseract');
    }
    return trim($ret);
}
/**
* Trim an image's EXIF metadata
*
@ -1183,27 +1160,6 @@ if (isset($_POST['delete'])) {
$dont_copy_file = false;
}
// Optional OCR pass: when enabled, text recognized in the image is appended to the post body so
// that filters can match on it. Entries whose 'thumb' is the literal 'file' are skipped.
if ($config['tesseract_ocr'] && $file['thumb'] != 'file') { // Let's OCR it!
$fname = $file['tmp_name'];
// When either dimension exceeds 500, OCR $file['thumb'] instead of the uploaded file.
if ($file['height'] > 500 || $file['width'] > 500) {
$fname = $file['thumb'];
}
// A value of 'spoiler' is never OCR'd (presumably a placeholder rather than a real path - confirm).
if ($fname !== 'spoiler') { // We don't have that much CPU time, do we?
try {
$txt = ocr_image($config, $fname);
if ($txt !== '') {
// The HTML-escaped OCR text is appended to the raw post body inside a <tinyboard ocr image $key>
// tag, so that a spam filter can be written against the recognized text.
$post['body_nomarkup'] .= "<tinyboard ocr image $key>" . htmlspecialchars($txt) . "</tinyboard>";
}
} catch (RuntimeException $e) {
// An OCR failure is only logged; it is not rethrown here.
$context->get(LogDriver::class)->log(LogDriver::ERROR, "Could not OCR image: {$e->getMessage()}");
}
}
}
if (!$dont_copy_file) {
if (isset($file['file_tmp'])) {
if (!@rename($file['tmp_name'], $file['file']))
@ -1243,11 +1199,6 @@ if (isset($_POST['delete'])) {
}
}
// Do filters again if OCRing: with OCR enabled, do_filters() is run here on the post unless
// hasPermission() grants 'bypass_filters' for this board or the post has already been dropped.
if ($config['tesseract_ocr'] && !hasPermission($config['mod']['bypass_filters'], $board['uri']) && !$dropped_post) {
do_filters($context, $post);
}
if (!hasPermission($config['mod']['postunoriginal'], $board['uri']) && $config['robot_enable'] && checkRobot($post['body_nomarkup']) && !$dropped_post) {
undoImage($post);
if ($config['robot_mute']) {

View File