remove all tesseract traces

2025-03-28 15:05:01 +01:00 · 2025-03-28 15:05:01 +01:00 · 6dc2736f67
commit 6dc2736f67
parent fe301f9b9e
3 changed files with 0 additions and 58 deletions
--- a/inc/config.php
+++ b/inc/config.php
@@ -1022,15 +1022,6 @@
 	// Set this to true if you're using Linux and you can execute `md5sum` binary.
 	$config['gnu_md5'] = false;

-	// Use Tesseract OCR to retrieve text from images, so you can use it as a spamfilter.
-	$config['tesseract_ocr'] = false;
-
-	// Tesseract parameters
-	$config['tesseract_params'] = '';
-
-	// Tesseract preprocess command
-	$config['tesseract_preprocess_command'] = 'convert -monochrome %s -';
-
 	// Number of posts in a "View Last X Posts" page
 	$config['noko50_count'] = 50;
 	// Number of posts a thread needs before it gets a "View Last X Posts" page.
--- a/post.php
+++ b/post.php
@@ -125,29 +125,6 @@ function download_file_from_url(HttpDriver $http, $file_url, $request_timeout, $
 	);
 }

-/**
- * Try extract text from the given image.
- *
- * @param array $config Instance configuration.
- * @param string $img_path The file path to the image.
- * @return string|false Returns a string with the extracted text on success (if any).
- * @throws RuntimeException Throws if executing tesseract fails.
- */
-function ocr_image(array $config, string $img_path): string {
-	// The default preprocess command is an ImageMagick b/w quantization.
-	$ret = shell_exec_error(
-		sprintf($config['tesseract_preprocess_command'], escapeshellarg($img_path))
-		 . ' | tesseract stdin stdout 2>/dev/null'
-		 . $config['tesseract_params']
-	);
-	if ($ret === false) {
-		throw new RuntimeException('Unable to run tesseract');
-	}
-
-	return trim($ret);
-}
-
-
 /**
 * Trim an image's EXIF metadata
 *
@@ -1183,27 +1160,6 @@ if (isset($_POST['delete'])) {
 			$dont_copy_file = false;
 		}

-		if ($config['tesseract_ocr'] && $file['thumb'] != 'file') { // Let's OCR it!
-			$fname = $file['tmp_name'];
-
-			if ($file['height'] > 500 || $file['width'] > 500) {
-				$fname = $file['thumb'];
-			}
-
-			if ($fname !== 'spoiler') { // We don't have that much CPU time, do we?
-				try {
-					$txt = ocr_image($config, $fname);
-					if ($txt !== '') {
-						// This one has an effect, that the body is appended to a post body. So you can write a correct
-						// spamfilter.
-						$post['body_nomarkup'] .= "<tinyboard ocr image $key>" . htmlspecialchars($txt) . "</tinyboard>";
-					}
-				} catch (RuntimeException $e) {
-					$context->get(LogDriver::class)->log(LogDriver::ERROR, "Could not OCR image: {$e->getMessage()}");
-				}
-			}
-		}
-
 		if (!$dont_copy_file) {
 			if (isset($file['file_tmp'])) {
 				if (!@rename($file['tmp_name'], $file['file']))
@@ -1243,11 +1199,6 @@ if (isset($_POST['delete'])) {
 		}
 		}

-	// Do filters again if OCRing
-	if ($config['tesseract_ocr'] && !hasPermission($config['mod']['bypass_filters'], $board['uri']) && !$dropped_post) {
-		do_filters($context, $post);
-	}
-
 	if (!hasPermission($config['mod']['postunoriginal'], $board['uri']) && $config['robot_enable'] && checkRobot($post['body_nomarkup']) && !$dropped_post) {
 		undoImage($post);
 		if ($config['robot_mute']) {
--- a/tmp/tesseract/.gitkeep
+++ b/tmp/tesseract/.gitkeep