-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage-text
executable file
·57 lines (44 loc) · 1.18 KB
/
image-text
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use File::Copy;
use File::Temp ();
# image-text: OCR the image named by the first command-line argument and
# print the recognised text to STDOUT as a single sanitised line of ASCII
# (no trailing newline).
#
# Dies with a diagnostic if no image is given, if the OCR module cannot be
# loaded, if the image cannot be copied, or if Tesseract returns no text.

# Hazel doesn't have /usr/local/bin in PATH, so we put it there (presumably
# that is where the tesseract binary lives -- confirm for your install).
# An empty or undefined PATH is replaced outright so that no empty entry
# (which means "current directory") is created.
my $path = $ENV{'PATH'};
$ENV{'PATH'} = (defined($path) && length($path))
    ? "$path:/usr/local/bin"
    : '/usr/local/bin';

# Can't just `use` the module because of the PATH issue: `use` runs at
# compile time, before PATH has been extended above.  Load it at run time
# and report a failure instead of silently exiting with no output.
eval {
    require Image::OCR::Tesseract;
    Image::OCR::Tesseract->import();
    1;
} or die("Cannot load Image::OCR::Tesseract: $@");

my $file = shift(@ARGV);
die("Usage: $0 IMAGE_FILE\n") unless defined($file) && length($file);

# Work on a copy outside the watched folder so we don't accidentally trigger
# Hazel.  File::Temp picks a unique name (no collisions when two images are
# processed within the same second) and removes the file automatically, even
# if a later step dies.
my $tempfile = File::Temp->new(
    TEMPLATE => 'tesseract-XXXXXXXX',
    DIR      => '/var/tmp',
    SUFFIX   => '.jpg',
    UNLINK   => 1,
);
copy($file, $tempfile->filename) or die("Copy failed: $!");

my $output = Image::OCR::Tesseract::get_ocr($tempfile->filename);

# Dropping the object deletes the temporary copy right away; this happens
# before the emptiness check so the file is never left behind.
undef $tempfile;

die('No Tesseract Output!') unless defined($output) && length($output);

# Remove leading and trailing whitespace (\s already covers newlines).
$output =~ s/\A\s+//;
$output =~ s/\s+\z//;
# Flatten to one line: every remaining newline becomes a space.
$output =~ s/\n/ /g;
# Replace / with -
$output =~ tr{/}{-};
# Remove non-ascii characters.
$output =~ s/[^[:ascii:]]//g;
# Remove quotes, commas, redirect operators and pound signs (presumably so
# the text is safe to use in a file name or shell argument -- confirm
# against the Hazel rule that consumes this output).
$output =~ tr/'",<>#//d;

# The text is pure ASCII at this point, so no explicit encoding step is
# needed before printing.
print $output;
exit;