Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1427605
captcha.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
captcha.py
View Options
#!/usr/bin/python3
#
# Script to generate distorted text images for a captcha system.
#
# Copyright (C) 2005 Neil Harris
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Further tweaks by Brion Vibber <brion@pobox.com>:
# 2006-01-26: Add command-line options for the various parameters
# 2007-02-19: Add --dirs param for hash subdirectory splits
# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
# 2008-01-06: Add regex check to skip words containing other than a-z
from
optparse
import
OptionParser
import
hashlib
import
json
import
math
import
multiprocessing
import
os
import
random
import
re
import
sys
try
:
from
PIL
import
Image
,
ImageDraw
,
ImageEnhance
,
ImageFont
,
ImageOps
except
ImportError
:
sys
.
exit
(
"This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/"
)
# Suitability test for candidate words: matches any character that is not a
# lowercase ASCII letter.
nonalpha = re.compile("[^a-z]")

# Letter combinations that are hard to read once distorted (e.g. "il" beside
# each other).  Note that the trailing "[il]" alternative also matches any
# word containing an "i" or an "l" at all.
confusedletters = re.compile("[ijtlr][ijtl]|r[nompqr]|[il]")

# Pillow 9.2 added getbbox to replace getsize, and getsize() was removed in Pillow 10
# https://pillow.readthedocs.io/en/stable/releasenotes/10.0.0.html#font-size-and-offset-methods
# We don't have a requirements.txt, and therefore don't declare any specific
# supported or min version, so probe for the newer API once at import time.
IMAGEFONT_HAS_GETBBOX = hasattr(ImageFont.ImageFont, "getbbox")
# Does X-axis wobbly copy, sandwiched between two rotates
def wobbly_copy(src, wob, col, scale, ang):
    """Return a copy of *src* whose scan lines are shifted sideways in a wave.

    The image is rotated by *ang* plus a random amount in [-30, 30], each
    row inside the rotated content's bounding box is pasted onto a black
    canvas at a horizontal offset made of a sine term plus random jitter
    (both scaled by *wob*; the sine frequency is drawn from *scale*), and
    the canvas is then rotated back and sharpened.

    *col* is accepted but not used.  If the rotated image has no content
    (its bounding box is None), *src* itself is returned unchanged.
    """
    width, height = src.size

    # Random parameters are drawn in a fixed order: frequency, phase, angle.
    freq = random.uniform(4 * scale, 5 * scale)
    phase = random.uniform(0, math.pi * 2)
    angle = ang + random.uniform(-30, 30)  # vary, but not too much

    canvas = Image.new("RGB", src.size, 0)  # a black rectangle
    rotated = src.rotate(angle, Image.BILINEAR)

    # Cheap bounding-box check so that only rows with content are processed.
    bounds = rotated.getbbox()
    if bounds is None:
        return src
    top, bottom = bounds[1], bounds[3]

    for row in range(top, bottom + 1):
        # Drop one scan line in, displaced by the wave plus some jitter.
        shift = int(math.sin(phase + (row * freq / height)) * wob)
        shift += int(random.uniform(-wob * 0.5, wob * 0.5))
        canvas.paste(rotated.crop((0, row, width, row + 1)), (shift, row))

    # Rotate back, then sharpen to try to stop blurring from building up.
    canvas = canvas.rotate(-angle, Image.BILINEAR)
    return ImageEnhance.Sharpness(canvas).enhance(2)
def gen_captcha(text, fontname, fontsize, file_name):
    """Draw *text* as a captcha image and save it to *file_name*.

    The text is drawn in white on a black square canvas using the TrueType
    font *fontname* at size *fontsize*, one character at a time with random
    vertical jitter and negative kerning.  Ten random arcs are drawn over
    it, the image is cropped to its content plus a small border, inverted
    to black on white, and saved (format determined from the filename).
    """
    # white text on a black background
    background = 0x0
    foreground = 0xFFFFFF

    font = ImageFont.truetype(fontname, fontsize)

    def measure(s):
        # (width, height) of *s*, using whichever API this Pillow provides.
        if IMAGEFONT_HAS_GETBBOX:
            return font.getbbox(s)[2:]
        return font.getsize(s)

    text_w, text_h = measure(text)

    # Work on a square canvas significantly larger than the text.
    edge = max(text_w, text_h) + 2 * min(text_w, text_h)
    canvas = Image.new("RGB", (edge, edge), background)
    pen = ImageDraw.Draw(canvas)
    width, height = canvas.size

    # Add the text to the image, character by character.
    # Using between 5-6 pixels of negative kerning seemed
    # enough to confuse tesseract but still be very readable
    offset = 0
    for glyph in text:
        position = (
            width / 2 - text_w / 2 + offset,
            height / 2 - text_h / 2 + random.uniform(-3, 7),
        )
        pen.text(position, glyph, font=font, fill=foreground)
        offset += measure(glyph)[0]
        offset -= random.uniform(5, 6)

    # Overlay ten random arcs roughly spanning the drawn text.
    for step in range(10):
        left = int(
            offset * ((step / 2) - 1) / 5
            + width / 2
            - text_w / 2
            + random.uniform(0, 10)
        )
        top = int(height / 2 - text_h + 30 + random.uniform(-10, 15))
        right = int(offset * step / 7 + width / 2 - text_w / 2 + random.uniform(-5, 5))
        bottom = int(height / 2 - text_h + 30 + random.uniform(-10, 30))
        # The arc box needs its corners ordered.
        left, right = min(left, right), max(left, right)
        top, bottom = min(top, bottom), max(top, bottom)
        start = int(random.uniform(-30, 30))
        end = int(random.uniform(160, 300))
        pen.arc((left, top, right, bottom), start, end, fill=foreground)

    # Crop to the bounding box of the nonzero parts, plus a bit of a border.
    box = canvas.getbbox()
    margin = min(text_w, text_h) / 4
    canvas = canvas.crop(
        (box[0] - margin, box[1] - margin, box[2] + margin, box[3] + margin)
    )

    # Turn into black on white and save, format determined from filename.
    ImageOps.invert(canvas).save(file_name)
def gen_subdir(basedir, md5hash, levels):
    """Generate a subdirectory path out of the first _levels_
    characters of _md5hash_, and ensure the directories exist
    under _basedir_.

    Returns the relative path (one directory per character, e.g. "a/b" for
    levels=2), or None when _levels_ is 0.  Raises IndexError if _levels_
    exceeds the length of _md5hash_, and lets OSError from os.mkdir
    propagate (for instance when _basedir_ itself does not exist).
    """
    subdir = None
    for i in range(levels):
        char = md5hash[i]
        subdir = os.path.join(subdir, char) if subdir else char
        try:
            os.mkdir(os.path.join(basedir, subdir))
        except FileExistsError:
            # Already present -- possibly created a moment ago by another
            # worker process.  Checking os.path.exists() first would leave
            # a window in which two workers both decide to create it.
            pass
    return subdir
def try_pick_word(words, badwordlist, verbose, nwords, min_length, max_length):
    """Make a single attempt at producing a captcha word.

    With a word list, *nwords* randomly chosen entries (at least one) are
    concatenated.  With ``words=None``, a random string of lowercase
    letters is built whose length is drawn between *min_length* and
    *max_length* (10 is used when *max_length* is not positive).

    Returns the candidate, or None when it is shorter than *min_length*,
    longer than a positive *max_length*, matches ``nonalpha`` or
    ``confusedletters``, or contains an entry of *badwordlist*.  When
    *verbose* is set the candidate and any rejection reason are printed.
    """

    def skip(template, args):
        # Report (verbose only) why the candidate was rejected; yields None.
        if verbose:
            print(template % args)
        return None

    if words is None:
        if max_length <= 0:
            max_length = 10
        length = random.randint(min_length, max_length)
        word = "".join(chr(97 + random.randint(0, 25)) for _ in range(length))
    else:
        picks = max(nwords, 1)
        word = "".join(
            words[random.randint(0, len(words) - 1)] for _ in range(picks)
        )

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        return skip(
            "skipping word pair '%s' because it has fewer than %d characters",
            (word, min_length),
        )

    if 0 < max_length < len(word):
        return skip(
            "skipping word pair '%s' because it has more than %d characters",
            (word, max_length),
        )

    if nonalpha.search(word):
        return skip(
            "skipping word pair '%s' because it contains non-alphabetic characters",
            word,
        )

    if confusedletters.search(word):
        return skip(
            "skipping word pair '%s' because it contains confusing letters beside each other",
            word,
        )

    for naughty in badwordlist:
        if naughty in word:
            return skip(
                "skipping word pair '%s' because it contains word '%s'",
                (word, naughty),
            )

    return word
def pick_word(words, badwordlist, verbose, nwords, min_length, max_length):
    """Return an acceptable captcha word, retrying up to 1000 times.

    Each attempt is delegated to try_pick_word() with the same arguments.
    If none of the attempts yields a non-empty word the script gives up
    and exits with an error message.
    """
    attempts_left = 1000
    while attempts_left:
        attempts_left -= 1
        candidate = try_pick_word(
            words, badwordlist, verbose, nwords, min_length, max_length
        )
        if candidate:
            return candidate

    # If we can't find a valid combination in 1000 tries, just give up
    sys.exit("Unable to find valid word combinations")
def read_wordlist(filename):
    """Read a word list file: one word per line, returned lowercased.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped.  Skipping them matters for the bad-word list: an empty entry
    is a substring of every word, so a single blank line would otherwise
    cause every candidate to be rejected.

    Returns an empty list when *filename* is not an existing regular file.
    """
    if not os.path.isfile(filename):
        return []
    # The context manager closes the file even if reading fails part-way.
    with open(filename) as f:
        cleaned = (line.strip().lower() for line in f)
        return [word for word in cleaned if word]
def run_in_thread(object):
    """Pool worker: generate one batch of captcha images.

    *object* is a sequence holding, in order: the number of images to
    make, the word list (or None for random characters), the bad-word
    list, the parsed options, the font path, the font size, and the
    mapping that receives filename -> word entries when the jsonmap
    option is set.

    Every image is named ``image_<salt>_<hash>.png`` where the hash is the
    first 16 hex digits of md5(key + salt + word + key + salt), optionally
    placed in hash-derived subdirectories when the dirs option is set.
    """
    count, words, badwordlist, opts, font, fontsize, jsonmap = object[:7]

    for _ in range(count):
        word = pick_word(
            words,
            badwordlist,
            opts.verbose,
            opts.number_words,
            opts.min_length,
            opts.max_length,
        )
        salt = "%08x" % random.randrange(2**32)

        # 64 bits of hash is plenty for this purpose
        keyed = opts.key + salt + word + opts.key + salt
        md5hash = hashlib.md5(keyed.encode("utf-8")).hexdigest()[:16]

        filename = "image_%s_%s.png" % (salt, md5hash)
        if opts.dirs:
            filename = os.path.join(
                gen_subdir(opts.output, md5hash, opts.dirs), filename
            )

        if opts.verbose:
            print(filename)
        if opts.jsonmap:
            jsonmap[filename] = word

        gen_captcha(word, font, fontsize, os.path.join(opts.output, filename))
def split_count(count, workers):
    """Split *count* images across *workers* batches as evenly as possible.

    Returns a list of *workers* batch sizes that sum to exactly *count*;
    the first ``count % workers`` batches carry one extra image.
    """
    base, extra = divmod(count, workers)
    return [base + 1 if i < extra else base for i in range(workers)]


if __name__ == "__main__":
    # This grabs random words from the dictionary 'words' (one
    # word per line) and generates a captcha image for each one,
    # with a keyed salted hash of the correct answer in the filename.
    # To check a reply, hash it in the same way with the same salt and
    # secret key, then compare with the hash value given.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parser = OptionParser()
    parser.add_option(
        "--wordlist", help="A list of words (required)", metavar="WORDS.txt"
    )
    parser.add_option(
        "--random",
        help="Use random characters instead of a wordlist",
        action="store_true",
    )
    parser.add_option(
        "--key",
        help="The passphrase set as $wgCaptchaSecret (required)",
        metavar="KEY",
    )
    parser.add_option(
        "--output",
        help="The directory to put the images in - $wgCaptchaDirectory (required)",
        metavar="DIR",
    )
    parser.add_option(
        "--font", help="The font to use (required)", metavar="FONT.ttf"
    )
    parser.add_option(
        "--font-size",
        help="The font size (default 40)",
        metavar="N",
        type="int",
        default=40,
    )
    parser.add_option(
        "--count",
        help="The maximum number of images to make (default 20)",
        metavar="N",
        type="int",
        default=20,
    )
    parser.add_option(
        "--badwordlist",
        help="A list of words that should not be used",
        metavar="FILE",
        default=os.path.join(script_dir, "badwordlist"),
    )
    parser.add_option(
        "--fill",
        help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs",
        metavar="N",
        type="int",
    )
    parser.add_option(
        "--dirs",
        help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels",
        metavar="N",
        type="int",
    )
    parser.add_option(
        "--verbose", "-v", help="Show debugging information", action="store_true"
    )
    parser.add_option(
        "--number-words",
        help="Number of words from the wordlist which make a captcha challenge (default 2)",
        type="int",
        default=2,
    )
    parser.add_option(
        "--min-length",
        help="Minimum length for a captcha challenge",
        type="int",
        default=1,
    )
    parser.add_option(
        "--max-length",
        help="Maximum length for a captcha challenge",
        type="int",
        default=-1,
    )
    parser.add_option(
        "--threads",
        help="Maximum number of threads to be used to generate captchas",
        type="int",
        default=1,
    )
    parser.add_option(
        "--jsonmap",
        help="Outputs \"filename\": \"word\" mapping for test/debug purposes",
        action="store_true",
    )

    opts, args = parser.parse_args()

    # Validate the required options up front.
    if opts.wordlist:
        wordlist = opts.wordlist
    elif opts.random:
        wordlist = None
    else:
        sys.exit("Need to specify a wordlist")
    if not opts.key:
        sys.exit("Need to specify a key")
    if not opts.output:
        sys.exit("Need to specify an output directory")
    output = opts.output
    if not (opts.font and os.path.exists(opts.font)):
        sys.exit("Need to specify the location of a font")
    font = opts.font

    badwordlist = read_wordlist(opts.badwordlist)
    count = opts.count
    fill = opts.fill
    fontsize = opts.font_size
    threads = opts.threads

    if fill:
        # --fill overrides --count: only make what is missing from the directory.
        count = max(0, fill - len(os.listdir(output)))

    words = None
    if wordlist:
        words = read_wordlist(wordlist)
        # Only keep 4-5 letter words that do not start with "f" and do not
        # begin or end with a doubled letter.
        words = [
            x
            for x in words
            if len(x) in (4, 5) and x[0] != "f" and x[0] != x[1] and x[-1] != x[-2]
        ]
        if not words:
            # Fail here with a clear message rather than inside every worker.
            sys.exit("No usable words found in wordlist %s" % wordlist)

    if count <= 0:
        sys.exit("No need to generate CAPTCHA images.")

    # Never start more workers than there are images, and always at least
    # one (this also guards against --threads 0).
    threads = max(1, min(threads, count))
    # Spread the remainder over the workers so the batches add up to exactly
    # `count`; plain integer division would silently drop count % threads.
    batches = split_count(count, threads)
    chunks = batches[0]  # size of the largest batch, for the progress line

    with multiprocessing.Pool(threads) as p:
        print(
            "Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..."
            % (count, chunks, threads)
        )
        jsonmap = multiprocessing.Manager().dict()
        data = [
            [batch, words, badwordlist, opts, font, fontsize, jsonmap]
            for batch in batches
        ]

        # get() blocks like wait() but also re-raises an exception raised in
        # a worker, so failures are no longer silently ignored.
        p.map_async(run_in_thread, data).get()

    if opts.jsonmap:
        with open("map.json", "w") as outfile:
            json.dump(jsonmap.copy(), outfile, indent=4)
File Metadata
Details
Attached
Mime Type
text/x-script.python
Expires
Sat, May 16, 14:51 (1 d, 3 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
90/d8/cc4dbb2d993b80590a055560e0f6
Default Alt Text
captcha.py (13 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment