python生成简单的验证码

最近因为需要用深度学习做一些简单的验证码识别工作，网上爬取验证码的效率比较慢，于是决定采用自己写脚本生成验证码来使用。
python版本： python3.6
代码如下：
#!/usr/bin/env python3
# coding: utf-8

import sys
import os
import random
import shutil
import uuid
import itertools
import threading
from queue import Queue
# Requires the Pillow library; install it from a terminal with: pip3 install pillow
from PIL import Image, ImageDraw, ImageFont, ImageFilter

# Font files live in a "ttf" folder next to this script; add or replace fonts there as needed.
DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ttf')
DEFAULT_FONTS = [os.path.join(DATA_DIR, font_file) for font_file in ('DroidSansMono.ttf',)]


class Captcha():
    """Render short strings as distorted, noisy CAPTCHA images.

    Every character is drawn with a randomly chosen font/size, rotated and
    warped, then pasted side by side on a light background. Noise dots and
    one arc are added on top before the image is smoothed.
    """

    def __init__(self, width=128, height=32, fonts=DEFAULT_FONTS, font_sizes=None):
        """Load every font/size combination once, up front.

        :param width: width of the generated image in pixels.
        :param height: height of the generated image in pixels.
        :param fonts: iterable of TrueType font file paths.
        :param font_sizes: iterable of font sizes; defaults to (26, 28, 30).
        """
        self._width = width
        self._height = height
        self._fonts = fonts
        self._font_sizes = font_sizes or (26, 28, 30)
        self._true_fonts = tuple(ImageFont.truetype(f, s)
                                 for f in self._fonts
                                 for s in self._font_sizes)

    @staticmethod
    def create_noise_dots(image, number=50):
        """Draw ``number`` single-pixel dots of one random colour on ``image``.

        :param image: image to draw on (modified in place).
        :param number: how many dots to draw.
        :return: the same ``image`` object.
        """
        color = Captcha.random_color(100, 238, random.randint(220, 255))
        w, h = image.size
        draw = ImageDraw.Draw(image)
        # randint() includes its upper bound, so the last valid pixel index is
        # size - 1; the max() keeps a degenerate 0-sized image from raising.
        max_x = max(w - 1, 0)
        max_y = max(h - 1, 0)
        for _ in range(number):
            x1 = random.randint(0, max_x)
            y1 = random.randint(0, max_y)
            draw.point((x1, y1), fill=color)
        return image

    @staticmethod
    def create_noise_curve(image):
        """Draw one randomly placed arc across ``image``.

        The bounding box starts in the left fifth and ends in the right fifth
        of the image, and stays within the middle three fifths vertically.

        :param image: image to draw on (modified in place).
        :return: the same ``image`` object.
        """
        color = Captcha.random_color(100, 255, random.randint(220, 255))
        w, h = image.size
        x1 = random.randint(0, int(w / 5))
        x2 = random.randint(w - int(w / 5), w)
        y1 = random.randint(int(h / 5), h - int(h / 5))
        y2 = random.randint(y1, h - int(h / 5))
        points = [x1, y1, x2, y2]
        end = random.randint(160, 200)
        start = random.randint(0, 20)
        ImageDraw.Draw(image).arc(points, start, end, fill=color)
        return image

    @staticmethod
    def random_color(start, end, opacity=None):
        """Return a random colour with each channel drawn from [start, end].

        :param start: inclusive lower bound for each of R, G and B.
        :param end: inclusive upper bound for each of R, G and B.
        :param opacity: optional alpha value appended as a fourth component.
        :return: ``(r, g, b)`` or, when ``opacity`` is given, ``(r, g, b, opacity)``.
        """
        red = random.randint(start, end)
        green = random.randint(start, end)
        blue = random.randint(start, end)
        return (red, green, blue) if opacity is None \
            else (red, green, blue, opacity)

    @staticmethod
    def _text_size(draw, text, font):
        """Return ``(width, height)`` of ``text`` rendered with ``font``.

        ``ImageDraw.textsize`` was removed in Pillow 10. The right and bottom
        edges of ``textbbox`` measured from the origin are used instead, with
        ``textsize`` kept as a fallback for Pillow releases older than 8.0.
        """
        if hasattr(draw, 'textbbox'):
            _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
            # Cast defensively: the values are used as pixel dimensions.
            return int(right), int(bottom)
        return draw.textsize(text, font=font)

    def draw_image(self, chars, background):
        """Create the CAPTCHA image itself (text only, no noise).

        :param chars: text to be generated.
        :param background:  background color.
        :return: an RGB image of size ``(width, height)``; just the plain
            background when ``chars`` is empty.
        """
        image = Image.new('RGB', (self._width, self._height), background)
        if not chars:
            # Nothing to draw, and the spacing maths below divides by len(chars).
            return image

        measure = ImageDraw.Draw(image)

        def _draw_character(c):
            """Render one character as a rotated, warped RGBA image."""
            font = random.choice(self._true_fonts)
            color = self.random_color(10, 175, random.randint(220, 255))
            w, h = self._text_size(measure, c, font)

            # Small random offset inside the glyph canvas.
            dx = random.randint(0, 3)
            dy = random.randint(0, 4)
            img = Image.new('RGBA', (w + dx, h + dy))
            ImageDraw.Draw(img).text((dx, dy), c, font=font, fill=color)

            # rotate
            img = img.crop(img.getbbox())
            img = img.rotate(random.uniform(-30, 30), Image.BILINEAR, expand=1)

            # warp: jitter the four corners of the source quadrilateral
            dx = w * random.uniform(0.1, 0.3)
            dy = h * random.uniform(0.2, 0.3)
            x1 = int(random.uniform(-dx, dx))
            y1 = int(random.uniform(-dy, dy))
            x2 = int(random.uniform(-dx, dx))
            y2 = int(random.uniform(-dy, dy))
            w2 = w + abs(x1) + abs(x2)
            h2 = h + abs(y1) + abs(y2)
            data = (x1, y1,
                    -x1, h2 - y2,
                    w2 + x2, h2 + y2,
                    w2 - x2, -y1)
            img = img.resize((w2, h2))
            img = img.transform((w, h), Image.QUAD, data)
            return img

        char_images = []
        for c in chars:
            # Randomly insert a blank glyph to vary the spacing.
            if random.random() > 0.5:
                char_images.append(_draw_character(" "))
            char_images.append(_draw_character(c))

        text_width = sum(im.size[0] for im in char_images)

        # Temporarily widen the canvas if the glyphs do not fit.
        width = max(text_width, self._width)
        image = image.resize((width, self._height))

        average = int(text_width / len(chars))
        rand = int(0.25 * average)
        offset = int(average * 0.1)

        for img in char_images:
            w, h = img.size
            mask = img.split()[3]  # alpha channel, so only glyph pixels are pasted
            image.paste(img, (offset, int((self._height - h) / 2)), mask)
            # Advance by the glyph width minus a random overlap.
            offset = offset + w + random.randint(-rand, 0)

        if width != self._width:
            image = image.resize((self._width, self._height))

        return image

    def generate_captcha_image(self, chars):
        """Generate the image of the given characters.

        :param chars: text to be generated.
        :return: the smoothed image with text, noise dots and a noise arc.
        """
        background = self.random_color(238, 255)
        img = self.draw_image(chars, background)
        self.create_noise_dots(img)
        self.create_noise_curve(img)
        img = img.filter(ImageFilter.SMOOTH)
        return img

    def write(self, chars, output, format='png'):
        """Generate and write an image CAPTCHA data to the output.

        :param chars: text to be generated.
        :param output: output destination.
        :param format: image file format
        :return: whatever ``Image.save`` returns.
        """
        img = self.generate_captcha_image(chars)
        return img.save(output, format=format)


def get_choices(digit=True, lowercase=True, uppercase=True):
    """Build the candidate character set for captcha text.

    The characters 0, O, o, 1, I and l are left out of every group because
    they are too easy to confuse once distorted.

    :param digit: include the digits 2-9.
    :param lowercase: include lowercase letters (without l and o).
    :param uppercase: include uppercase letters (without I and O).
    :return: the enabled groups concatenated in digit, lowercase, uppercase order.
    """
    groups = (
        (digit, "23456789"),
        (lowercase, 'abcdefghijkmnpqrstuvwxyz'),
        (uppercase, 'ABCDEFGHJKLMNPQRSTUVWXYZ'),
    )
    return "".join(symbols for enabled, symbols in groups if enabled)


def _gen_captcha(img_dir, n, choices, img_queue, is_remove=True):
    """Generate ``n`` random 4-character captchas and queue them for saving.

    Each captcha uses 4 characters drawn from ``choices`` without reusing a
    position. The images are not written here; ``(file_path, image)`` tuples
    are put on ``img_queue`` for a consumer to save.

    :param img_dir: directory the file paths point into; created if missing.
    :param n: number of captchas to generate.
    :param choices: string of candidate characters (at least 4 are needed,
        otherwise ``random.sample`` raises ``ValueError``).
    :param img_queue: queue receiving ``(file_path, image)`` tuples.
    :param is_remove: when true, delete an existing ``img_dir`` first.
    """
    if is_remove and os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    try:
        os.makedirs(img_dir, exist_ok=True)
    except OSError:
        print("路径创建失败，请检查输入参数")
        sys.exit(1)

    captcha = Captcha(width=80, height=32)
    print('generating %s captchas in %s' % (n, img_dir))
    for _ in range(n):
        # sample() picks a uniformly random ordered selection of 4 distinct
        # positions, i.e. a random 4-permutation, without materialising all
        # of them in memory or indexing one past the end of the list.
        chars = "".join(random.sample(choices, 4))
        fn = os.path.join(img_dir, '%s_%s.png' % (chars, uuid.uuid4()))
        img_queue.put_nowait((fn, captcha.generate_captcha_image(chars)))


def _write(img_queue, format, t1):
    """Save queued images to disk until the producer thread has finished.

    :param img_queue: queue yielding ``(file_path, image)`` tuples.
    :param format: image file format passed to ``image.save``.
    :param t1: producer thread; writing stops once it is no longer alive
        and the queue has been drained.
    """
    from queue import Empty  # local import: the file itself only imports Queue

    count = 0
    while True:
        # Sample liveness BEFORE polling the queue: if the producer was
        # already finished and the queue is still empty afterwards, nothing
        # more can arrive. Checking in the opposite order could drop images
        # queued just before the producer exited.
        producer_alive = t1.is_alive()
        try:
            # A blocking get with a short timeout avoids a busy spin.
            fn, img = img_queue.get(timeout=0.05)
        except Empty:
            if not producer_alive:
                break
            continue
        img.save(fn, format=format)
        count += 1
        if count % 1000 == 0:
            print(f"gen {count}")


# Generate the image list and label files
def _gen_list(path, n):
    """Write ``img_list.txt`` and ``label.txt`` for the PNG files in ``path``.

    At most ``n`` images are listed, in random order. The label of an image
    is taken from the part of its file name before the first ``_``; every
    character is written as its index in the 62-character alphabet
    ``0-9a-zA-Z``, separated by spaces.

    :param path: directory containing the ``<chars>_<uuid>.png`` images; the
        two list files are written into the same directory.
    :param n: maximum number of images to list.
    :raises ValueError: if a label contains a character outside ``0-9a-zA-Z``.
    """
    choices = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    # Filter for images BEFORE truncating to n, so other files in the
    # directory (including the two list files written below) do not use up
    # slots and shrink the list below min(n, number of images).
    img_list = [name for name in os.listdir(path)
                if name.split('.')[-1] == "png"]
    random.shuffle(img_list)
    img_path = os.path.join(path, "img_list.txt")
    label_path = os.path.join(path, "label.txt")

    print('generating %s captchas list in %s' % (n, path))
    with open(img_path, "w") as f_img, open(label_path, "w") as f_label:
        for name in img_list[:n]:
            chars = name.split('_', 1)[0]
            label = ' '.join(str(choices.index(x)) for x in chars)
            f_img.write(name + '\n')
            f_label.write(name + ' ' + label + '\n')


if __name__ == "__main__":
    # Command-line entry point:
    #   img  <dir> <num>  -> generate <num> captcha images in <dir>, then the list files
    #   list <dir> <num>  -> only (re)generate the list files for the images in <dir>
    # Anything else prints the usage text below.

    info = """
            use：python3 gen_verification_code.py argv[1] argv[2] argv[3]
            argv[1]: img / list             |-> 
                    img 生成图片+列表文件；list 根据目录生成图片列表
            argv[2]: {img_path}(文件夹路径)  |-> 
                    如果argv[1]是img，则该目录为生成图片的路径；
                    如果argv[1]是list,则生成该目录下图片的列表文件，且该列表文件保存在该目录下
            argv[3]: {num}(数量，正整数)     |->
                    生成图片的数量，或者生成图片列表中的图片数量
                    如果在选择生成list时，该数量大于目录下总图片数，则生成的列表长度为总图片数，即num = min{argv[3], num of 目录下图片数}
                    
            示例:   生成图片： python3 gen_verification_code.py img ~/data/img 10000  
                        |->   在~/data/img文件夹下生成10000张验证码图片
                    生成列表： python3 gen_verification_code.py list ~/data/img 10000 
                        |->   在~/data/img文件夹下该目录下图片的一个列表文件，长度最多10000
           """
    task = sys.argv[1] if len(sys.argv) == 4 else None

    if task not in ("img", "list"):
        print(info)
    else:
        path = sys.argv[2]
        # The count must be a positive integer; show the usage text instead
        # of a traceback when it is not.
        try:
            num = int(sys.argv[3])
        except ValueError:
            num = 0
        if num <= 0:
            print(info)
            sys.exit(1)

        if task == "img":
            img_queue = Queue()
            # Producer: renders the images and queues them.
            t1 = threading.Thread(target=_gen_captcha, args=(path, num, get_choices(), img_queue, True))
            # Consumer: saves the queued images to disk.
            t2 = threading.Thread(target=_write, args=(img_queue, "png", t1))
            t1.start()
            t2.start()
            # join() blocks without spinning, so the main thread does not
            # compete with the workers for the interpreter lock.
            t1.join()
            t2.join()

        # Both tasks finish by (re)generating the list files.
        _gen_list(path, num)
一个简单的脚本，该脚本主要是对字母进行了一些简单的变形和大小缩放，再加上一些随机的噪点和细线，同时用uuid来进行命名避免同字母组合覆盖，生成效果如下：
生成验证码
python生成简单的验证码

python生成简单的验证码

猜你喜欢