# 1 主程序 (Part 1 - main program: data preparation, SVM training and evaluation)
# -*- coding: utf-8 -*-
import cv2
import time
import numpy as np
import pandas as pd
# ---------------------------------------------------------------------------
# Data-preparation pipeline (steps 1-6).
#
# This code used to be switched off by wrapping it in one big string literal.
# It is still switched off -- nothing below runs on its own -- but every step
# is now a real function that can be called by hand, in this order:
#
#   1. download_captchas()      fetch the raw captcha images
#   2. convert_to_png()         re-save them as PNG so OpenCV can read them
#   3. slice_training_images()  binarise and cut every captcha into 5 digits
#   5. (manual) sort the crops into 05_NUM_0_9/<digit>/ folders
#      (the original notes mention about 170 samples per digit)
#   6. build_libsvm_files()     turn the sorted crops into LIBSVM text files
# ---------------------------------------------------------------------------

CAPTCHA_URL = 'http://mis.sgu.edu.cn/WaterMark.ashx'

# Pixels darker than this become foreground when the captcha is binarised.
BINARY_THRESHOLD = 150
# Rows skipped at the top of the captcha before cropping the digits.
TOP_MARGIN = 4
# (left, right) column range of each of the five digits in a captcha.
DIGIT_COLUMNS = ((6, 14), (15, 23), (24, 32), (33, 41), (42, 50))
# Number of columns / rows of a digit crop that are encoded as features.
DIGIT_WIDTH = 8
DIGIT_HEIGHT = 14


def download_captchas(train_count=2000, test_count=200, url=CAPTCHA_URL):
    """Step 1: download captcha images.

    Saves ``train_count`` images as ``01_MIS/<i>.jpg`` and ``test_count``
    images as ``02_TEST/<i>.jpg``. Both folders must already exist.
    """
    # Imported here because the script's import block does not pull in urllib.
    import urllib.request

    for folder, count in (('01_MIS', train_count), ('02_TEST', test_count)):
        for i in range(count):
            urllib.request.urlretrieve(url, '%s/%s.jpg' % (folder, i))
            if i % 10 == 1:
                print(i)


def convert_to_png(count=2000, src_dir='01_MIS', dst_dir='03_TRAIN_PNG'):
    """Step 2: re-save the downloaded images as PNG.

    The downloaded files are really GIFs, which OpenCV cannot read, so PIL is
    used to convert them. Images that cannot be read or written are reported
    and skipped.
    """
    from PIL import Image

    for i in range(count):
        try:
            with Image.open('%s/%s.jpg' % (src_dir, i)) as im:
                im.save('%s/%s.png' % (dst_dir, i))
        except (OSError, ValueError) as err:
            print('---------------------读取出错---------------------', i, err)
            continue
        print(i)


def split_digits(img):
    """Return the five digit crops of a captcha image (2-D array)."""
    return [img[TOP_MARGIN:, left:right] for left, right in DIGIT_COLUMNS]


def slice_training_images(count=2000, src_dir='03_TRAIN_PNG',
                          dst_dir='04_SLIPE'):
    """Step 3: binarise every captcha and save its five digits separately.

    Crops are written as ``<dst_dir>/<n>.png`` with a running counter ``n``;
    they are meant to be sorted by hand into per-digit folders afterwards.

    Returns:
        The number of digit crops written.
    """
    num = 0
    for i in range(count):
        # imread signals a missing/unreadable file by returning None.
        img = cv2.imread('%s/%d.png' % (src_dir, i), 0)
        if img is None:
            print('--------------ERROR-------------', i, 'num=', num)
            continue
        try:
            # Inverted binary threshold (type 1): dark strokes become 255.
            _, img = cv2.threshold(img, BINARY_THRESHOLD, 255,
                                   cv2.THRESH_BINARY_INV)
            for digit in split_digits(img):
                cv2.imwrite('%s/%d.png' % (dst_dir, num), digit)
                num += 1
        except cv2.error as err:
            print('--------------ERROR-------------', i, 'num=', num, err)
            continue
        if i % 10 == 1:
            print('slipe %s piture...' % i)
    return num


def libsvm_feature_line(label, img):
    """Format one digit crop as a single LIBSVM sample line.

    Every non-zero pixel counts as 1. Features 1-8 are the sums of the first
    8 columns, features 9-22 the sums of the first 14 rows.

    Args:
        label: Class label written at the start of the line.
        img: 2-D array with at least 14 rows and 8 columns.

    Returns:
        The complete line, including the trailing newline.

    Raises:
        ValueError: If ``img`` is not 2-D or is smaller than 14 x 8.
    """
    binary = np.asarray(img) > 0
    if (binary.ndim != 2 or binary.shape[0] < DIGIT_HEIGHT
            or binary.shape[1] < DIGIT_WIDTH):
        raise ValueError('digit image must be at least %d x %d, got shape %r'
                         % (DIGIT_HEIGHT, DIGIT_WIDTH, binary.shape))
    # Compute both projections once instead of once per feature.
    col_sums = binary.sum(axis=0)
    row_sums = binary.sum(axis=1)
    parts = [str(label)]
    parts.extend(' %s:%s' % (k + 1, int(col_sums[k]))
                 for k in range(DIGIT_WIDTH))
    parts.extend(' %s:%s' % (k + 1 + DIGIT_WIDTH, int(row_sums[k]))
                 for k in range(DIGIT_HEIGHT))
    return ''.join(parts) + '\n'


def write_libsvm_file(path, indices, digits_dir='05_NUM_0_9'):
    """Write the crops ``<digits_dir>/<digit>/<i>.png`` as LIBSVM samples.

    File numbers that do not exist in a digit folder are skipped silently:
    the crops were distributed by hand, so each folder only holds a sparse
    subset of the numbers. Each sample is formatted completely before it is
    written, so a bad image can never leave a truncated line in the file.

    Returns:
        The number of samples written.
    """
    written = 0
    with open(path, 'w') as out:
        for digit in range(10):
            for i in indices:
                img = cv2.imread('%s/%s/%s.png' % (digits_dir, digit, i), 0)
                if img is None:
                    continue
                try:
                    line = libsvm_feature_line(digit, img)
                except ValueError:
                    continue
                out.write(line)
                written += 1
                if written % 20 == 1:
                    print('piture %s ' % written)
            print('正在处理数字 %s .....' % digit)
    return written


def build_libsvm_files():
    """Step 6: build ``libsvm_train.txt`` and ``libsvm_test.txt``.

    Crops numbered 0-1999 form the training set and 2000-2499 the test set
    (the original notes say the highest file number is a little over 2300).
    The training file is echoed to stdout after it has been written.
    """
    write_libsvm_file('libsvm_train.txt', range(2000))
    with open('libsvm_train.txt', 'r') as train_file:
        print(train_file.read())
    write_libsvm_file('libsvm_test.txt', range(2000, 2500))
# SVM
from svmutil import *
def train_and_save_model(train_file='libsvm_train.txt',
                         model_file='libsvm_model.txt',
                         options='-c 4', limit=None):
    """Train the SVM on a LIBSVM-format file and save the model to disk.

    This step used to be disabled by wrapping it in a string literal. It is
    still not executed automatically; call it by hand whenever the model has
    to be rebuilt.

    Args:
        train_file: LIBSVM-format training data.
        model_file: Where the trained model is saved.
        options: Option string handed to ``svm_train``.
        limit: If given, train on the first ``limit`` samples only (the
            original script kept a commented-out variant using 1200).

    Returns:
        The trained model object returned by ``svm_train``.
    """
    y, x = svm_read_problem(train_file)
    if limit is not None:
        y, x = y[:limit], x[:limit]
    model = svm_train(y, x, options)
    svm_save_model(model_file, model)
    return model
# Evaluate the stored model: load it from disk, read the LIBSVM-format test
# samples and print the label predicted for each of them.
model = svm_load_model('libsvm_model.txt')
test_y, test_x = svm_read_problem('libsvm_test.txt')
prediction = svm_predict(test_y, test_x, model)
# svm_predict returns three values; only the first one is used below.
p_label, p_acc, p_val = prediction
print(p_label)
# 2 读取模型,从文件夹里读取图片进行识别 (Part 2 - load the model and recognise images read from a folder)
# -*- coding: utf-8 -*-
import cv2
import time
import numpy as np
import pandas as pd
from PIL import Image
import requests
from bs4 import BeautifulSoup
import urllib.request
from svmutil import *
# Captcha endpoint of the target site.
# NOTE(review): not referenced by the visible part of this script.
url = 'http://mis.sgu.edu.cn/WaterMark.ashx'
# SVM digit classifier loaded from disk (presumably the file saved by the
# training script -- confirm).
model = svm_load_model('libsvm_model.txt')
# (left, right) column range of each of the five digits in a captcha.
DIGIT_SPANS = ((6, 14), (15, 23), (24, 32), (33, 41), (42, 50))
# Rows skipped at the top of the captcha before cropping the digits.
CROP_TOP = 4
# Number of columns / rows of a digit crop that are encoded as features.
FEATURE_COLS = 8
FEATURE_ROWS = 14


def feature_line(digit_img, label=0):
    """Format one digit crop as a single LIBSVM sample line.

    Every non-zero pixel counts as 1. Features 1-8 are the sums of the first
    8 columns, features 9-22 the sums of the first 14 rows.

    Args:
        digit_img: 2-D array with at least 14 rows and 8 columns.
        label: Label written at the start of the line. The true digit is
            unknown at prediction time, so 0 is used as a placeholder.

    Returns:
        The complete line, including the trailing newline.

    Raises:
        ValueError: If ``digit_img`` is not 2-D or is smaller than 14 x 8.
    """
    binary = np.asarray(digit_img) > 0
    if (binary.ndim != 2 or binary.shape[0] < FEATURE_ROWS
            or binary.shape[1] < FEATURE_COLS):
        raise ValueError('digit image must be at least %d x %d, got shape %r'
                         % (FEATURE_ROWS, FEATURE_COLS, binary.shape))
    # Compute both projections once instead of once per feature.
    col_sums = binary.sum(axis=0)
    row_sums = binary.sum(axis=1)
    parts = [str(label)]
    parts.extend(' %s:%s' % (k + 1, int(col_sums[k]))
                 for k in range(FEATURE_COLS))
    parts.extend(' %s:%s' % (k + 1 + FEATURE_COLS, int(row_sums[k]))
                 for k in range(FEATURE_ROWS))
    return ''.join(parts) + '\n'


def recognise_captcha(png_path, svm_model, scratch_file='svm_train_100.txt'):
    """Predict the five digits shown in one captcha image.

    The image is binarised, cut into five digit crops, written to
    ``scratch_file`` in LIBSVM format and classified with ``svm_model``.

    Returns:
        The recognised digits as a string, or ``None`` when the image is
        missing, unreadable or too small to be cropped.
    """
    # imread signals a missing/unreadable file by returning None.
    img = cv2.imread(png_path, 0)
    if img is None:
        return None
    # Inverted binary threshold (type 1): dark strokes become 255.
    _, img = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY_INV)
    try:
        lines = [feature_line(img[CROP_TOP:, left:right])
                 for left, right in DIGIT_SPANS]
    except ValueError:
        return None
    with open(scratch_file, 'w') as scratch:
        scratch.writelines(lines)
    test_y, test_x = svm_read_problem(scratch_file)
    p_label, p_acc, p_val = svm_predict(test_y, test_x, svm_model)
    print(p_label)
    return ''.join(str(int(value)) for value in p_label[:5])


def main():
    """Recognise 03_TRAIN_PNG/0.png .. 999.png and save each image again as
    Result/<recognised digits>.png.

    Numbers without a readable PNG are skipped: the conversion step that
    fills 03_TRAIN_PNG may leave gaps.
    """
    for PIC in range(1000):
        png_path = '03_TRAIN_PNG/%d.png' % PIC
        NAME = recognise_captcha(png_path, model)
        if NAME is None:
            print('skipped %s (missing or unreadable image)' % png_path)
            continue
        print(NAME)
        # Re-read without the grayscale flag so the copy keeps the original
        # colours.
        img = cv2.imread(png_path)
        cv2.imwrite('Result/%s.png' % NAME, img)


if __name__ == '__main__':
    main()