import glob
import os
import time
import xml.dom.minidom as xdm

import cv2
import numpy as np
from PIL import Image
from skimage.feature import hog
# NOTE(review): sklearn.externals.joblib was removed in scikit-learn 0.23;
# newer environments need the standalone `joblib` package instead.
from sklearn.externals import joblib
from sklearn.svm import LinearSVC

from config import *
# Directory the annotation XML files are read from.
train_xml_filePath = './sig-pos/sig-train-pos-xml/'
# Directory the cropped positive samples are written to.
train_pos_restPath = './sig-pos/sig-train-pos-rest/'
def getBox(object):
    """Return the bounding box stored in one annotation ``<object>`` element.

    Args:
        object: An ``xml.dom.minidom`` element that contains a ``<bndbox>``
            child with ``<xmin>``, ``<ymin>``, ``<xmax>`` and ``<ymax>``
            children. The parameter name shadows the builtin; it is kept so
            existing keyword callers continue to work.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` as a tuple of ints.

    Raises:
        IndexError: If ``<bndbox>`` or one of the coordinate tags is missing.
    """
    bndbox = object.getElementsByTagName("bndbox")[0]
    # Look the coordinates up by tag name: positional childNodes indices only
    # work when the XML has whitespace text nodes between the elements.
    return tuple(
        int(bndbox.getElementsByTagName(tag)[0].firstChild.data)
        for tag in ("xmin", "ymin", "xmax", "ymax")
    )


def save_image_pos(filePath):
    """Crop every annotated object from the images in *filePath* and save it.

    For each file in *filePath* the annotation ``<stem>.xml`` is parsed from
    ``train_xml_filePath``. Every ``<object>`` box is cropped, resized to
    ``save_image_size`` with bicubic interpolation and written to
    ``train_pos_restPath`` as ``<n>.jpg``, where ``n`` is a counter running
    across all images.

    Args:
        filePath: Directory containing the source images.
    """
    num = 0
    for childDir in os.listdir(filePath):
        f_im = os.path.join(filePath, childDir)
        # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b").
        stem = os.path.splitext(childDir)[0]
        f_xml = os.path.join(train_xml_filePath, '%s.xml' % stem)
        root = xdm.parse(f_xml).documentElement
        with Image.open(f_im) as image:
            for obj in root.getElementsByTagName("object"):
                region = image.crop(getBox(obj))
                data = np.asarray(region)
                # PIL yields RGB(A) while cv2.imwrite expects BGR(A); without
                # this the red and blue channels are swapped on disk.
                if data.ndim == 3 and data.shape[2] == 3:
                    data = cv2.cvtColor(data, cv2.COLOR_RGB2BGR)
                elif data.ndim == 3 and data.shape[2] == 4:
                    data = cv2.cvtColor(data, cv2.COLOR_RGBA2BGRA)
                data = cv2.resize(data, save_image_size,
                                  interpolation=cv2.INTER_CUBIC)
                save_name = os.path.join(train_pos_restPath, "%s.jpg" % str(num))
                cv2.imwrite(save_name, data)
                num += 1
        print("%d processing: %s" % (num, childDir))
if __name__ == "__main__":
    # Positive-sample extraction produced roughly 4500 diseased-cell crops
    # of size (100, 100).
    save_image_pos("./sig-pos/sig-train-pos")
def save_image_neg2(filePath):
    """Tile every image in *filePath* into fixed-size crops saved as negatives.

    A window of ``save_image_size`` is moved over each image in steps of
    ``slid_step_size2``. Each crop is written to ``train_neg_restPath`` as
    ``<n>.jpg``, where ``n`` is a counter running across all images. The
    function returns as soon as the counter exceeds 20000.

    Args:
        filePath: Directory containing the source images.
    """
    # NOTE(review): `tqdm` and `train_neg_restPath` are not defined in the
    # visible part of this file; presumably they come from `config` - confirm.
    win_w, win_h = save_image_size
    step_x, step_y = slid_step_size2
    num = 0
    for childDir in tqdm(os.listdir(filePath)):
        f_im = os.path.join(filePath, childDir)
        with Image.open(f_im) as image:
            width, height = image.size
            # "+ 1" keeps the window that sits flush with the right/bottom
            # edge; without it an image exactly one window wide yields nothing.
            for x in range(0, width - win_w + 1, step_x):
                for y in range(0, height - win_h + 1, step_y):
                    region = image.crop((x, y, x + win_w, y + win_h))
                    data = np.asarray(region)
                    # PIL yields RGB(A) while cv2.imwrite expects BGR(A).
                    if data.ndim == 3 and data.shape[2] == 3:
                        data = cv2.cvtColor(data, cv2.COLOR_RGB2BGR)
                    elif data.ndim == 3 and data.shape[2] == 4:
                        data = cv2.cvtColor(data, cv2.COLOR_RGBA2BGRA)
                    data = cv2.resize(data, save_image_size,
                                      interpolation=cv2.INTER_CUBIC)
                    save_name = os.path.join(train_neg_restPath,
                                             "%s.jpg" % str(num))
                    cv2.imwrite(save_name, data)
                    num += 1
                    # NOTE(review): this stops after 20001 files, not 20000 -
                    # confirm which cap is intended.
                    if num > 20000:
                        return
2 HOG特征提取
HOG,即方向梯度直方图(Histogram of Oriented Gradients),通过统计(直方图)的方式提取特征。其基本思路是将图像各局部区域的梯度统计特征拼接起来,作为整幅图像的总特征。这里的“局部”是指将图像划分为多个 Block,再把每个 Block 内的特征联合起来,形成最终的特征向量。
# Train a classifier on previously extracted feature files.
# Positive samples get label 1, negative samples label 0.
fds = []
labels = []

# Load the positive features.
# NOTE: joblib.load unpickles its input - only load feature files you created.
for feat_path in glob.glob(os.path.join(pos_feat_ph, "*.feat")):
    fds.append(joblib.load(feat_path))
    labels.append(1)
print("Pos features read over!")

# Load the negative features.
for feat_path in glob.glob(os.path.join(neg_feat_ph, "*.feat")):
    fds.append(joblib.load(feat_path))
    labels.append(0)
print("Neg features read over!")

# Compare by value: `is` tests object identity and is not reliable for strings.
if clf_type == "LIN_SVM":
    clf = LinearSVC(max_iter=1000)
    print("Training a Linear SVM Classifier")
    clf.fit(fds, labels)

    # Create the directory of the file that is actually written, if any.
    model_file = model_path + model_name
    model_dir = os.path.dirname(model_file)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(clf, model_file)
    print("Classifier saved to {}".format(model_path))