import tensorflow as tf import numpy as np import string,random n=10 #街道名称 street_names=['abbey','baker','canal','donner','elm'] #街道类型 street_types=['rd','st','ln','pass','ave'] #邮编 rand_zips=[ random.randint(65000,65999) for i in range(5) ] #门牌号码 numbers=[random.randint(1,9999) for i in range(n)] #街道名称 streets=[random.choice(street_names) for i in range(n)] #街道类型 street_stuffs=[random.choice(street_types) for i in range(n)] #邮编 zips=[random.choice(rand_zips) for i in range(n)] # '4905 baker rd', '6417 canal st' full_streets=[str(x)+' '+y+' '+z for x,y,z in zip(numbers,streets,street_stuffs)] #['8758 elm st', 65469], ['3237 donner rd', 65479] shape=(10,2) ref_data=[list(x) for x in zip(full_streets,zips)] #生成错误随机数据 def create_type(s): rand_ind=random.choice(range(len(s))) s=list(s) s[rand_ind]=random.choice(string.ascii_lowercase) s=''.join(s) return s #错误街道名 type_streets=[ create_type(x) for x in streets] type_full_streets=[str(x)+' '+y+' '+z for x,y,z in zip(numbers,type_streets,street_stuffs)] #['8758 efm st', 65469], ['3237 donnzr rd', 65479] shape=(10,2) test_data=[list(x) for x in zip(type_full_streets,zips)] #声明变量 占位符 sess=tf.Session() test_address=tf.sparse_placeholder(dtype=tf.string) test_zip=tf.placeholder(shape=[None,1],dtype=tf.float32) ref_address=tf.sparse_placeholder(dtype=tf.string) ref_zip=tf.placeholder(shape=[None,n],dtype=tf.float32) #邮编距离 shape=(1,10) target=(0,无穷尽) 当为0时 相似度为一 zip_dist=tf.square(tf.subtract(ref_zip,test_zip)) #地址距离 返回(n,1) (错误的个数 归一化) address_dist=tf.edit_distance(test_address,ref_address,normalize=True) #计算相似度 (0-1) zip_max=tf.gather(tf.squeeze(zip_dist),tf.argmax(zip_dist,1)) zip_min=tf.gather(tf.squeeze(zip_dist),tf.argmin(zip_dist,1)) #(1,10) zip_sim=tf.div(tf.subtract(zip_max,zip_dist),tf.subtract(zip_max,zip_min) ) #(10,1) address_sim=tf.subtract(1.,address_dist) #加权平均 address_weight=0.5 zip_weight=1-address_weight #shape=(1,10) weight_sim=tf.add(tf.transpose(tf.multiply(address_weight,address_sim)),tf.multiply(zip_weight,zip_sim) ) #获取最大索引 top_match_index=tf.arg_max(weight_sim,1) #地址转换成稀疏向量 def sparese_word_vec(word): #['abbey', 'baker'] num_words=len(word) indcies=[[xi,0,yi] for xi,x in enumerate(word) for yi,y in enumerate(x) ] chars=list(''.join(word)) return (tf.SparseTensorValue(indcies,chars,[num_words,1,1])) #获取标准地址 referece_address=np.array([x[0] for x in ref_data]) #(1,10) referece_zips=np.array( [[x[1] for x in ref_data]]) #转换 sparse_ref_set=sparese_word_vec(referece_address) for i in range(n): test_address_entry=test_data[i][0] test_zip_entry=[[test_data[i][1]]] test_address_repeat=[test_address_entry]*n sparse_test_set=sparese_word_vec(test_address_repeat) feeddict={test_address:sparse_test_set,test_zip:test_zip_entry,ref_address:sparse_ref_set,ref_zip:referece_zips } #获取最大相似度索引 best_match=sess.run(top_match_index,feed_dict=feeddict) best_street=referece_address[best_match] [best_zip]=referece_zips[0][best_match] [[test_zip_]]=test_zip_entry print("Error address : " +str(test_address_entry) +" "+str(test_zip_)) print("Match: "+str(best_street)+" "+str(best_zip))
TensorFlow(七) 地址匹配
猜你喜欢
转载自www.cnblogs.com/x0216u/p/9237761.html
今日推荐
周排行