import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import edu.udo.cs.wvtool.main.WVTWordVector; public class KNN { private int k = 15; private int cLimit = 1; private double[] ClassSim = null; private static Map<Integer, String> indexmap = null; static { if (indexmap == null) { indexmap = new HashMap<Integer, String>(); indexmap.put(0, "计算机"); indexmap.put(1, "房地产"); /* indexmap.put(0, "汽车"); indexmap.put(1, "教育"); indexmap.put(2, "娱乐"); indexmap.put(3, "财经"); indexmap.put(4, "房产"); indexmap.put(5, "军事"); indexmap.put(6, "奥运"); indexmap.put(7, "时政"); indexmap.put(8, "体育"); indexmap.put(9, "科技"); */ } } public KNN() { } public List LazyLearning(WVTWordVector v, WVTWordVector[] vectors, int numClasses) { if (v == null || vectors == null) return null; System.out.println("number of documents : " + vectors.length); System.out.println("number of classes: " + numClasses); ClassSim = new double[numClasses]; for (int i = 0; i < numClasses; i++) { ClassSim[i] = 0; } k = (k < vectors.length)? 
k : vectors.length; double[] Sim = new double[vectors.length]; for (int i = 0; i < Sim.length; i++) { Sim[i] = 0; Map map1 = v.getWordMap(); Map map2 = vectors[i].getWordMap(); for (Iterator it = map1.keySet().iterator(); it.hasNext();) { String word1 = (String)it.next(); if (map2.containsKey(word1)) { double value1 = Double.valueOf(map1.get(word1).toString()); double value2 = Double.valueOf(map2.get(word1).toString()); Sim[i] += (value1 * value2); } } } for (int i = 0; i < k; i++) { for (int j = i + 1; j < Sim.length; j++) { if (Sim[j] > Sim[i]) { double dtemp = Sim[i]; Sim[i] = Sim[j]; Sim[j] = dtemp; WVTWordVector wv = vectors[i]; vectors[i] = vectors[j]; vectors[j] = wv; } } } double TotalSim = 0; for (int i = 0; i < k; i++) { WVTWordVector wv = vectors[i]; int numClass = wv.getDocumentInfo().getClassValue(); ClassSim[numClass] += Sim[i]; TotalSim += Sim[i]; } // output the first 3 class int[] index = new int[ClassSim.length]; for (int i = 0; i < ClassSim.length; i++) index[i] = i; for (int i = 0; i < cLimit; i++) { for (int j = i + 1; j < ClassSim.length; j++) { if (ClassSim[j] > ClassSim[i]) { double dtemp = ClassSim[i]; ClassSim[i] = ClassSim[j]; ClassSim[j] = dtemp; int itemp = index[i]; index[i] = index[j]; index[j] = itemp; } } } List<CategoryResult> result = new ArrayList<CategoryResult>(); for (int i = 0; i < cLimit; i++) { if (ClassSim[i] > 0) { CategoryResult cr = new CategoryResult(indexmap.get(index[i]).toString(), ClassSim[i] / TotalSim); result.add(cr); } } ///* for (int i = 0; i < index.length; i++){ System.out.println("cat:"+index[i] + ": " + ClassSim[i]); } for (int i = 0; i < k; i++){ WVTWordVector wv = vectors[i]; String id; //int cutIndex = wv.getDocumentInfo().getSourceName().lastIndexOf(File.separator); //if (cutIndex > 0) // id = wv.getDocumentInfo().getSourceName().substring(cutIndex + 1); //else id = wv.getDocumentInfo().getSourceName(); System.out.println("c:"+id + ": " + Sim[i]); } //*/ return result; } } class CategoryResult { 
private String CategoryName; private double similarity; public CategoryResult(String categoryname, double similarity) { this.CategoryName = categoryname; this.similarity = similarity; } public String getCategoryName() { return CategoryName; } public void setCategoryName(String categoryName) { CategoryName = categoryName; } public double getSimilarity() { return similarity; } public void setSimilarity(double similarity) { this.similarity = similarity; } }
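// KNN (repost)
// [blog page sidebar: "You may also like"]
// Reposted from strayly.iteye.com/blog/2317092
// [blog page sidebar: "Today's recommendations"]
// [blog page sidebar: "Weekly ranking"]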