DIH虽然有不写程序就可进行数据导入的优势,但要是遇到复杂的数据关系,比如数据库中需要进行大量的join之类的查询,用DIH反而显得麻烦些,不仅配置繁琐难懂,而且效率也不高。另一种方式是使用solrj。solrj是solr的java客户端,可以通过solrj做索引、查询等一系列操作,建议复杂的数据导入使用solrj,更能提高效率。
依赖的包都可以在solr发行包的以下几个文件夹中找到:
/dist:apache-solr-solrj-*.jar
/dist/solrj-lib
commons-codec-1.3.jar
commons-httpclient-3.1.jar
commons-io-1.4.jar
jcl-over-slf4j-1.5.5.jar
slf4j-api-1.5.5.jar
扫描二维码关注公众号,回复:
1226015 查看本文章
/lib
slf4j-jdk14-1.5.5.jar
下面是建立索引的简单例子,从数据库读入数据,然后通过solrj发送给solr建立索引
import java.io.IOException; import java.net.MalformedURLException; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Types; import java.util.ArrayList; import java.util.Collection; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.common.SolrInputDocument; public class Test { private static int fetchSize = 1000; private static String url = "http://localhost:8983/solr/core1/"; private static CommonsHttpSolrServer solrCore; public Test() throws MalformedURLException { solrCore = new CommonsHttpSolrServer(url); } /** * Takes an SQL ResultSet and adds the documents to solr. Does it in batches * of fetchSize. * * @param rs * A ResultSet from the database. * @return The number of documents added to solr. * @throws SQLException * @throws SolrServerException * @throws IOException */ public long addResultSet(ResultSet rs) throws SQLException, SolrServerException, IOException { long count = 0; int innerCount = 0; Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(); ResultSetMetaData rsm = rs.getMetaData(); int numColumns = rsm.getColumnCount(); String[] colNames = new String[numColumns + 1]; /** * JDBC numbers the columns starting at 1, so the normal java convention * of starting at zero won't work. */ for (int i = 1; i < (numColumns + 1); i++) { colNames[i] = rsm.getColumnName(i); /** * If there are fields that you want to handle manually, check for * them here and change that entry in colNames to null. This will * cause the loop in the next section to skip that database column. */ // //Example: // if (rsm.getColumnName(i) == "db_id") // { // colNames[i] = null; // } } while (rs.next()) { count++; innerCount++; SolrInputDocument doc = new SolrInputDocument(); /** * At this point, take care of manual document field assignments for * which you previously assigned the colNames entry to null. 
*/ // //Example: // doc.addField("solr_db_id", rs.getLong("db_id")); for (int j = 1; j < (numColumns + 1); j++) { if (colNames[j] != null) { Object f; switch (rsm.getColumnType(j)) { case Types.BIGINT: { f = rs.getLong(j); break; } case Types.INTEGER: { f = rs.getInt(j); break; } case Types.DATE: { f = rs.getDate(j); break; } case Types.FLOAT: { f = rs.getFloat(j); break; } case Types.DOUBLE: { f = rs.getDouble(j); break; } case Types.TIME: { f = rs.getDate(j); break; } case Types.BOOLEAN: { f = rs.getBoolean(j); break; } default: { f = rs.getString(j); } } doc.addField(colNames[j], f); } } docs.add(doc); /** * When we reach fetchSize, index the documents and reset the inner * counter. */ if (innerCount == fetchSize) { solrCore.add(docs); docs.clear(); innerCount = 0; } } /** * If the outer loop ended before the inner loop reset, index the * remaining documents. */ if (innerCount != 0) { solrCore.add(docs); } return count; } }