(Compared with Tika and regular expressions, I prefer jsoup) Crawling the iteye website with jsoup

 Result of the jsoup crawl

 

  

Scraping QQ news content with regular expressions (regex version)

http://knight-black-bob.iteye.com/blog/2312411

Compared with Tika and regular expressions, I prefer jsoup.

Regular expressions are much harder to write.

Below is the Maven dependency used to download the jsoup jar:

 <dependency>

    <groupId>org.jsoup</groupId>

    <artifactId>jsoup</artifactId>

    <version>1.9.2</version>

</dependency>

 

 

  

IteyeItemEntity [
userName=wosyingjun,
userPicLink=http://www.iteye.com/upload/logo/user/1184026/fa9a8493-f9a7-3e3b-9630-12ad8f65d277-thumb.png?1467599214,
userBlogLink=http://wosyingjun.iteye.com,
title=Recommend several sample projects related to Java backend written by yourself,
content=Recommend several self-written sample projects Here are some self-written sample projects, mainly using the SSM (Spring+SpringMVC+Mybatis) framework, and the distributed architecture (dubbo+zookeeper). The advantage of the example project is that it is simple and easy to understand. When building a new project, it can be directly used as a scaffold to facilitate rapid development. In addition, the knowledge points involved in the project and those that may be involved in the future will be continuously improved. The three projects have developed from each other and are still being continuously improved. They are as follows: ...,
articleLink=http://wosyingjun.iteye.com/blog/2312553,
seeNum=2871 people viewed,
goodNum=7 top,
badNum=0 step,
insertTime=2016-07-21 09:04
]



// Excerpt of the parsing loop from IteyeJsoupPerformer.getListIteyeEntity:
// it builds one IteyeItemEntity per ".blog" element found in the HTML string `data`.
// The loop's closing brace is not part of this excerpt.
// Alternative kept for reference: fetch the page by URL instead of parsing `data`.
//Connection connection = Jsoup.connect(url);  
			//Document document = connection.get();
			Document document = Jsoup.parse(data);
			// Every element with class "blog" is treated as one list entry.
			Elements indexmain = document.select(".blog");
			 Iterator<Element> blogIter = indexmain.iterator();  
			 IteyeItemEntity item = null;
             while (blogIter.hasNext()) {
            	 Element element = blogIter.next();
            	 
            	 // Each field is read with a CSS selector evaluated relative to the entry element.
            	 String userName = element.select(".content .blog_info a[title]").text();
            	 String userPicLink = element.select(".content .logo img").attr("src");
            	 String userBlogLink = element.select(".content .blog_info a").attr("href");
            	 String title = element.select(".content h3 a[title]").text();
            	 // iterator().next() takes the first matching div and throws if there is none.
            	 String content = element.select(".content  div").iterator().next().text();
            	 // The article link is read from the last anchor under the h3 heading.
            	 String articleLink =    element.select(".content h3 a").last().attr("href");
            	 String seeNum = element.select(".content .blog_info .view").text();
            	 String goodNum = element.select(".content .blog_info .digged .digg").text();
            	 String badNum = element.select(".content .blog_info .digged .bury").text();
            	 String insertTime = element.select(".content .blog_info .date").text();
            	 item = new IteyeItemEntity(userName, userPicLink, userBlogLink, title, content, articleLink, seeNum, goodNum, badNum, insertTime);
            	 list.add(item);

 


 

 

package com.couriousby.iteyedemo.util;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import android.util.Log;
import android.widget.Toast;

import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.entity.IteyeItemEntity;

/**
 * @author baoyou E-mail:[email protected]
 * @version 2016-7-22 10:58:49 AM
 *
 * desc: ...
 */
public class IteyeJsoupPerformer {
	
	/**
	 * Parses the HTML of a blog list page into one {@link IteyeItemEntity} per
	 * element matching the CSS selector {@code .blog}.
	 *
	 * <p>Missing sub-elements never abort the parse: a field whose element is absent
	 * becomes an empty string, and an entry that still fails is skipped so the
	 * entries around it are kept.
	 *
	 * @param data HTML text of the page; may be {@code null} or empty
	 * @return the parsed entries in document order; never {@code null}, empty when
	 *         {@code data} is {@code null}/empty or contains no {@code .blog} element
	 */
	public static List<IteyeItemEntity> getListIteyeEntity(String data){
		List<IteyeItemEntity> list = new ArrayList<IteyeItemEntity>();
		if (data == null || data.length() == 0) {
			// Nothing to parse; avoids relying on an exception from the parser.
			return list;
		}

		Elements blogs;
		try {
			//Connection connection = Jsoup.connect(url);
			//Document document = connection.get();
			Document document = Jsoup.parse(data);
			blogs = document.select(".blog");
		} catch (Exception e) {
			e.printStackTrace();
			return list;
		}

		for (Element element : blogs) {
			try {
				list.add(parseBlogEntry(element));
			} catch (Exception e) {
				// One malformed entry must not discard the entries that follow it.
				e.printStackTrace();
			}
		}
		return list;
	}

	/**
	 * Extracts the fields of a single {@code .blog} element.
	 *
	 * @param element the entry element to read from
	 * @return the populated entity; fields whose element is missing are empty strings
	 */
	private static IteyeItemEntity parseBlogEntry(Element element) {
		String userName = element.select(".content .blog_info a[title]").text();
		String userPicLink = element.select(".content .logo img").attr("src");
		String userBlogLink = element.select(".content .blog_info a").attr("href");
		String title = element.select(".content h3 a[title]").text();

		// first()/last() return null on an empty match, unlike iterator().next(),
		// which throws; fall back to "" so the entry is still usable.
		Element summary = element.select(".content  div").first();
		String content = summary == null ? "" : summary.text();
		Element articleAnchor = element.select(".content h3 a").last();
		String articleLink = articleAnchor == null ? "" : articleAnchor.attr("href");

		String seeNum = element.select(".content .blog_info .view").text();
		String goodNum = element.select(".content .blog_info .digged .digg").text();
		String badNum = element.select(".content .blog_info .digged .bury").text();
		String insertTime = element.select(".content .blog_info .date").text();

		return new IteyeItemEntity(userName, userPicLink, userBlogLink, title, content,
				articleLink, seeNum, goodNum, badNum, insertTime);
	}


 

package com.couriousby.iteyedemo.util;

import java.util.ArrayList;
import java.util.List;

import com.couriousby.iteyedemo.R;
import com.couriousby.iteyedemo.entity.GridEntity;

/**
 * @author baoyou E-mail:[email protected]
 * @version 2016-7-22 1:10:55 PM
 *
 * desc: ...
 */
public class Constants {

	/** Root URL of the blog listing; category path and page query are appended to it. */
	final static String URL_BASE = "http://www.iteye.com/blogs";

	/** URL path suffix per category; the array index is the category id. */
	private static final String[] CATEGORY_PATHS = {
		"",
		"/category/mobile",
		"/category/web",
		"/category/architecture",
		"/category/language",
		"/category/internet",
		"/category/opensource",
		"/category/os",
		"/category/database",
		"/category/develop",
		"/category/industry",
		"/category/other"
	};

	/**
	 * Maps a category id to its URL path suffix.
	 *
	 * @param category category id (0 to 11)
	 * @return the path suffix, or an empty string for id 0 and for any unknown id
	 */
	public static String getIteyeCategory(int category ) {
		boolean known = category >= 0 && category < CATEGORY_PATHS.length;
		return known ? CATEGORY_PATHS[category] : "";
	}

	/**
	 * Builds the list URL for a category and page.
	 *
	 * @param category category id, resolved through {@link #getIteyeCategory(int)}
	 * @param page page number placed in the {@code page} query parameter
	 * @return the complete URL
	 */
	public static String getIteyeUrl(int category,int page) {
		StringBuilder url = new StringBuilder(URL_BASE);
		url.append(getIteyeCategory(category));
		url.append("?page=");
		url.append(page);
		return url.toString();
	}

	/**
	 * Creates the entries of the category grid, one per category id, in id order.
	 *
	 * @return a new mutable list with twelve entries
	 */
	public static List<GridEntity> getGridItem() {
		// Labels and icons are parallel arrays indexed by category id.
		String[] labels = {
			"All categories",
			"Mobile Development",
			"web前端",
			"Enterprise Architecture",
			"Programming Language",
			"Internet",
			"Open Source Software",
			"operating system",
			"Database",
			"R&D Management",
			"Industry Application",
			"非技术"
		};
		int[] icons = {
			R.drawable.iteye_all,
			R.drawable.iteeye_mobile,
			R.drawable.iteye_web,
			R.drawable.iteye_architecture,
			R.drawable.iteye_language,
			R.drawable.iteye_internet,
			R.drawable.iteye_opensource,
			R.drawable.iteeye_os,
			R.drawable.iteeye_database,
			R.drawable.iteye_develop,
			R.drawable.iteye_industry,
			R.drawable.iteye_other
		};

		List<GridEntity> entries = new ArrayList<GridEntity>();
		for (int id = 0; id < labels.length; id++) {
			// The first and last constructor arguments both carry the category id.
			entries.add(new GridEntity(id, labels[id], icons[id], id));
		}
		return entries;
	}
}

 

 

 

 

package com.couriousby.iteyedemo.activity;

import java.util.ArrayList;
import java.util.List;

import android.app.Activity;
import android.content.Context;
import android.content.Intent;
import android.graphics.Color;
import android.graphics.drawable.BitmapDrawable;
import android.graphics.drawable.ColorDrawable;
import android.os.Bundle;
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.AdapterView;
import android.widget.AdapterView.OnItemClickListener;
import android.widget.AdapterView.OnItemSelectedListener;
import android.widget.LinearLayout.LayoutParams;
import android.widget.GridView;
import android.widget.PopupWindow;
import android.widget.TextView;

import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.R;
import com.couriousby.iteyedemo.adapter.IteyeAdapter;
import com.couriousby.iteyedemo.adapter.IteyePopwindowGridListAdapter;
import com.couriousby.iteyedemo.entity.GridEntity;
import com.couriousby.iteyedemo.entity.IteyeItemEntity;
import com.couriousby.iteyedemo.listener.OnIteyeGridViewItemclickListener;
import com.couriousby.iteyedemo.quote.xlistview.MsgListView;
import com.couriousby.iteyedemo.quote.xlistview.MsgListView.IXListViewListener;
import com.couriousby.iteyedemo.request.event.IteyeStringHttpEvent;
import com.couriousby.iteyedemo.request.event.base.RequestEvent;
import com.couriousby.iteyedemo.request.http.IteyeHttpRequest;
import com.couriousby.iteyedemo.util.Constants;
import com.couriousby.iteyedemo.util.IteyeJsoupPerformer;

import de.greenrobot.event.EventBus;

/**
 * Screen that lists blog entries with pull-to-refresh / load-more paging and a
 * drop-down grid for choosing the blog category.
 *
 * <p>Pages are requested through {@link IteyeHttpRequest}; results arrive as
 * {@link RequestEvent}s on the EventBus and are handled in
 * {@link #onEventMainThread(RequestEvent)}.
 */
public class IteyeMainActivity extends Activity implements IXListViewListener,
		OnItemClickListener, OnClickListener, OnIteyeGridViewItemclickListener {

	/** Bundle key under which the selected article URL is passed to the detail screen. */
	final static String ITEYE_DETAIL_URL = "iteye_detail_url";

	// Paging state belongs to this screen instance; it was previously static,
	// which shared it between instances for no benefit.
	/** Page number of the most recent request (1-based). */
	private int start = 1;
	/** Category id of the most recent request. */
	private int category = 0;

	private Context mContext;

	private MsgListView mListView;
	private List<IteyeItemEntity> mDataList;
	private IteyeAdapter mAdapter;

	private TextView mTopChooseBar;
	private PopupWindow mPopupWindow;

	private IteyePopwindowGridListAdapter gridAdapter;
	private GridView gridView;
	private List<GridEntity> mGridList;

	@Override
	protected void onCreate(Bundle savedInstanceState) {
		super.onCreate(savedInstanceState);
		setContentView(R.layout.iteye_list);

		this.mContext = IteyeMainActivity.this;

		initUtils();
		initView();
		initListeners();

		EventBus.getDefault().register(this);
		// Load the first page of the "all" category.
		start = 1;
		category = 0;
		IteyeHttpRequest.getIteyeDate(category, start);
	}

	/** Creates the list adapter and the category grid adapter with their initial data. */
	private void initUtils() {
		mDataList = new ArrayList<IteyeItemEntity>();
		mAdapter = new IteyeAdapter(mContext);
		mAdapter.setmDataList(mDataList);

		mGridList = Constants.getGridItem();
		gridAdapter = new IteyePopwindowGridListAdapter(mContext);
		gridAdapter.setOnIteyeGridViewItemclickListener(this);
		gridAdapter.setmList(mGridList);
	}

	/** Looks up the views and builds the category popup around the inflated top-bar layout. */
	private void initView() {
		mListView = (MsgListView) this.findViewById(R.id.qq_news_list);
		mListView.setAdapter(mAdapter);

		View baseView = View.inflate(this, R.layout.iteye_topbar, null);

		mTopChooseBar = (TextView) this.findViewById(R.id.tv_iteye_topbar);
		mPopupWindow = new PopupWindow(baseView, LayoutParams.MATCH_PARENT,
				LayoutParams.WRAP_CONTENT, false);
		// NOTE(review): a background drawable is presumably set so that outside
		// touches dismiss the popup on older platform versions; confirm for the
		// targeted API level.
		mPopupWindow.setBackgroundDrawable(new BitmapDrawable());
		mPopupWindow.setOutsideTouchable(true);
		mPopupWindow.setFocusable(true);

		gridView = (GridView) baseView.findViewById(R.id.iteye_gr_mlist);
		gridView.setAdapter(gridAdapter);
	}

	/** Wires click and paging listeners; the list adapter is already set in initView(). */
	private void initListeners() {
		mTopChooseBar.setOnClickListener(this);

		mListView.setPullLoadEnable(true);
		mListView.setPullRefreshEnable(true);
		mListView.setXListViewListener(this);
		mListView.setOnItemClickListener(this);

		gridView.setSelector(new ColorDrawable(Color.TRANSPARENT));
	}

	@Override
	public void onDestroy() {
		// Dismiss the popup first: a popup still showing when the activity goes
		// away leaks its window.
		if (mPopupWindow != null && mPopupWindow.isShowing()) {
			mPopupWindow.dismiss();
		}
		EventBus.getDefault().unregister(this);
		super.onDestroy();
	}

	/**
	 * EventBus callback for request results. Only {@link IteyeStringHttpEvent}s are
	 * handled; any other event is ignored.
	 *
	 * <p>NOTE(review): the code that posts these events is not visible here.
	 * Presumably {@code HTTP_START} carries the first page and {@code HTTP_SUCCESS}
	 * the following pages; verify against IteyeHttpManager.
	 *
	 * @param requestEvent the event delivered by the bus
	 */
	public void onEventMainThread(RequestEvent requestEvent) {
		if (!(requestEvent instanceof IteyeStringHttpEvent)) {
			return;
		}
		IteyeStringHttpEvent event = (IteyeStringHttpEvent) requestEvent;
		switch (event.status) {
		case HTTP_ERROR:
			mListView.stopRefresh();
			mListView.stopLoadMore();
			mListView.setPullLoadEnable(false);
			break;
		case HTTP_START: {
			// Replace the displayed data with the freshly parsed page.
			mListView.stopRefresh();
			mListView.stopLoadMore();
			List<IteyeItemEntity> list = IteyeJsoupPerformer.getListIteyeEntity(event.data);
			mAdapter.clearMDataList();
			mAdapter.setmDataList(list);
			mAdapter.notifyDataSetChanged();
			break;
		}
		case HTTP_SUCCESS: {
			mListView.stopRefresh();
			mListView.stopLoadMore();
			List<IteyeItemEntity> list = IteyeJsoupPerformer.getListIteyeEntity(event.data);
			// Only a first-page result replaces the list. Clearing on every result
			// (as before) threw away the earlier pages whenever more were loaded.
			if (start <= 1) {
				mAdapter.clearMDataList();
			}
			mAdapter.addMDataList(list);
			mAdapter.notifyDataSetChanged();
			break;
		}
		default:
			break;
		}
	}

	/**
	 * Opens the detail screen for the tapped entry, passing its article link in a
	 * bundle under {@link #ITEYE_DETAIL_URL}.
	 */
	@Override
	public void onItemClick(AdapterView<?> parent, View v, int position, long id) {
		// Positions are shifted by one relative to the adapter (presumably because of
		// the list's header view); rows outside the data range are ignored.
		int dataIndex = position - 1;
		if (dataIndex < 0 || dataIndex >= mAdapter.getCount()) {
			return;
		}
		IteyeItemEntity item = mAdapter.getItem(dataIndex);
		if (item != null) {
			Intent msgIntent = new Intent();
			Bundle bundle = new Bundle(); // A Bundle is backed by a map of String keys to values.
			bundle.putString(IteyeMainActivity.ITEYE_DETAIL_URL, item.getArticleLink());
			msgIntent.putExtra("bundle", bundle);
			msgIntent.setClass(MyApplication.newInstance(), IteyeDetailActivity.class);
			startActivityForResult(msgIntent, 1000);
		}
	}

	/** Pull-to-refresh: re-enables paging and reloads the first page of the current category. */
	@Override
	public void onRefresh() {
		start = 1;
		mListView.setPullLoadEnable(true);
		mListView.setPullRefreshEnable(true);
		IteyeHttpRequest.getIteyeDate(category, start);
	}

	/** Load-more: requests the page after the last one requested. */
	@Override
	public void onLoadMore() {
		start += 1;
		IteyeHttpRequest.getIteyeDate(category, start);
	}

	/** Toggles the category popup when the top bar is tapped. */
	@Override
	public void onClick(View view) {
		switch (view.getId()) {
		case R.id.tv_iteye_topbar:
			if (mPopupWindow.isShowing()) {
				mPopupWindow.dismiss();
			} else {
				mPopupWindow.showAsDropDown(view);
			}
			break;
		default:
			break;
		}
	}

	/** Switches to the chosen category and loads its first page. */
	@Override
	public void OnIteyeGridViewItemclick(GridEntity item) {
		mTopChooseBar.setText(item.getName());
		mPopupWindow.dismiss();
		start = 1;
		category = item.getId();
		// Same as onRefresh(): an earlier error may have disabled load-more.
		mListView.setPullLoadEnable(true);
		IteyeHttpRequest.getIteyeDate(category, start);
	}

}

 

 

 

package com.couriousby.iteyedemo.request.http;

import com.android.volley.Response.ErrorListener;
import com.android.volley.Response.Listener;
import com.android.volley.VolleyError;
import com.android.volley.toolbox.StringRequest;
import com.android.volley.toolbox.Volley;
import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.request.manager.IteyeHttpManager;
import com.couriousby.iteyedemo.util.Constants;

public class IteyeHttpRequest {

	
	public  static void getIteyeDate(int catgory ,final int page){
		String url =  Constants.getIteyeUrl(catgory,page);
		StringRequest request = new StringRequest(url, new Listener<String>() {
 
			@Override
			public void onResponse(String response) {
				if (page ==1 )
					IteyeHttpManager.getIteyeByPageFirst(response);
				else
					IteyeHttpManager.getIteyeByPage(response);	
			}
		}, new ErrorListener() { 
			@Override
			public void onErrorResponse(VolleyError error) { 
				IteyeHttpManager.getIteyeByPageError();
			}
		})
	/*	{
			@Override
			protected Response<String> parseNetworkResponse(
					NetworkResponse response) {
				String str = null;
		        try {
		            str = new String(response.data,"utf-8");
		        } catch (UnsupportedEncodingException e) { 
		            e.printStackTrace();
		        }
		        return Response.success(str, HttpHeaderParser.parseCacheHeaders(response));
			}
		}*/
		;
		Volley.newRequestQueue(MyApplication.newInstance()).add(request); 
	}
	
}

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

捐助开发者

在兴趣的驱动下,写一个免费的东西,有欣喜,也还有汗水,希望你喜欢我的作品,同时也能支持一下。 当然,有钱捧个钱场(右上角的爱心标志,支持支付宝和PayPal捐助),没钱捧个人场,谢谢各位。



 
 
 谢谢您的赞助,我会做的更好!

 

 

 

 

Guess you like

Origin http://10.200.1.11:23101/article/api/json?id=326653395&siteId=291194637