(比较 tika 和正则 ,我更喜欢jsoup ) jsoup 抓取 iteye 网站
jsoup 效果
qq新闻 内容抓取 正则表达 (正则)
http://knight-black-bob.iteye.com/blog/2312411
相比 tika 和正则表达式,我更喜欢 jsoup。
正则表达式比较难写。
下面有jar 包下载
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.9.2</version>
</dependency>
IteyeItemEntity [
userName=wosyingjun,
userPicLink=http://www.iteye.com/upload/logo/user/1184026/fa9a8493-f9a7-3e3b-9630-12ad8f65d277-thumb.png?1467599214,
userBlogLink=http://wosyingjun.iteye.com,
title=推荐几个自己写的Java后端相关的范例项目,
content=推荐几个自己写的范例项目 这里推荐几个自己写的范例项目,主要采用SSM(Spring+SpringMVC+Mybatis)框架,分布式架构采用的是(dubbo+zookeeper)。范例项目的好处是简单易懂,在架构一个新的项目的时候可以直接当成脚手架来用,方便快速开发,另外项目中涉及到以及未来可能涉及到的知识点都会不断完善。 三个项目是互相发展而来的,目前仍在不断完善中,依次为: ...,
articleLink=http://wosyingjun.iteye.com/blog/2312553,
seeNum=有2871人浏览,
goodNum=7顶,
badNum=0踩,
insertTime=2016-07-21 09:04
]
// Excerpt of the parsing loop: turns the HTML text in `data` into IteyeItemEntity objects.
// Alternative kept for reference: fetch the page over the network instead of parsing a string.
//Connection connection = Jsoup.connect(url);
//Document document = connection.get();
Document document = Jsoup.parse(data);
// Every element with class "blog" is handled as one entry by the loop below.
Elements indexmain = document.select(".blog");
Iterator<Element> blogIter = indexmain.iterator();
IteyeItemEntity item = null;
while (blogIter.hasNext()) {
Element element = blogIter.next();
// Elements.text() joins the text of all matches; Elements.attr() reads the first match carrying the attribute.
String userName = element.select(".content .blog_info a[title]").text();
String userPicLink = element.select(".content .logo img").attr("src");
String userBlogLink = element.select(".content .blog_info a").attr("href");
String title = element.select(".content h3 a[title]").text();
// NOTE(review): iterator().next() throws NoSuchElementException when no ".content div" matches.
String content = element.select(".content div").iterator().next().text();
// NOTE(review): last() returns null on an empty selection, so attr() would throw NullPointerException.
String articleLink = element.select(".content h3 a").last().attr("href");
String seeNum = element.select(".content .blog_info .view").text();
String goodNum = element.select(".content .blog_info .digged .digg").text();
String badNum = element.select(".content .blog_info .digged .bury").text();
String insertTime = element.select(".content .blog_info .date").text();
item = new IteyeItemEntity(userName, userPicLink, userBlogLink, title, content, articleLink, seeNum, goodNum, badNum, insertTime);
list.add(item);
package com.couriousby.iteyedemo.util;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import android.util.Log;
import android.widget.Toast;
import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.entity.IteyeItemEntity;
/**
* @author baoyou E-mail:[email protected]
* @version 2016-7-22 上午10:58:49
*
* desc: ...
*/
public class IteyeJsoupPerformer {
/**
 * Parses the HTML of an iteye blog-list page into entities.
 *
 * <p>Each element with class {@code blog} yields one {@link IteyeItemEntity}.
 * A field whose markup is missing is stored as an empty string, so a single
 * malformed entry no longer aborts parsing of the entries that follow it.
 *
 * @param data HTML text of the page; {@code null} or empty yields an empty list
 * @return the parsed entries in document order; never {@code null}
 */
public static List<IteyeItemEntity> getListIteyeEntity(String data){
    List<IteyeItemEntity> list = new ArrayList<IteyeItemEntity>();
    if (data == null || data.isEmpty()) {
        // Nothing to parse; avoid handing null to Jsoup.
        return list;
    }
    try {
        Document document = Jsoup.parse(data);
        for (Element element : document.select(".blog")) {
            String userName = element.select(".content .blog_info a[title]").text();
            String userPicLink = element.select(".content .logo img").attr("src");
            String userBlogLink = element.select(".content .blog_info a").attr("href");
            String title = element.select(".content h3 a[title]").text();

            // first()/last() return null on an empty selection, so check before dereferencing.
            Element summary = element.select(".content div").first();
            String content = summary == null ? "" : summary.text();
            Element lastTitleLink = element.select(".content h3 a").last();
            String articleLink = lastTitleLink == null ? "" : lastTitleLink.attr("href");

            String seeNum = element.select(".content .blog_info .view").text();
            String goodNum = element.select(".content .blog_info .digged .digg").text();
            String badNum = element.select(".content .blog_info .digged .bury").text();
            String insertTime = element.select(".content .blog_info .date").text();

            list.add(new IteyeItemEntity(userName, userPicLink, userBlogLink, title, content,
                    articleLink, seeNum, goodNum, badNum, insertTime));
        }
    } catch (Exception e) {
        // Best effort: callers get whatever was parsed before the failure.
        e.printStackTrace();
    }
    return list;
}
package com.couriousby.iteyedemo.util;
import java.util.ArrayList;
import java.util.List;
import com.couriousby.iteyedemo.R;
import com.couriousby.iteyedemo.entity.GridEntity;
/**
 * URL building and category metadata for the iteye blog listing.
 *
 * @author baoyou E-mail:[email protected]
 * @version 2016-7-22 13:10:55
 */
public class Constants {
    final static String URL_BASE = "http://www.iteye.com/blogs";

    /** URL path suffix for each category id; the array index is the id. */
    private static final String[] CATEGORY_PATHS = {
        "",
        "/category/mobile",
        "/category/web",
        "/category/architecture",
        "/category/language",
        "/category/internet",
        "/category/opensource",
        "/category/os",
        "/category/database",
        "/category/develop",
        "/category/industry",
        "/category/other"
    };

    /**
     * Returns the URL path suffix for a category id.
     *
     * @param category category id
     * @return the path suffix, or an empty string for id 0 and for ids outside 0..11
     */
    public static String getIteyeCategory(int category ) {
        boolean known = category >= 0 && category < CATEGORY_PATHS.length;
        return known ? CATEGORY_PATHS[category] : "";
    }

    /**
     * Builds the list URL for one page of a category.
     *
     * @param category category id, resolved through {@link #getIteyeCategory(int)}
     * @param page page number placed in the {@code page} query parameter
     * @return the complete URL
     */
    public static String getIteyeUrl(int category,int page) {
        StringBuilder url = new StringBuilder(URL_BASE);
        url.append(getIteyeCategory(category));
        url.append("?page=").append(page);
        return url.toString();
    }

    /**
     * Builds the twelve entries shown in the category grid.
     *
     * @return a new mutable list, one {@link GridEntity} per category id 0..11
     */
    public static List<GridEntity> getGridItem() {
        String[] names = {
            "全部分类", "移动开发", "web前端", "企业架构", "编程语言", "互联网",
            "开源软件", "操作系统", "数据库", "研发管理", "行业应用", "非技术"
        };
        int[] icons = {
            R.drawable.iteye_all,
            R.drawable.iteye_mobile,
            R.drawable.iteye_web,
            R.drawable.iteye_architecture,
            R.drawable.iteye_language,
            R.drawable.iteye_internet,
            R.drawable.iteye_opensource,
            R.drawable.iteye_os,
            R.drawable.iteye_database,
            R.drawable.iteye_develop,
            R.drawable.iteye_industry,
            R.drawable.iteye_other
        };
        List<GridEntity> entries = new ArrayList<GridEntity>();
        for (int id = 0; id < names.length; id++) {
            // The id is passed as both the first and the last constructor argument.
            entries.add(new GridEntity(id, names[id], icons[id], id));
        }
        return entries;
    }
}
package com.couriousby.iteyedemo.activity;
import java.util.ArrayList;
import java.util.List;
import android.app.Activity;
import android.content.Context;
import android.content.Intent;
import android.graphics.Color;
import android.graphics.drawable.BitmapDrawable;
import android.graphics.drawable.ColorDrawable;
import android.os.Bundle;
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.AdapterView;
import android.widget.AdapterView.OnItemClickListener;
import android.widget.AdapterView.OnItemSelectedListener;
import android.widget.LinearLayout.LayoutParams;
import android.widget.GridView;
import android.widget.PopupWindow;
import android.widget.TextView;
import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.R;
import com.couriousby.iteyedemo.adapter.IteyeAdapter;
import com.couriousby.iteyedemo.adapter.IteyePopwindowGridListAdapter;
import com.couriousby.iteyedemo.entity.GridEntity;
import com.couriousby.iteyedemo.entity.IteyeItemEntity;
import com.couriousby.iteyedemo.listener.OnIteyeGridViewItemclickListener;
import com.couriousby.iteyedemo.quote.xlistview.MsgListView;
import com.couriousby.iteyedemo.quote.xlistview.MsgListView.IXListViewListener;
import com.couriousby.iteyedemo.request.event.IteyeStringHttpEvent;
import com.couriousby.iteyedemo.request.event.base.RequestEvent;
import com.couriousby.iteyedemo.request.http.IteyeHttpRequest;
import com.couriousby.iteyedemo.util.Constants;
import com.couriousby.iteyedemo.util.IteyeJsoupPerformer;
import de.greenrobot.event.EventBus;
/**
 * Screen that lists iteye blog entries with pull-to-refresh / load-more and a
 * popup grid for choosing the category.
 *
 * <p>Pages are requested through {@link IteyeHttpRequest}; results arrive as
 * {@link IteyeStringHttpEvent}s in {@link #onEventMainThread(RequestEvent)}.
 */
public class IteyeMainActivity extends Activity implements IXListViewListener ,OnItemClickListener,OnClickListener,OnIteyeGridViewItemclickListener{
    final static String ITEYE_DETAIL_URL = "iteye_detail_url";

    /** Page number of the most recent request; reset to 1 on refresh and on category change. */
    private int start = 1;
    /** Currently selected category id (0 = all categories). */
    private int category = 0;

    private Context mContext;
    private MsgListView mListView;
    private List<IteyeItemEntity> mDataList;
    private IteyeAdapter mAdapter;
    private TextView mTopChooseBar;
    private PopupWindow mPopupWindow;
    private IteyePopwindowGridListAdapter gridAdapter;
    private GridView gridView;
    private List<GridEntity> mGridList;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.iteye_list);
        this.mContext = IteyeMainActivity.this;
        initUtils();
        initView( );
        initListeners();
        EventBus.getDefault().register( this );
        // Always start on the first page of "all categories".
        start = 1;
        category = 0;
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    /** Creates the list adapter and the category-grid adapter with their backing data. */
    private void initUtils() {
        mDataList = new ArrayList<IteyeItemEntity>();
        mAdapter = new IteyeAdapter(mContext);
        mAdapter.setmDataList(mDataList);
        mGridList = Constants.getGridItem();
        gridAdapter = new IteyePopwindowGridListAdapter(mContext);
        gridAdapter.setOnIteyeGridViewItemclickListener(this);
        gridAdapter.setmList(mGridList);
    }

    /** Looks up the views and builds the category popup window. */
    private void initView() {
        mListView = (MsgListView) this.findViewById(R.id.qq_news_list);
        mListView.setAdapter(mAdapter);
        View baseView = View.inflate( this, R.layout.iteye_topbar, null );
        mTopChooseBar = (TextView) this.findViewById( R.id.tv_iteye_topbar);
        mPopupWindow = new PopupWindow(baseView ,LayoutParams.MATCH_PARENT,
                LayoutParams.WRAP_CONTENT, false );
        // A background drawable is set together with outside-touchable so the popup can be dismissed by tapping outside.
        mPopupWindow.setBackgroundDrawable( new BitmapDrawable() );
        mPopupWindow.setOutsideTouchable( true );
        mPopupWindow.setFocusable( true );
        gridView = (GridView) baseView.findViewById(R.id.iteye_gr_mlist);
        gridView.setAdapter(gridAdapter);
    }

    /** Wires click and pull listeners and enables both pull gestures. */
    private void initListeners() {
        mTopChooseBar.setOnClickListener(this);
        mListView.setPullLoadEnable(true);
        mListView.setPullRefreshEnable(true);
        mListView.setXListViewListener(this);
        // NOTE(review): the adapter was already set in initView(); this second call looks redundant - confirm against MsgListView.
        mListView.setAdapter(mAdapter);
        mListView.setOnItemClickListener(this);
        gridView.setSelector(new ColorDrawable(Color.TRANSPARENT));
    }

    @Override
    public void onDestroy() {
        EventBus.getDefault().unregister( this );
        super.onDestroy();
    }

    /**
     * Receives request events from EventBus and updates the list.
     *
     * @param requestEvent the posted event; anything other than an
     *        {@link IteyeStringHttpEvent} is ignored
     */
    public void onEventMainThread(RequestEvent requestEvent){
        if (!(requestEvent instanceof IteyeStringHttpEvent)) {
            return;
        }
        IteyeStringHttpEvent event = (IteyeStringHttpEvent) requestEvent;
        switch (event.status) {
        case HTTP_ERROR:
            mListView.stopRefresh();
            mListView.stopLoadMore();
            // Load-more stays disabled until the next refresh re-enables it.
            mListView.setPullLoadEnable(false);
            break;
        case HTTP_START: {
            // Replace the list content with the parsed page.
            mListView.stopRefresh();
            mListView.stopLoadMore();
            String result = event.data;
            mAdapter.clearMDataList();
            List<IteyeItemEntity> list = IteyeJsoupPerformer.getListIteyeEntity(result);
            mAdapter.setmDataList(list);
            mAdapter.notifyDataSetChanged();
            break;
        }
        case HTTP_SUCCESS: {
            mListView.stopRefresh();
            mListView.stopLoadMore();
            String result = event.data;
            // NOTE(review): clearing before addMDataList() presumably discards the pages already
            // loaded, so "load more" replaces rather than appends - confirm against IteyeHttpManager
            // and IteyeAdapter before removing the clear.
            mAdapter.clearMDataList();
            List<IteyeItemEntity> list = IteyeJsoupPerformer.getListIteyeEntity(result);
            mAdapter.addMDataList(list);
            mAdapter.notifyDataSetChanged();
            break;
        }
        default:
            break;
        }
    }

    /**
     * Opens the detail screen for the clicked entry.
     *
     * <p>The adapter index is {@code position - 1}. Positions that fall outside the
     * adapter's data after that shift (presumably the list's header and footer rows)
     * are ignored instead of being passed to the adapter.
     */
    @Override
    public void onItemClick(AdapterView<?> parent, View v, int position, long id) {
        int index = position - 1;
        if (index < 0 || index >= mAdapter.getCount()) {
            return;
        }
        IteyeItemEntity item = mAdapter.getItem(index);
        if (item != null) {
            Intent msgIntent = new Intent();
            // The article link travels to the detail screen inside a Bundle extra.
            Bundle bundle = new Bundle();
            bundle.putString(IteyeMainActivity.ITEYE_DETAIL_URL, item.getArticleLink() );
            msgIntent.putExtra("bundle", bundle);
            msgIntent.setClass(MyApplication.newInstance(), IteyeDetailActivity.class);
            startActivityForResult( msgIntent, 1000 );
        }
    }

    /** Pull-to-refresh: go back to page 1 of the current category and re-enable both gestures. */
    @Override
    public void onRefresh() {
        start = 1;
        mListView.setPullLoadEnable(true);
        mListView.setPullRefreshEnable(true);
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    /** Load-more: request the next page of the current category. */
    @Override
    public void onLoadMore() {
        start += 1;
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    /** Toggles the category popup when the top bar is tapped. */
    @Override
    public void onClick(View view) {
        switch (view.getId()) {
        case R.id.tv_iteye_topbar:
            if (mPopupWindow.isShowing()) {
                mPopupWindow.dismiss();
            } else {
                mPopupWindow.showAsDropDown(view);
            }
            break;
        default:
            break;
        }
    }

    /** Category chosen in the popup grid: show its name and load its first page. */
    @Override
    public void OnIteyeGridViewItemclick(GridEntity item) {
        mTopChooseBar.setText(item.getName() );
        mPopupWindow.dismiss();
        start = 1;
        category = item.getId();
        IteyeHttpRequest.getIteyeDate(category, start);
    }
}
package com.couriousby.iteyedemo.request.http;
import com.android.volley.RequestQueue;
import com.android.volley.Response.ErrorListener;
import com.android.volley.Response.Listener;
import com.android.volley.VolleyError;
import com.android.volley.toolbox.StringRequest;
import com.android.volley.toolbox.Volley;
import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.request.manager.IteyeHttpManager;
import com.couriousby.iteyedemo.util.Constants;
/**
 * Issues the HTTP requests for iteye blog-list pages and forwards the outcome
 * to {@link IteyeHttpManager}.
 */
public class IteyeHttpRequest {

    /** Queue shared by every request; created on first use. */
    private static RequestQueue sRequestQueue;

    /**
     * Returns the shared request queue, creating it on the first call.
     * Reusing one queue avoids building a new queue for every page request.
     */
    private static synchronized RequestQueue getRequestQueue() {
        if (sRequestQueue == null) {
            // NOTE(review): assumes MyApplication.newInstance() returns the application context - confirm.
            sRequestQueue = Volley.newRequestQueue(MyApplication.newInstance());
        }
        return sRequestQueue;
    }

    /**
     * Requests one page of a category.
     *
     * <p>A response for page 1 goes to {@code IteyeHttpManager.getIteyeByPageFirst},
     * any other page to {@code getIteyeByPage}, and a failure to
     * {@code getIteyeByPageError}.
     *
     * @param catgory category id, resolved to a URL by {@link Constants#getIteyeUrl(int, int)}
     * @param page page number to request
     */
    public static void getIteyeDate(int catgory ,final int page){
        String url = Constants.getIteyeUrl(catgory,page);
        // If the response charset is ever mis-detected, override parseNetworkResponse
        // on this request and decode response.data as UTF-8 explicitly.
        StringRequest request = new StringRequest(url, new Listener<String>() {
            @Override
            public void onResponse(String response) {
                if (page == 1) {
                    IteyeHttpManager.getIteyeByPageFirst(response);
                } else {
                    IteyeHttpManager.getIteyeByPage(response);
                }
            }
        }, new ErrorListener() {
            @Override
            public void onErrorResponse(VolleyError error) {
                IteyeHttpManager.getIteyeByPageError();
            }
        });
        getRequestQueue().add(request);
    }
}
捐助开发者
在兴趣的驱动下,写一个免费的东西,有欣喜,也有汗水,希望你喜欢我的作品,同时也能支持一下。
当然,有钱捧个钱场(右上角的爱心标志,支持支付宝和PayPal捐助),没钱捧个人场,谢谢各位。
谢谢您的赞助,我会做得更好!