Java爬虫(三)-----httpclient的简单应用

任务:

爬取懂球帝页面

存在问题:

没有解析页面

同时也没有对数据做持久化存储

 

 

1.添加maven依赖

<dependencies>
<!-- Apache HttpClient: provides the org.apache.http.* classes used to build and send the GET/POST requests. -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.5</version>
</dependency>

<!-- Alibaba fastjson: provides JSONObject, used to try parsing response bodies as JSON. -->
<!-- NOTE(review): older fastjson 1.2.x releases have had security advisories; confirm this version is acceptable before using it outside a demo. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.47</version>

</dependency>


</dependencies>

2.编写请求

import com.alibaba.fastjson.JSONObject;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

public class HttpClientService {

    /**
  31      * 返回成功状态码
  32      */
      private static final int SUCCESS_CODE = 200;

             /**
       * 发送GET请求
       * @param url   请求url
       * @param nameValuePairList    请求参数
       * @return JSON或者字符串
       * @throws Exception
       */
              public static Object sendGet(String url, List<NameValuePair> nameValuePairList) throws Exception{
                JSONObject jsonObject = null;
                CloseableHttpClient client = null;
                CloseableHttpResponse response = null;
                try{
                        /**
                          * 创建HttpClient对象
                          */
                         client = HttpClients.createDefault();
                         /**
             52              * 创建URIBuilder
             53              */
                       URIBuilder uriBuilder = new URIBuilder(url);
                       /**
             56              * 设置参数
             57              */
                         uriBuilder.addParameters(nameValuePairList);
                         /**
             60              * 创建HttpGet
             61              */
                        HttpGet httpGet = new HttpGet(uriBuilder.build());
                         /**
             64              * 设置请求头部编码
             65              */
                        httpGet.setHeader(new BasicHeader("Content-Type", "application/x-www-form-urlencoded; charset=utf-8"));
                        /**
             6            * 设置返回编码
             69              */
                         httpGet.setHeader(new BasicHeader("Accept", "text/plain;charset=utf-8"));
                        /**
             72              * 请求服务
             73              */
                        response = client.execute(httpGet);
                       /**
                         * 获取响应吗
                        */
                       int statusCode = response.getStatusLine().getStatusCode();

                       if (SUCCESS_CODE == statusCode){
                               /**
                                  * 获取返回对象
                                   */
                                HttpEntity entity = response.getEntity();
                                /**
                                  * 通过EntityUitls获取返回内容
                                  */
                                String result = EntityUtils.toString(entity,"UTF-8");
                                /**
                                  * 转换成json,根据合法性返回json或者字符串
                                  */
                                try{
                                        jsonObject = JSONObject.parseObject(result);
                                       return jsonObject;
                                   }catch (Exception e){
                                    return result;
                                }
                            }else{

                             }
                     }catch (Exception e){

                   } finally {
                        response.close();
                         client.close();
                    }
               return null;
           }

      /*** 发送POST请求
112      * @param url
113      * @param nameValuePairList
114      * @return JSON或者字符串
115      * @throws Exception
116      */
             public static Object sendPost(String url, List<NameValuePair> nameValuePairList) throws Exception{
                JSONObject jsonObject = null;
                CloseableHttpClient client = null;
                CloseableHttpResponse response = null;
                try{
                       /**
             123              *  创建一个httpclient对象
             124              */
            client = HttpClients.createDefault();
                        /**
             127              * 创建一个post对象
             128              */
            HttpPost post = new HttpPost(url);
                         /**
             131              * 包装成一个Entity对象
             132              */
                      StringEntity entity = new UrlEncodedFormEntity(nameValuePairList, "UTF-8");
                    /**
             135              * 设置请求的内容
             136              */
                         post.setEntity(entity);
                        /**
             139              * 设置请求的报文头部的编码
             140              */
                        post.setHeader(new BasicHeader("Content-Type", "application/x-www-form-urlencoded; charset=utf-8"));
                        /**
             143              * 设置请求的报文头部的编码
             144              */
                        post.setHeader(new BasicHeader("Accept", "text/plain;charset=utf-8"));
                      /**
             147              * 执行post请求
             148              */
            response = client.execute(post);
                        /**
             151              * 获取响应码
             152              */
                        int statusCode = response.getStatusLine().getStatusCode();
                         if (SUCCESS_CODE == statusCode){
                                /**
                 156                  * 通过EntityUitls获取返回内容
                 157                  */
                                 String result = EntityUtils.toString(response.getEntity(),"UTF-8");
                                /**
                 160                  * 转换成json,根据合法性返回json或者字符串
                 161                  */
                                try{
                                        jsonObject = JSONObject.parseObject(result);
                                        return jsonObject;
                                   }catch (Exception e){
                                       return result;
                                    }
                            }else{

                            }
                    }catch (Exception e){

                    }finally {
                        response.close();
                        client.close();
                    }
               return null;
            }

           /**
181      * 组织请求参数{参数名和参数值下标保持一致}
182      * @param params    参数名数组
183      * @param values    参数值数组
184      * @return 参数对象
185      */
           public static List<NameValuePair> getParams(Object[] params, Object[] values){
                 /**
         188          * 校验参数合法性
         189          */
                boolean flag = params.length>0 && values.length>0 &&  params.length == values.length;
                 if (flag){
                         List<NameValuePair> nameValuePairList = new ArrayList<NameValuePair>();
                       for(int i =0; i<params.length; i++){
                                nameValuePairList.add(new BasicNameValuePair(params[i].toString(),values[i].toString()));
                            }
                       return nameValuePairList;
                    }else{

                    }
                 return null;
             }}

3.写main方法进行调用

import org.apache.http.NameValuePair;

import java.util.List;

    /**
     * Demo entry point: builds two sample query parameters, sends a GET request
     * through HttpClientService and prints whatever comes back.
     */
    public class httpclientmain {

        public static void main(String[] args) throws Exception{
            // Parameter names and their values; the two arrays are matched by index.
            final String[] names = {"param1", "param2"};
            final String[] vals = {"value1", "value2"};

            // Pair names with values, then issue the GET request.
            List<NameValuePair> query = HttpClientService.getParams(names, vals);
            Object response = HttpClientService.sendGet("http://www.dongqiudi.com/data", query);

            System.out.println("GET返回信息:" + response);
        }
    }


4.返回结果

 

Java爬虫(三)-----httpclient的简单应用



目前只是爬虫爬取工作的第一步

还要解析网页抽取出需要的数据

将数据进行存储