Java 爬虫:拉取起点中文网书评区的用户昵称
新手向,各位大佬勿喷。
public static void method() throws IOException {
// 创建httpclient对象
CloseableHttpClient httpClient = HttpClients.createDefault();
// 使用set集合避免value重复
Set set = new HashSet();
// 注1:940 为下图中的最大页码值(注意:循环条件 i < 940 只会请求到第 939 页)
for (int i = 1; i < 940; i++) {
// 创建请求方法的实例,并指定请求的 URL(注2:URL 见下图)
HttpGet httpget = new HttpGet("https://book.qidian.com/ajax/comment/info?_csrfToken=PB0yagvmzuLX2ey7tmQyPY4n6XKoFZTBSMxUuoJR&pageIndex="+i+"&pageSize=15&orderBy=2&bookId=1209977");
CloseableHttpResponse response = httpClient.execute(httpget);
HttpEntity entity = response.getEntity();
String content = EntityUtils.toString(entity, "utf-8");
JSONObject jsonObject = JSONObject.parseObject(content);
// 获取 key 为 data 的值
String r = jsonObject.getString("data");
JSONObject objects = JSONObject.parseObject(r);
String c = objects.getString("commentInfo");
JSONArray objects1 = JSON.parseArray(c);
for (int j = 0; j < objects1.size(); j++) {
Object nickName = JSONObject.parseObject(objects1.get(j).toString()).get("nickName");
String nick = (String) nickName;
if (!nick.contains("书友")) {
set.add(nick);
}
}
}
System.out.println(set);
System.out.println(set.size());
httpClient.close();