数据结构之归并算法(类似于分库操作模拟)
在我们项目中,如果数据量很大,那么分库操作不失为一个选择,分库如何实现操作数据呢,比如所有数据去重排序,下面以文件代替进行模拟:
target_file 是合并后生成文件,小了很多,是因为去重了,
这是临时文件(类似于我们的多个库)
下面是代码实现:
import lombok.*;
import java.io.BufferedReader;
import java.io.IOException;
/**
* @project: testdemo
* @author: SJT
* @date: 2019/3/15
* @desc:
*/
@Data
@ToString
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class FileInfo {
//文件名
private String fileName;
//当前要对比的数据
private int currentNum;
//类似指针读取新的数据
private BufferedReader reader;
/**
* @desc: 移动指针读取新的数据
* @author: SJT
* @date: 2019/3/15
* @param: []
* @return: void
*/
public void readNext() throws IOException {
String data = reader.readLine();
if(data != null){
this.currentNum = Integer.valueOf(data);
}else {
this.currentNum = -1;
}
}
}
import java.io.*;
import java.util.*;
/**
* @project: testdemo
* @author: SJT
* @date: 2019/3/14
* @desc:
*/
public class MergeSort {
private static final String file_dir = "C:\\Users\\admin\\Desktop\\DOC";
private static final String temp_file_dir = "C:\\Users\\admin\\Desktop\\DOC\\TEST";
//读取的行数
private int count = 0;
//子文件最大行数
private static final int split_size = 500000;
//子文件编号
private int file_no = 1;
//文件前缀
private static final String file_prefix = "tjs";
//目标文件
private static String target_file ="C:\\Users\\admin\\Desktop\\DOC\\target_file";
//比较器
private Comparator<FileInfo> comparator = new Comparator<FileInfo>() {
@Override
public int compare(FileInfo o1, FileInfo o2) {
if (o1.getCurrentNum() == o2.getCurrentNum()) {
return o1.getFileName().compareTo(o2.getFileName());
}
return o1.getCurrentNum() - o2.getCurrentNum();
}
};
public static void main(String[] args) throws IOException {
MergeSort ms = new MergeSort();
ms.splitFile("test.txt");
ms.merge();
}
/**
* @desc: 文件拆分
* @author: SJT
* @date: 2019/3/15
* @param: [fileName]
* @return: void
*/
private void splitFile(String fileName) {
SortedSet<Integer> set = new TreeSet<>();
try(BufferedReader br = new BufferedReader(new FileReader(file_dir + File.separator+fileName))) {
String data;
do{
//读取每行数据
data = br.readLine();
count ++;
if(data != null){
set.add(Integer.valueOf(data));
if(count >= split_size){
//拆分文件写入到指定目录
writeFile(temp_file_dir,set);
count = 0;
set.clear();
}
}else {
//读到最后一批数据
if(!set.isEmpty()){
//将拆分文件写入到指定目录
writeFile(temp_file_dir,set);
count = 0;
set.clear();
}
}
}while (data != null);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* @desc: 将拆分文件写入到指定目录
* @author: SJT
* @date: 2019/3/15
* @param: [s, set]
* @return: void
*/
private void writeFile(String s, SortedSet<Integer> set) {
try(BufferedWriter bw = new BufferedWriter(new FileWriter(s+File.separator+file_prefix+file_no))) {
set.stream().forEach(x ->{
try {
bw.write(x+"\r\n");
} catch (IOException e) {
e.printStackTrace();
}
});
file_no ++;
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* @desc: 将各个文件中数据进行归并处理
* @author: SJT
* @date: 2019/3/15
* @param: []
* @return: void
*/
private void merge() throws IOException {
File[] files = new File(temp_file_dir).listFiles();
List<FileInfo> fileInfos = new ArrayList<>();
for (File file:files){
BufferedReader reader = new BufferedReader(new FileReader(file));
FileInfo fileInfo = FileInfo.builder().fileName(file.getName())
.reader(reader)
.build();
fileInfo.readNext();
fileInfos.add(fileInfo);
}
//初始排序(小---->大)
Collections.sort(fileInfos,comparator);
BufferedWriter bw = new BufferedWriter(new FileWriter(target_file));
while (!fileInfos.isEmpty()){
FileInfo fileInfo = fileInfos.get(0);
bw.write(fileInfo.getCurrentNum()+"\r\n");
//移动指针,读取此文件中下一条数据
fileInfo.readNext();
//再次将文件进行排序(小---->大)
Collections.sort(fileInfos,comparator);
//文件中数据读取完时,删除此文件,直至将所有子文件数据读完
if(fileInfo.getCurrentNum() == -1){
fileInfos.remove(fileInfo);
}
}
bw.flush();
}
}
ok,这样即可~.~