Netty内存管理深度解析(下)

概述

1. PoolChunk

1.1 Buddy算法

1.2 PoolChunk初始化

1.3 分配

2. PoolSubPage

3. 分配和释放

上面的内容见上文:Netty内存管理深度解析(上)

4. PoolChunkList

在Netty内存管理概述中讲到过Arena将内存分为很多Chunk进行管理，其实就是通过多个PoolChunkList来保存，并且根据Chunk的使用率，动态地移动至对应的ChunkList中。在Arena中会初始化六种PoolChunkList，分别为：QINIT，Q0，Q25，Q50，Q75，Q100。
Netty内存管理深度解析(下)
它们之间除了QINIT外形成双向链表；PoolChunkList中的Chunk块也形成双向链表，其中头结点是双向链表的尾部，且新加入的节点也加到尾部。
Chunk随着内存使用率的变化，会在PoolChunkList中移动，初始时都在QINIT，随着使用率增大，移动到Q0，Q25等；随着使用率降低，又移回Q0，当Q0中的Chunk块不再使用时，从Q0中移除。

PoolChunkList的属性如下：

/**
 * Excerpt of the PoolChunkList fields: a list of chunks that share a usage band,
 * linked to neighbouring lists through prevList / nextList.
 */
final class PoolChunkList<T> implements PoolChunkListMetric {
    private final PoolArena<T> arena;// the arena this list belongs to
    private final int minUsage;// minimum memory usage (percent)
    private final int maxUsage;// maximum memory usage (percent)
    private final int maxCapacity;// maximum number of bytes that can be allocated from one chunk in this list
    private PoolChunk<T> head;// head node of the chunk list
    private PoolChunkList<T> prevList;// list for the previous usage state
    private final PoolChunkList<T> nextList;// list for the next usage state
}

在PoolArena中会创建状态列表,截取部分代码：

// Constructor arguments are (arena, nextList, minUsage, maxUsage, chunkSize).
// The lists are created from q100 downwards so that each one can receive its successor as nextList.
q100 = new PoolChunkList<T>(this, null, 100, Integer.MAX_VALUE, chunkSize);
q075 = new PoolChunkList<T>(this, q100, 75, 100, chunkSize);
q050 = new PoolChunkList<T>(this, q075, 50, 100, chunkSize);
q025 = new PoolChunkList<T>(this, q050, 25, 75, chunkSize);
q000 = new PoolChunkList<T>(this, q025, 1, 50, chunkSize);
qInit = new PoolChunkList<T>(this, q000, Integer.MIN_VALUE, 25, chunkSize);

// Wire up the backward links once every list exists.
q100.prevList(q075);
q075.prevList(q050);
q050.prevList(q025);
q025.prevList(q000);
// q000 has no previous list.
q000.prevList(null);
// qInit's previous list is qInit itself.
qInit.prevList(qInit);

下面看PoolChunkList的构造方法：

/**
 * Creates a chunk list for one usage band.
 *
 * @param arena     the arena that owns this list
 * @param nextList  the list that receives chunks whose usage reaches {@code maxUsage}
 * @param minUsage  lower usage bound of this list, in percent
 * @param maxUsage  upper usage bound of this list, in percent
 * @param chunkSize size of a chunk in bytes; used to derive {@code maxCapacity}
 */
PoolChunkList(PoolArena<T> arena, PoolChunkList<T> nextList, int minUsage, int maxUsage, int chunkSize) {
    // Usage bounds first, then the capacity limit that is derived from the lower bound.
    this.minUsage = minUsage;
    this.maxUsage = maxUsage;
    this.maxCapacity = calculateMaxCapacity(minUsage, chunkSize);

    // Ownership and forward link.
    this.arena = arena;
    this.nextList = nextList;
}
/**
 * Computes the largest allocation, in bytes, that a chunk in a list with the given
 * minimum usage can still serve.
 *
 * @param minUsage  minimum usage of the list in percent (normalised through {@code minUsage0})
 * @param chunkSize size of a chunk in bytes
 * @return {@code 0} when the normalised minimum usage is 100, otherwise
 *         {@code chunkSize * (100 - minUsage) / 100}
 */
private static int calculateMaxCapacity(int minUsage, int chunkSize) {
    final int usedPercent = minUsage0(minUsage);
    if (usedPercent == 100) {
        // A list whose chunks are fully used cannot serve any further allocation.
        return 0;
    }
    // For example, with a minimum usage of 25 at most 0.75 * chunkSize can be handed out.
    final long freePercent = 100L - usedPercent;
    return (int) (chunkSize * freePercent / 100L);
}

PoolArena负责管理QInit~Q100，其内部的Chunk的管理是在PoolChunkList内部，我们看下添加Chunk的逻辑

/**
 * Adds a chunk to this list, or hands it on to {@code nextList} when the chunk's
 * usage has already reached this list's {@code maxUsage}.
 *
 * @param chunk the chunk to place
 */
void add(PoolChunk<T> chunk) {
    if (chunk.usage() < maxUsage) {
        // Usage is below this list's upper bound: link the chunk into the internal chunk list.
        add0(chunk);
    } else {
        // Usage is at or above the upper bound: delegate to the next list.
        nextList.add(chunk);
    }
}

接下来分析关键的分配过程，代码如下：

/**
 * Tries to serve an allocation from one of the chunks held by this list.
 *
 * @param buf          the buffer to initialise on success
 * @param reqCapacity  the capacity originally requested
 * @param normCapacity the normalised capacity actually allocated from the chunk
 * @return {@code true} if a chunk in this list served the request, {@code false} if the list
 *         is empty, the request exceeds {@code maxCapacity}, or no chunk had room
 */
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) {
    // Nothing to allocate from, or the request is larger than any chunk in this list can serve.
    if (head == null || normCapacity > maxCapacity) {
        return false;
    }
    // Walk the chunk list from the head until one chunk accepts the request.
    for (PoolChunk<T> chunk = head; chunk != null; chunk = chunk.next) {
        final long handle = chunk.allocate(normCapacity);
        if (handle < 0) {
            // This chunk could not serve the request; try the next one.
            continue;
        }
        chunk.initBuf(buf, handle, reqCapacity);
        // Re-check usage after the allocation and promote the chunk if it reached the upper bound.
        if (chunk.usage() >= maxUsage) {
            remove(chunk);
            nextList.add(chunk);
        }
        return true;
    }
    return false;
}

ChunkList内容比较易懂，部分代码就不在此列出，下面总结一下

每个PoolChunkList中用head字段维护一个PoolChunk链表的头部
当需要进行内存分配时，会依次遍历该PoolChunkList中的PoolChunk节点来完成，完成之后，会判断此PoolChunk的内存使用量是否达到（大于等于）该PoolChunkList的maxUsage，如果达到，则将此chunk放到下一个PoolChunkList中。
当chunk由于内存释放的原因而导致内存使用量减少，即剩余内存量增大，如果小于此PoolChunkList的minUsage，则将其加入到上一个PoolChunkList中去。

5. PoolArena

终于到了这一节了，前面所有的内容都是对PoolArena的铺垫，比如PoolChunk、PoolSubPage相当于零件，PoolArena是对这些零件的组装，提供内存管理的功能。
为了缓解线程竞争，一般通过创建多个poolArena细化锁的粒度，提高并发执行的效率。

PoolArena类是一个抽象类，这是因为ByteBuf分为Heap和Direct，所以PoolArena同样分为：Heap和Direct。实现类为PoolArena的内部类。下面先从PoolArena的属性看起

final PooledByteBufAllocator parent;// the allocator this PoolArena belongs to
private final int maxOrder; ;// maximum height of the binary tree formed by the page nodes of a chunk; 11 by default
final int pageSize;// size of a page, 8K by default
final int pageShifts;// pageShifts = log2(pageSize), 13 by default
final int chunkSize;;// size of a chunk, 16M by default
final int subpageOverflowMask;// set to ~(pageSize - 1) in the constructor
static final int numTinySubpagePools = 512 >>> 4;// length of the array used for tiny allocations
final int numSmallSubpagePools; // length of the array used for small allocations
// tinySubpagePools holds the pages used for tiny (< 512) allocations;
// smallSubpagePools holds the pages used for small (>= 512 and < pageSize) allocations
private final PoolSubpage<T>[] tinySubpagePools;
private final PoolSubpage<T>[] smallSubpagePools;
// Each PoolChunkList is a container for chunks in the same usage state; the lists are:
private final PoolChunkList<T> q050;
private final PoolChunkList<T> q025;
private final PoolChunkList<T> q000;
private final PoolChunkList<T> qInit;
private final PoolChunkList<T> q075;
private final PoolChunkList<T> q100;
// unmodifiable list of the six chunk lists, built in the constructor in the order qInit .. q100
private final List<PoolChunkListMetric> chunkListMetrics;
// incremented in allocate() after a normal-sized request is served by allocateNormal
private long allocationsNormal;

继续看构造方法:

/**
 * Initialises the arena: stores the sizing parameters, creates the head nodes of the
 * tiny and small subpage pools, builds the six chunk lists and links them together,
 * and publishes the lists as an unmodifiable metrics view.
 *
 * @param parent         the allocator that owns this arena
 * @param pageSize       size of a page in bytes
 * @param maxOrder       maximum height of the page tree inside a chunk
 * @param pageShifts     log2 of pageSize
 * @param chunkSize      size of a chunk in bytes
 * @param cacheAlignment alignment stored in directMemoryCacheAlignment; its mask is cacheAlignment - 1
 */
protected PoolArena(PooledByteBufAllocator parent, int pageSize,
      int maxOrder, int pageShifts, int chunkSize, int cacheAlignment) {
    this.parent = parent;
    this.pageSize = pageSize;
    this.maxOrder = maxOrder;
    this.pageShifts = pageShifts;
    this.chunkSize = chunkSize;
    directMemoryCacheAlignment = cacheAlignment;
    directMemoryCacheAlignmentMask = cacheAlignment - 1;
    // Mask of every bit at or above pageSize; a capacity with none of these bits set is smaller than a page.
    subpageOverflowMask = ~(pageSize - 1);
    // One head node per tiny slot (numTinySubpagePools = 512 >>> 4 = 32 slots).
    tinySubpagePools = newSubpagePoolArray(numTinySubpagePools);
    for (int i = 0; i < tinySubpagePools.length; i ++) {
        tinySubpagePools[i] = newSubpagePoolHead(pageSize);
    }

    numSmallSubpagePools = pageShifts - 9; // one slot per power-of-two size from 512 (2^9) up to, but excluding, pageSize; 4 when pageShifts is 13
    smallSubpagePools = newSubpagePoolArray(numSmallSubpagePools);
    for (int i = 0; i < smallSubpagePools.length; i ++) {
        smallSubpagePools[i] = newSubpagePoolHead(pageSize);
    }
    // Chunk lists are created from q100 downwards so each can be given its successor as nextList.
    // Arguments: (arena, nextList, minUsage, maxUsage, chunkSize).
    q100 = new PoolChunkList<T>(this, null, 100, Integer.MAX_VALUE, chunkSize);
    q075 = new PoolChunkList<T>(this, q100, 75, 100, chunkSize);
    q050 = new PoolChunkList<T>(this, q075, 50, 100, chunkSize);
    q025 = new PoolChunkList<T>(this, q050, 25, 75, chunkSize);
    q000 = new PoolChunkList<T>(this, q025, 1, 50, chunkSize);
    qInit = new PoolChunkList<T>(this, q000, Integer.MIN_VALUE, 25, chunkSize);

    // Backward links: q000 has no previous list, qInit points back at itself.
    q100.prevList(q075);
    q075.prevList(q050);
    q050.prevList(q025);
    q025.prevList(q000);
    q000.prevList(null);
    qInit.prevList(qInit);

    // Expose the lists, ordered from qInit to q100, as a read-only metrics view.
    List<PoolChunkListMetric> metrics = new ArrayList<PoolChunkListMetric>(6);
    metrics.add(qInit);
    metrics.add(q000);
    metrics.add(q025);
    metrics.add(q050);
    metrics.add(q075);
    metrics.add(q100);
    chunkListMetrics = Collections.unmodifiableList(metrics);
}

总的来看主要是对PoolChunkList和SubpagePools进行初始化工作，其中前者管理Chunk，用于分配大于PageSize的内存，后者用于管理SubPage，用于分配小于PageSize的内存。
SubpagePools数组中只保存头结点，它是一个空的节点。SubpagePools分为两类：

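tinySubpagePools：用于分配小于512字节的内存，默认长度为32（即512 >>> 4），因为内存分配最小为16，每次增加16，直到512；数组下标为elemSize >>> 4，区间[16，512)内按16递增共有31个取值（对应下标1~31），加上下标0一共32个槽位
smallSubpagePools：用于分配大于等于512字节且小于pageSize的内存，默认长度为4（即pageShifts - 9），分别对应512、1024、2048、4096四种大小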

poolChunkList用于分配大于8k的内存；

qInit：存储内存利用率0-25%的chunk
q000：存储内存利用率1-50%的chunk
q025：存储内存利用率25-75%的chunk
q050：存储内存利用率50-100%的chunk
q075：存储内存利用率75-100%的chunk
q100：存储内存利用率100%的chunk

有一个疑问，初始化SubpagePools时newSubpagePoolHead创建的PoolSubPage是一样的，那么如何区分每个元素分别代表的不同大小的内存?
答案就在下面代码中

/**
 * Returns the head node of the subpage pool responsible for the given element size.
 *
 * <p>Tiny sizes are indexed by {@code elemSize >>> 4} into {@code tinySubpagePools}; all other
 * sizes are indexed into {@code smallSubpagePools} by the number of right shifts needed to
 * reduce {@code elemSize >>> 10} to zero.
 *
 * @param elemSize the element size in bytes
 * @return the head node stored at the computed slot
 */
PoolSubpage<T> findSubpagePoolHead(int elemSize) {
    if (isTiny(elemSize)) { // < 512
        return tinySubpagePools[elemSize >>> 4];
    }

    // Count how many bits remain once the low 10 bits are dropped: 512 -> 0, 1024 -> 1, 2048 -> 2, ...
    int slot = 0;
    for (int remaining = elemSize >>> 10; remaining != 0; remaining >>>= 1) {
        slot++;
    }
    return smallSubpagePools[slot];
}

当elemSize小于512时，块大小为elemSize的page将存储在tinySubpagePools[elemSize>>>4]的位置上，用于之后分配大小为elemSize（小于512的tiny内存）的内存请求。也就是说：tinySubpagePools[tableIdx]处的page负责分配大小为16*tableIdx的内存块。
当elemSize在区间[512,pageSize)范围内时，块大小为elemSize的page将存储在smallSubpagePools[(log2(elemSize)-10)+1]（即smallSubpagePools[log2(elemSize)-9]）的位置上，用于之后分配大小为elemSize的内存请求。
也就是说：smallSubpagePools[0]处的page负责分配大小为512的块内存，smallSubpagePools[1]处的page负责分配大小为1024的内存，smallSubpagePools[2]处的page负责分配大小为2048的内存，按这种倍增的方式依次类推。
接下去看看poolArena最核心的内存分配，实现如下：

/**
 * Allocates memory for {@code buf}, choosing a path by the normalised capacity:
 * tiny/small requests (below pageSize) go through the thread cache, then the matching
 * subpage pool, then {@code allocateNormal}; requests up to chunkSize go through the
 * thread cache and then {@code allocateNormal}; anything larger goes to {@code allocateHuge}.
 *
 * @param cache       the calling thread's cache, tried first for tiny, small and normal requests
 * @param buf         the buffer to initialise
 * @param reqCapacity the capacity requested by the caller
 */
private void allocate(PoolThreadCache cache, PooledByteBuf<T> buf, final int reqCapacity) {
    final int normCapacity = normalizeCapacity(reqCapacity);// normalise the requested capacity
    if (isTinyOrSmall(normCapacity)) { // capacity < pageSize
        int tableIdx;
        PoolSubpage<T>[] table;
        boolean tiny = isTiny(normCapacity);
        if (tiny) { // < 512
            if (cache.allocateTiny(this, buf, reqCapacity, normCapacity)) {
                // was able to allocate out of the cache so move on
                return;
            }
            tableIdx = tinyIdx(normCapacity);
            table = tinySubpagePools;
        } else {
            if (cache.allocateSmall(this, buf, reqCapacity, normCapacity)) {
                // was able to allocate out of the cache so move on
                return;
            }
            tableIdx = smallIdx(normCapacity);
            table = smallSubpagePools;
        }
        // The pool head is used as the lock for this size class.
        final PoolSubpage<T> head = table[tableIdx];
        synchronized (head) {
            final PoolSubpage<T> s = head.next;
            // head.next != head means a subpage is linked after the head; allocate from it.
            if (s != head) {
                assert s.doNotDestroy && s.elemSize == normCapacity;
                long handle = s.allocate();
                assert handle >= 0;
                s.chunk.initBufWithSubpage(buf, handle, reqCapacity);
                incTinySmallAllocation(tiny);
                return;
            }
        }
        // No subpage linked for this size: fall back to allocateNormal under the arena lock.
        synchronized (this) {
            allocateNormal(buf, reqCapacity, normCapacity);
        }

        incTinySmallAllocation(tiny);
        return;
    }
    if (normCapacity <= chunkSize) {
        if (cache.allocateNormal(this, buf, reqCapacity, normCapacity)) {
            // was able to allocate out of the cache so move on
            return;
        }
        // Cache miss: allocate under the arena lock and count the allocation.
        synchronized (this) {
            allocateNormal(buf, reqCapacity, normCapacity);
            ++allocationsNormal;
        }
    } else {
        // Huge allocations are never served via the cache so just call allocateHuge
        allocateHuge(buf, reqCapacity);
    }
}