关联规则算法的实现及其应用 Apriori算法

实验一  关联规则算法的实现及其应用

【实验目的】

  1. 掌握关联规则挖掘算法Apriori的概念,理解算法的步骤。
  2. 加深对Apriori算法的理解,逐步培养解决实际问题的能力。

【实验性质】

设计型实验

【实验内容】    

 实现使用Apriori算法来挖掘关联规则 

【实验环境】

Python 2

【实验结果】

  • 完整的apriori.py文件

代码:

def loadDataSet():
    """Return the small hard-coded transaction database used by the examples.

    Returns:
        A new list of four transactions, each a list of integer item ids.
    """
    transactions = [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
    return transactions

 

def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable items
            that can be ordered against each other.

    Returns:
        A list of single-item frozensets, one per distinct item, in ascending
        item order.
    """
    # A set detects duplicates in O(1) instead of rescanning a growing list.
    uniqueItems = set()
    for transaction in dataSet:
        uniqueItems.update(transaction)
    # Return a real list rather than a lazy ``map`` object: the candidates are
    # iterated once per transaction, which a one-shot iterator cannot support.
    return [frozenset([item]) for item in sorted(uniqueItems)]

 

def scanD(D,Ck,minSupport):
    """Compute the support of each candidate itemset and keep the frequent ones.

    Args:
        D: iterable of transactions; each must be accepted by
            ``frozenset.issubset`` (for example a set).
        Ck: iterable of candidate itemsets (frozensets).
        minSupport: minimum fraction of transactions that must contain a
            candidate for it to be kept.

    Returns:
        A ``(retList, supportData)`` tuple. ``retList`` lists the candidates
        whose support is at least ``minSupport``. ``supportData`` maps every
        candidate found in at least one transaction to its support;
        candidates that never occur are absent from it.
    """
    # Materialise both inputs so that one-shot iterators (such as the result
    # of ``map`` on Python 3) can be scanned repeatedly and measured.
    transactions = list(D)
    candidates = list(Ck)

    ssCnt = {}
    for tid in transactions:
        for can in candidates:
            if can.issubset(tid):
                # dict.get replaces dict.has_key, which Python 3 removed.
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = float(len(transactions))
    retList = []
    supportData = {}
    for key in ssCnt:
        support = ssCnt[key] / numItems
        if support >= minSupport:
            retList.append(key)
        supportData[key] = support
    # Appending and reversing once gives the same order as inserting every
    # key at index 0, without the repeated list shifts.
    retList.reverse()
    return retList, supportData

 

def aprioriGen(Lk, k): #creates Ck
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are merged when their first ``k-2`` items, taken in sorted
    order, are identical.

    Args:
        Lk: list of frozensets, each of size ``k-1``.
        k: size of the itemsets to generate.

    Returns:
        A list with the union of every pair of itemsets that share a prefix.
    """
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing the raw list could compare arbitrary items and miss candidates.
    # Each prefix is computed once instead of once per pair.
    prefixes = [sorted(itemset)[:k-2] for itemset in Lk]
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i+1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList

 

def apriori(dataSet, minSupport = 0.5):
    """Find all frequent itemsets in ``dataSet`` with the Apriori algorithm.

    Args:
        dataSet: sequence of transactions, each an iterable of hashable items.
        minSupport: minimum fraction of transactions that must contain an
            itemset for it to count as frequent.

    Returns:
        A ``(L, supportData)`` tuple. ``L[i]`` is the list of frequent
        itemsets of size ``i+1``; the last entry of ``L`` is always an empty
        list, because the loop stops once a level has no frequent itemsets.
        ``supportData`` maps each scanned itemset that occurred at least once
        to its support.
    """
    # Build concrete lists: the candidates and the transactions are scanned
    # many times, which a lazy ``map`` iterator would not survive.
    C1 = list(createC1(dataSet))
    D = [set(transaction) for transaction in dataSet]
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # L[k-2] holds the frequent (k-1)-itemsets found in the previous round.
    while L[k-2]:
        Ck = aprioriGen(L[k-2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData

 

def generateRules(L,supportData,minConf=0.7):
    """Derive association rules from the frequent itemsets found by apriori.

    Args:
        L: list of lists of frequent itemsets, where ``L[i]`` holds the
            itemsets of size ``i+1``. ``L[0]`` is skipped because a rule
            needs at least two items.
        supportData: dict mapping itemsets (frozensets) to their support.
        minConf: minimum confidence a rule must reach to be kept.

    Returns:
        The list that ``calcConf`` fills with
        ``(antecedent, consequent, confidence)`` tuples; it is empty when
        ``L`` has no itemsets of two or more items.
    """
    bigRuleList = []
    for i in range(1, len(L)):
        for freqSet in L[i]:
            # A real list is required: rulesFromConseq indexes H[0].
            H1 = [frozenset([item]) for item in freqSet]
            if i > 1:
                # NOTE(review): rulesFromConseq starts from consequents one
                # item larger than those in H1, so rules with a single-item
                # consequent are not evaluated for itemsets of three or more
                # items. Confirm that this is intended.
                rulesFromConseq(freqSet, H1, supportData, bigRuleList,
                                minConf)
            else:
                calcConf(freqSet, H1, supportData, bigRuleList, minConf)
    # Return only after every level has been processed.
    return bigRuleList

 

def calcConf(freqSet,H,supportData,brl,minConf=0.7):
    """Evaluate candidate consequents of ``freqSet`` and record confident rules.

    For every consequent in ``H`` the confidence of the rule
    ``(freqSet - conseq) --> conseq`` is
    ``supportData[freqSet] / supportData[freqSet - conseq]``.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are drawn from.
        H: iterable of candidate consequents (frozensets).
        supportData: dict mapping itemsets to their support.
        brl: list that receives ``(antecedent, consequent, confidence)``
            tuples; it is modified in place.
        minConf: minimum confidence a rule must reach to be kept.

    Returns:
        The list of consequents whose rule reached ``minConf``.

    Raises:
        KeyError: if ``supportData`` has no entry for ``freqSet`` or for an
            antecedent.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = supportData[freqSet] / supportData[antecedent]
        if conf >= minConf:
            # A single pre-formatted string prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s --> %s conf: %s" % (antecedent, conseq, conf))
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH

 

def rulesFromConseq(freqSet,H,supportData,brl,minConf=0.7):
    """Recursively grow rule consequents for ``freqSet`` and record the rules.

    Consequents one item larger than those in ``H`` are generated with
    ``aprioriGen`` and filtered with ``calcConf``. The function recurses
    while more than one consequent survives and ``freqSet`` is large enough
    to leave a non-empty antecedent.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are drawn from.
        H: iterable of candidate consequents (frozensets), all of the same
            size.
        supportData: dict mapping itemsets to their support.
        brl: list that receives the rules; it is modified in place by
            ``calcConf``.
        minConf: minimum confidence a rule must reach to be kept.

    Returns:
        None. The results are appended to ``brl``.
    """
    # Accept any iterable and tolerate an empty one: H[0] is indexed below.
    H = list(H)
    if not H:
        return
    m = len(H[0])
    if len(freqSet) > (m + 1):
        # Two arguments: the current consequents and the target size.
        Hmp1 = aprioriGen(H, m + 1)
        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)
        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)

  • 编写调用并测试apriori.py的代码

代码:

# Driver script: exercises each stage of the apriori module in turn and
# prints the intermediate results. It runs under both Python 2 and Python 3.
import apriori

try:
    reload  # built-in on Python 2
except NameError:
    from importlib import reload  # Python 3.4+

# Stage 1: candidate 1-itemsets and the first scan of the transactions.
dataSet = apriori.loadDataSet()
print(dataSet)
C1 = list(apriori.createC1(dataSet))
print(C1)
# A concrete list, so it can be both printed and scanned.
D = list(map(set, dataSet))
print(D)
L1, suppData0 = apriori.scanD(D, C1, 0.5)
print(L1)

# Stage 2: the complete frequent-itemset search.
reload(apriori)
L, suppData = apriori.apriori(dataSet)
print(L)
print(L[0])
print(L[1])
print(L[2])
print(L[3])
print(apriori.aprioriGen(L[0], 2))
L, suppData = apriori.apriori(dataSet, minSupport=0.7)
print(L)

# Stage 3: association rules at two confidence thresholds.
reload(apriori)
L, suppData = apriori.apriori(dataSet, minSupport=0.5)
rules = apriori.generateRules(L, suppData, minConf=0.7)
print(rules)
rules = apriori.generateRules(L, suppData, minConf=0.5)
print(rules)

  • 运行结果:

关联规则算法的实现及其应用 Apriori算法

【实验步骤】

第一步:生成候选项集

程序清单1:Apriori算法中的辅助函数

代码:

def loadDataSet():
    """Return the small hard-coded transaction database used by the examples.

    Returns:
        A new list of four transactions, each a list of integer item ids.
    """
    transactions = [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
    return transactions

 

def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable items
            that can be ordered against each other.

    Returns:
        A list of single-item frozensets, one per distinct item, in ascending
        item order.
    """
    # A set detects duplicates in O(1) instead of rescanning a growing list.
    uniqueItems = set()
    for transaction in dataSet:
        uniqueItems.update(transaction)
    # Return a real list rather than a lazy ``map`` object: the candidates are
    # iterated once per transaction, which a one-shot iterator cannot support.
    return [frozenset([item]) for item in sorted(uniqueItems)]

 

def scanD(D,Ck,minSupport):
    """Compute the support of each candidate itemset and keep the frequent ones.

    Args:
        D: iterable of transactions; each must be accepted by
            ``frozenset.issubset`` (for example a set).
        Ck: iterable of candidate itemsets (frozensets).
        minSupport: minimum fraction of transactions that must contain a
            candidate for it to be kept.

    Returns:
        A ``(retList, supportData)`` tuple. ``retList`` lists the candidates
        whose support is at least ``minSupport``. ``supportData`` maps every
        candidate found in at least one transaction to its support;
        candidates that never occur are absent from it.
    """
    # Materialise both inputs so that one-shot iterators (such as the result
    # of ``map`` on Python 3) can be scanned repeatedly and measured.
    transactions = list(D)
    candidates = list(Ck)

    ssCnt = {}
    for tid in transactions:
        for can in candidates:
            if can.issubset(tid):
                # dict.get replaces dict.has_key, which Python 3 removed.
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = float(len(transactions))
    retList = []
    supportData = {}
    for key in ssCnt:
        support = ssCnt[key] / numItems
        if support >= minSupport:
            retList.append(key)
        supportData[key] = support
    # Appending and reversing once gives the same order as inserting every
    # key at index 0, without the repeated list shifts.
    retList.reverse()
    return retList, supportData

运行结果:

关联规则算法的实现及其应用 Apriori算法

第二步:组织完整的Apriori算法

程序清单2:Apriori算法

代码:

def aprioriGen(Lk, k): #creates Ck
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are merged when their first ``k-2`` items, taken in sorted
    order, are identical.

    Args:
        Lk: list of frozensets, each of size ``k-1``.
        k: size of the itemsets to generate.

    Returns:
        A list with the union of every pair of itemsets that share a prefix.
    """
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing the raw list could compare arbitrary items and miss candidates.
    # Each prefix is computed once instead of once per pair.
    prefixes = [sorted(itemset)[:k-2] for itemset in Lk]
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i+1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList

 

def apriori(dataSet, minSupport = 0.5):
    """Find all frequent itemsets in ``dataSet`` with the Apriori algorithm.

    Args:
        dataSet: sequence of transactions, each an iterable of hashable items.
        minSupport: minimum fraction of transactions that must contain an
            itemset for it to count as frequent.

    Returns:
        A ``(L, supportData)`` tuple. ``L[i]`` is the list of frequent
        itemsets of size ``i+1``; the last entry of ``L`` is always an empty
        list, because the loop stops once a level has no frequent itemsets.
        ``supportData`` maps each scanned itemset that occurred at least once
        to its support.
    """
    # Build concrete lists: the candidates and the transactions are scanned
    # many times, which a lazy ``map`` iterator would not survive.
    C1 = list(createC1(dataSet))
    D = [set(transaction) for transaction in dataSet]
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # L[k-2] holds the frequent (k-1)-itemsets found in the previous round.
    while L[k-2]:
        Ck = aprioriGen(L[k-2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData

运行结果:

关联规则算法的实现及其应用 Apriori算法

第三步:从频繁项集中挖掘关联规则

程序清单3:关联规则生成函数

代码:

def generateRules(L,supportData,minConf=0.7):
    """Derive association rules from the frequent itemsets found by apriori.

    Args:
        L: list of lists of frequent itemsets, where ``L[i]`` holds the
            itemsets of size ``i+1``. ``L[0]`` is skipped because a rule
            needs at least two items.
        supportData: dict mapping itemsets (frozensets) to their support.
        minConf: minimum confidence a rule must reach to be kept.

    Returns:
        The list that ``calcConf`` fills with
        ``(antecedent, consequent, confidence)`` tuples; it is empty when
        ``L`` has no itemsets of two or more items.
    """
    bigRuleList = []
    for i in range(1, len(L)):
        for freqSet in L[i]:
            # A real list is required: rulesFromConseq indexes H[0].
            H1 = [frozenset([item]) for item in freqSet]
            if i > 1:
                # NOTE(review): rulesFromConseq starts from consequents one
                # item larger than those in H1, so rules with a single-item
                # consequent are not evaluated for itemsets of three or more
                # items. Confirm that this is intended.
                rulesFromConseq(freqSet, H1, supportData, bigRuleList,
                                minConf)
            else:
                calcConf(freqSet, H1, supportData, bigRuleList, minConf)
    # Return only after every level has been processed.
    return bigRuleList

 

def calcConf(freqSet,H,supportData,brl,minConf=0.7):
    """Evaluate candidate consequents of ``freqSet`` and record confident rules.

    For every consequent in ``H`` the confidence of the rule
    ``(freqSet - conseq) --> conseq`` is
    ``supportData[freqSet] / supportData[freqSet - conseq]``.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are drawn from.
        H: iterable of candidate consequents (frozensets).
        supportData: dict mapping itemsets to their support.
        brl: list that receives ``(antecedent, consequent, confidence)``
            tuples; it is modified in place.
        minConf: minimum confidence a rule must reach to be kept.

    Returns:
        The list of consequents whose rule reached ``minConf``.

    Raises:
        KeyError: if ``supportData`` has no entry for ``freqSet`` or for an
            antecedent.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = supportData[freqSet] / supportData[antecedent]
        if conf >= minConf:
            # A single pre-formatted string prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s --> %s conf: %s" % (antecedent, conseq, conf))
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH

 

def rulesFromConseq(freqSet,H,supportData,brl,minConf=0.7):
    """Recursively grow rule consequents for ``freqSet`` and record the rules.

    Consequents one item larger than those in ``H`` are generated with
    ``aprioriGen`` and filtered with ``calcConf``. The function recurses
    while more than one consequent survives and ``freqSet`` is large enough
    to leave a non-empty antecedent.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are drawn from.
        H: iterable of candidate consequents (frozensets), all of the same
            size.
        supportData: dict mapping itemsets to their support.
        brl: list that receives the rules; it is modified in place by
            ``calcConf``.
        minConf: minimum confidence a rule must reach to be kept.

    Returns:
        None. The results are appended to ``brl``.
    """
    # Accept any iterable and tolerate an empty one: H[0] is indexed below.
    H = list(H)
    if not H:
        return
    m = len(H[0])
    if len(freqSet) > (m + 1):
        # Two arguments: the current consequents and the target size.
        Hmp1 = aprioriGen(H, m + 1)
        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)
        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)

运行结果:

关联规则算法的实现及其应用 Apriori算法

 

我想能看到这里的同学,无外乎两种人:来拷贝代码的人 和 来拷贝代码的人。

但,在拷贝走的时候,你要想清楚一件事,把代码拷走之后有个蛋用,搞明白对你来说才是最重要的。

好了,就酱紫。

 

老铁,这要是都不赞,说不过去吧!!!


最后对自己说:
你现在所遭遇的每一个不幸,都来自一个不肯努力的曾经。