一只斗牛犬
from operator import and_
from itertools import combinations


class AprioriAssociationRule:
    """Mine frequent itemsets and association rules with the Apriori algorithm.

    The input is a text file with one transaction per line, items separated
    by commas.  After ``runApriori`` has been called:

    * ``toRetItems`` maps every frequent itemset (a ``frozenset``) to its
      support, i.e. the fraction of transactions that contain it;
    * ``associationRules`` is a list of ``[antecedent, consequent,
      confidence]`` entries, where both sides are ``frozenset`` objects.
    """

    def __init__(self, inputfile):
        """Load the transactions stored in *inputfile*.

        Args:
            inputfile: Path of a UTF-8 text file.  Each non-empty line is one
                transaction; items are comma separated.  Whitespace around
                an item is ignored and empty items are dropped.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        self.transactions = []
        self.itemSet = set()
        # Text mode (not 'rb') so that lines are ``str`` and can be split on
        # a ``str`` comma; ``with`` closes the file even if parsing fails.
        with open(inputfile, encoding="utf-8") as infile:
            for line in infile:
                elements = {entry.strip() for entry in line.split(",")}
                elements.discard("")
                if elements:
                    self.transactions.append(elements)
                    self.itemSet.update(elements)
        self.toRetItems = {}
        self.associationRules = []

    def getSupport(self, itemcomb):
        """Return the fraction of transactions containing *itemcomb*.

        Args:
            itemcomb: Either a single item, or a ``set``/``frozenset`` of
                items that must all occur together in a transaction.

        Returns:
            The support as a float in ``[0.0, 1.0]``; ``0.0`` when no
            transactions are loaded.
        """
        if not isinstance(itemcomb, (set, frozenset)):
            itemcomb = frozenset([itemcomb])
        if not self.transactions:
            # Avoid dividing by zero on an empty data set.
            return 0.0
        count = sum(
            1 for transaction in self.transactions if itemcomb <= transaction
        )
        return float(count) / len(self.transactions)

    def runApriori(self, minSupport=0.15, minConfidence=0.6):
        """Find all frequent itemsets, then derive the association rules.

        Results of any previous run are discarded.

        Args:
            minSupport: An itemset is kept when its support is greater than
                or equal to this value.
            minConfidence: Forwarded to ``calculateAssociationRules``.
        """
        self.toRetItems = {}

        # L1: frequent single items.
        frequent = {}
        for item in self.itemSet:
            support = self.getSupport(item)
            if support >= minSupport:
                frequent[frozenset([item])] = support
        self.toRetItems.update(frequent)
        currentLset = set(frequent)

        k = 2
        while currentLset:
            # Ck: unions of two frequent (k-1)-itemsets that have exactly k
            # items.  Pairing an itemset with itself can never reach size k,
            # so unordered pairs of distinct itemsets are sufficient.
            candidates = {
                first | second
                for first, second in combinations(currentLset, 2)
                if len(first | second) == k
            }
            # Lk: the candidates that are frequent enough.
            frequent = {}
            for candidate in candidates:
                support = self.getSupport(candidate)
                if support >= minSupport:
                    frequent[candidate] = support
            self.toRetItems.update(frequent)
            currentLset = set(frequent)
            k += 1

        self.calculateAssociationRules(minConfidence=minConfidence)

    def calculateAssociationRules(self, minConfidence=0.6):
        """Rebuild ``associationRules`` from the itemsets in ``toRetItems``.

        For every frequent itemset and every non-empty proper subset of it
        (the antecedent), the confidence is
        ``support(itemset) / support(antecedent)``.  A rule is kept when its
        confidence is strictly greater than *minConfidence*.

        Args:
            minConfidence: Exclusive lower bound on the rule confidence.

        Raises:
            KeyError: If a subset of a stored itemset is itself missing from
                ``toRetItems``.
        """
        # Start from scratch so repeated calls do not pile up duplicates.
        self.associationRules = []
        for itemset, support in self.toRetItems.items():
            for size in range(1, len(itemset)):
                for chosen in combinations(itemset, size):
                    antecedent = frozenset(chosen)
                    antecedent_support = self.toRetItems[antecedent]
                    if antecedent_support == 0:
                        # Only reachable with minSupport <= 0; the
                        # confidence is undefined, so no rule is emitted.
                        continue
                    confidence = support / antecedent_support
                    if confidence > minConfidence:
                        self.associationRules.append(
                            [antecedent, itemset - antecedent, confidence]
                        )


# ---------------------------------------------------------------------------
# Companion listing from the original text: the same algorithm in Scala,
# which also takes roughly sixty-odd lines.  Kept for reference only.
# ---------------------------------------------------------------------------
#
# import scala.io.Source
# import scala.collection.immutable.List
# import scala.collection.immutable.Set
# import java.io.File
# import scala.collection.mutable.Map
#
# class AprioriAlgorithm(inputFile: File) {
#   var transactions : List[Set[String]] = List()
#   var itemSet : Set[String] = Set()
#   for (line<-Source.fromFile(inputFile).getLines()) {
#     val elementSet = line.trim.split(',').toSet
#     if (elementSet.size > 0) {
#       transactions = transactions :+ elementSet
#       itemSet = itemSet ++ elementSet
#     }
#   }
#   var toRetItems : Map[Set[String], Double] = Map()
#   var associationRules : List[(Set[String], Set[String], Double)] = List()
#
#   def getSupport(itemComb : Set[String]) : Double = {
#     def withinTransaction(transaction : Set[String]) : Boolean =
#       itemComb.map( x => transaction.contains(x))
#               .reduceRight((x1, x2) => x1 && x2)
#     val count = transactions.filter(withinTransaction).size
#     count.toDouble / transactions.size.toDouble
#   }
#
#   def runApriori(minSupport : Double = 0.15, minConfidence : Double = 0.6) = {
#     var itemCombs = itemSet.map( word => (Set(word), getSupport(Set(word))))
#                            .filter( wordSupportPair => (wordSupportPair._2 > minSupport))
#     var currentLSet : Set[Set[String]] = itemCombs.map( wordSupportPair => wordSupportPair._1).toSet
#     var k : Int = 2
#     while (currentLSet.size > 0) {
#       val currentCSet : Set[Set[String]] = currentLSet.map( wordSet => currentLSet.map(wordSet1 => wordSet | wordSet1))
#                                                       .reduceRight( (set1, set2) => set1 | set2)
#                                                       .filter( wordSet => (wordSet.size==k))
#       val currentItemCombs = currentCSet.map( wordSet => (wordSet, getSupport(wordSet)))
#                                         .filter( wordSupportPair => (wordSupportPair._2 > minSupport))
#       currentLSet = currentItemCombs.map( wordSupportPair => wordSupportPair._1).toSet
#       itemCombs = itemCombs | currentItemCombs
#       k += 1
#     }
#     for (itemComb<-itemCombs) {
#       toRetItems += (itemComb._1 -> itemComb._2)
#     }
#     calculateAssociationRule(minConfidence)
#   }
#
#   def calculateAssociationRule(minConfidence : Double = 0.6) = {
#     toRetItems.keys.foreach(item =>
#       item.subsets.filter( wordSet => (wordSet.size<item.size & wordSet.size>0))
#           .foreach( subset => {associationRules = associationRules :+ (subset, item diff subset,
#                                  toRetItems(item).toDouble/toRetItems(subset).toDouble)
#                               }
#                   )
#     )
#     associationRules = associationRules.filter( rule => rule._3>minConfidence)
#   }
# }