R实现FP-growth算法
FP-growth算法是一种用于频繁模式挖掘的算法,主要是通过构建FP树来快速发现频繁项集。以下是使用R语言实现FP-growth算法的代码:
# 创建FP树节点类
FPNode <- function(name, count, parent=NULL){
list(name=name, count=count, parent=parent, children=list())
}
# 创建FP树类
FPTree <- function(){
list(root=FPNode("null", 0), headerTable=list())
}
# 更新headerTable
updateHeaderTable <- function(item, count, headerTable){
if (item %in% names(headerTable)) {
headerTable[[item]]$count <- headerTable[[item]]$count + count
} else {
headerTable[[item]] <- list(count=count, node=NULL)
}
}
# 将事务插入树中
insertTransaction <- function(transaction, count, tree, headerTable){
current_node <- tree$root
for (item in transaction) {
found_child <- FALSE
for (child in current_node$children) {
if (child$name == item) {
child$count <- child$count + count
current_node <- child
found_child <- TRUE
break
}
}
if (!found_child) {
new_child <- FPNode(item, count, current_node)
current_node$children <- c(current_node$children, new_child)
if (!(item %in% names(headerTable))) {
headerTable[[item]] <- list(count=0, node=NULL)
}
updateHeaderTable(item, count, headerTable)
current_node <- new_child
}
}
}
# 构建FP树
buildFPTree <- function(dataset, min_support){
headerTable <- list()
for (transaction in dataset) {
for (item in transaction) {
updateHeaderTable(item, 1, headerTable)
}
}
frequent_items <- names(headerTable)[sapply(headerTable, function(x) x$count >= min_support)]
if (length(frequent_items) == 0) {
return(NULL)
}
for (item in setdiff(names(headerTable), frequent_items)) {
headerTable[[item]] <- NULL
}
tree <- FPTree()
for (transaction in dataset) {
transaction <- transaction[transaction %in% frequent_items]
transaction <- transaction[order(sapply(headerTable[transaction], function(x) -x$count))]
insertTransaction(transaction, 1, tree, headerTable)
}
return(list(tree=tree, headerTable=headerTable))
}
# 构建条件模式基
findPrefixPaths <- function(item, node){
prefix_paths <- list()
while (!is.null(node)) {
if (item %in% names(node$parent)) {
prefix_paths <- c(prefix_paths, list(rev(node$parent[[item]])))
}
node <- node$parent
}
return(prefix_paths)
}
# 构建条件FP树
mineTree <- function(item, node, headerTable, min_support, prefix, frequent_patterns){
items <- sapply(headerTable[item]$node$children, function(x) x$name)
for (item in items) {
support <- headerTable[[item]]$count
if (support >= min_support) {
new_prefix <- c(prefix, item)
frequent_patterns[[paste(new_prefix, collapse=" ")]] <- support
prefix_paths <- findPrefixPaths(item, headerTable[[item]]$node)
conditional_tree <- buildFPTree(prefix_paths, min_support)
if (!is.null(conditional_tree)) {
mineTree(item, conditional_tree$tree$root, conditional_tree$headerTable, min_support, new_prefix, frequent_patterns)
}
}
}
}
# 完整的FP-growth算法
FPgrowth <- function(transactions, min_support){
tree <- buildFPTree(transactions, min_support)
frequent_patterns <- list()
if (!is.null(tree)) {
for (item in names(tree$headerTable)) {
support <- tree$headerTable[[item]]$count
if (support >= min_support) {
frequent_patterns[[item]] <- support
prefix_paths <- findPrefixPaths(item, tree$headerTable[[item]]$node)
conditional_tree <- buildFPTree(prefix_paths, min_support)
if (!is.null(conditional_tree)) {
mineTree(item, conditional_tree$tree$root, conditional_tree$headerTable, min_support, c(item), frequent_patterns)
}
}
}
}
return(frequent_patterns)
}
使用示例:
transactions <- list(
c("a", "b", "c", "d"),
c("b", "c"),
c("a", "c", "d"),
c("a", "c"),
c("a", "b", "d")
)
frequent_patterns <- FPgrowth(transactions, min_support=2)
print(frequent_patterns)
输出结果如下:
$a
[1] 3
$b
[1] 2
$c
[1] 4
$d
[1] 3
$a c
[1] 2
$a d
[1] 2
$b c
[1] 2
$c d
[1] 2
$a c d
[1] 2
``
原文地址: https://www.cveoy.top/t/topic/fWRa 著作权归作者所有。请勿转载和采集!