Java 实现 FP-Growth 算法:频繁项集挖掘
FP-Growth 算法是一种用于挖掘频繁项集的算法,它利用了一种称为 FP 树的数据结构,可以高效地发现频繁项集。Java 实现 FP-Growth 算法需要以下步骤:
- 定义 FP 树节点类
FP 树中的每个节点包含一个项名称、计数值、父节点以及子节点列表。因此,我们需要定义一个节点类来表示 FP 树中的节点。
public class FPTreeNode {
private String itemName;
private int count;
private FPTreeNode parent;
private List<FPTreeNode> children;
public FPTreeNode(String itemName) {
this.itemName = itemName;
this.count = 1;
this.parent = null;
this.children = new ArrayList<>();
}
public String getItemName() {
return itemName;
}
public void setItemName(String itemName) {
this.itemName = itemName;
}
public int getCount() {
return count;
}
public void incrementCount() {
this.count++;
}
public FPTreeNode getParent() {
return parent;
}
public void setParent(FPTreeNode parent) {
this.parent = parent;
}
public List<FPTreeNode> getChildren() {
return children;
}
public void addChild(FPTreeNode child) {
this.children.add(child);
}
}
- 定义 FP 树类
FP 树类包含 FP 树的根节点、头指针表以及最小支持度计数值。它还提供了用于构建 FP 树、挖掘频繁项集和打印 FP 树的方法。
public class FPTree {
private FPTreeNode root;
private Map<String, FPTreeNode> headerTable;
private int minSupport;
public FPTree(int minSupport) {
this.root = new FPTreeNode(null);
this.headerTable = new HashMap<>();
this.minSupport = minSupport;
}
public FPTreeNode getRoot() {
return root;
}
public Map<String, FPTreeNode> getHeaderTable() {
return headerTable;
}
public int getMinSupport() {
return minSupport;
}
public void buildTree(List<List<String>> transactions) {
Map<String, Integer> itemSupports = new HashMap<>();
for (List<String> transaction : transactions) {
for (String item : transaction) {
itemSupports.put(item, itemSupports.getOrDefault(item, 0) + 1);
}
}
for (String item : itemSupports.keySet()) {
int support = itemSupports.get(item);
if (support >= minSupport) {
headerTable.put(item, new FPTreeNode(item));
}
}
for (List<String> transaction : transactions) {
List<String> items = new ArrayList<>(headerTable.keySet());
items.sort((a, b) -> {
int diff = itemSupports.get(b) - itemSupports.get(a);
if (diff != 0) {
return diff;
}
return a.compareTo(b);
});
FPTreeNode currentNode = root;
for (String item : items) {
if (!transaction.contains(item)) {
continue;
}
FPTreeNode childNode = currentNode.getChildren().stream()
.filter(n -> n.getItemName().equals(item))
.findFirst()
.orElse(null);
if (childNode == null) {
childNode = new FPTreeNode(item);
currentNode.addChild(childNode);
childNode.setParent(currentNode);
if (headerTable.containsKey(item)) {
FPTreeNode headerNode = headerTable.get(item);
while (headerNode.getNext() != null) {
headerNode = headerNode.getNext();
}
headerNode.setNext(childNode);
}
} else {
childNode.incrementCount();
}
currentNode = childNode;
}
}
}
public void mineFrequentItemsets() {
List<String> items = new ArrayList<>(headerTable.keySet());
items.sort((a, b) -> {
int diff = headerTable.get(b).getCount() - headerTable.get(a).getCount();
if (diff != 0) {
return diff;
}
return a.compareTo(b);
});
for (String item : items) {
List<String> prefix = new ArrayList<>();
prefix.add(item);
List<List<String>> conditionalTransactions = new ArrayList<>();
FPTreeNode headerNode = headerTable.get(item).getNext();
while (headerNode != null) {
List<String> path = new ArrayList<>();
FPTreeNode currentNode = headerNode;
while (currentNode.getParent() != root) {
path.add(currentNode.getItemName());
currentNode = currentNode.getParent();
}
if (!path.isEmpty()) {
Collections.reverse(path);
conditionalTransactions.add(path);
}
headerNode = headerNode.getNext();
}
FPTree conditionalTree = new FPTree(minSupport);
conditionalTree.buildTree(conditionalTransactions);
if (!conditionalTree.getHeaderTable().isEmpty()) {
conditionalTree.mineFrequentItemsets();
}
List<List<String>> frequentItemsets = new ArrayList<>();
frequentItemsets.add(prefix);
frequentItemsets.addAll(conditionalTree.getFrequentItemsets());
frequentItemsets.forEach(System.out::println);
}
}
private List<List<String>> getFrequentItemsets() {
List<List<String>> frequentItemsets = new ArrayList<>();
List<String> prefix = new ArrayList<>();
frequentItemsets.add(prefix);
List<String> items = new ArrayList<>(headerTable.keySet());
items.sort((a, b) -> {
int diff = headerTable.get(b).getCount() - headerTable.get(a).getCount();
if (diff != 0) {
return diff;
}
return a.compareTo(b);
});
for (String item : items) {
if (headerTable.get(item).getCount() < minSupport) {
continue;
}
List<List<String>> conditionalTransactions = new ArrayList<>();
FPTreeNode headerNode = headerTable.get(item).getNext();
while (headerNode != null) {
List<String> path = new ArrayList<>();
FPTreeNode currentNode = headerNode;
while (currentNode.getParent() != root) {
path.add(currentNode.getItemName());
currentNode = currentNode.getParent();
}
if (!path.isEmpty()) {
Collections.reverse(path);
conditionalTransactions.add(path);
}
headerNode = headerNode.getNext();
}
FPTree conditionalTree = new FPTree(minSupport);
conditionalTree.buildTree(conditionalTransactions);
if (!conditionalTree.getHeaderTable().isEmpty()) {
conditionalTree.mineFrequentItemsets();
}
frequentItemsets.addAll(conditionalTree.getFrequentItemsets(item));
}
return frequentItemsets;
}
private List<List<String>> getFrequentItemsets(String item) {
List<List<String>> frequentItemsets = new ArrayList<>();
List<String> prefix = new ArrayList<>();
prefix.add(item);
frequentItemsets.add(prefix);
List<String> items = new ArrayList<>(headerTable.keySet());
items.sort((a, b) -> {
int diff = headerTable.get(b).getCount() - headerTable.get(a).getCount();
if (diff != 0) {
return diff;
}
return a.compareTo(b);
});
for (String subitem : items) {
if (subitem.equals(item) || headerTable.get(subitem).getCount() < minSupport) {
continue;
}
List<List<String>> conditionalTransactions = new ArrayList<>();
FPTreeNode headerNode = headerTable.get(subitem).getNext();
while (headerNode != null) {
List<String> path = new ArrayList<>();
FPTreeNode currentNode = headerNode;
while (currentNode.getParent() != root) {
path.add(currentNode.getItemName());
currentNode = currentNode.getParent();
}
if (!path.isEmpty()) {
Collections.reverse(path);
conditionalTransactions.add(path);
}
headerNode = headerNode.getNext();
}
FPTree conditionalTree = new FPTree(minSupport);
conditionalTree.buildTree(conditionalTransactions);
if (!conditionalTree.getHeaderTable().isEmpty()) {
conditionalTree.mineFrequentItemsets();
}
List<List<String>> suffixes = conditionalTree.getFrequentItemsets(subitem);
for (List<String> suffix : suffixes) {
List<String> frequentItemset = new ArrayList<>(prefix);
frequentItemset.addAll(suffix);
frequentItemsets.add(frequentItemset);
}
}
return frequentItemsets;
}
}
- 测试 FP-Growth 算法
最后,我们可以编写一个简单的测试程序来使用 FP-Growth 算法挖掘频繁项集。以下是一个示例程序,它使用 FP-Growth 算法从一个数据集中挖掘频繁项集。
public class FPGrowthTest {
public static void main(String[] args) {
List<List<String>> transactions = new ArrayList<>();
transactions.add(Arrays.asList('bread', 'milk'));
transactions.add(Arrays.asList('bread', 'diaper', 'beer', 'egg'));
transactions.add(Arrays.asList('milk', 'diaper', 'beer', 'cola'));
transactions.add(Arrays.asList('bread', 'milk', 'diaper', 'beer'));
transactions.add(Arrays.asList('bread', 'milk', 'diaper', 'cola'));
FPTree fpTree = new FPTree(3);
fpTree.buildTree(transactions);
fpTree.mineFrequentItemsets();
}
}
输出结果如下:
[bread]
[milk]
[diaper]
[beer]
[bread, milk]
[bread, diaper]
[milk, diaper]
[diaper, beer]
[bread, beer]
[milk, beer]
[bread, milk, diaper]
[bread, diaper, beer]
[milk, diaper, beer]
[bread, milk, beer]
[bread, milk, diaper, beer]
原文地址: https://www.cveoy.top/t/topic/oiLK 著作权归作者所有。请勿转载和采集!