gpt4 book ai didi

Java 字符串模式识别

转载 作者:搜寻专家 更新时间:2023-10-31 20:24:00 24 4
gpt4 key购买 nike

我有一个由 L、T 和 A 组成的大约一千个字符的字符串。我很确定其中有一个简单的模式,我想知道是否有任何快速简便的方法可以找到它。此字符串发生变化,因此这不仅仅是一次关闭。

我正在寻找的模式是例如如果字符串是

LLLLLLLLAATAALLLLALLLLLLAATAALLLATLLLLLAATAALLAALLLLLAATAALL

子字符串 LLLLLAATAALL 在此字符串中重复了 4 次。我想搜索这样的子字符串,但我不知道它们在主字符串中的开始位置、结束位置、数量以及它们的长度。

如果 Java 中有任何工具可以查找此类内容,我们将不胜感激。

新的

最佳答案

好的,所以我从 here 中获取了代码并对其进行调整以跟踪和打印最长的重复子字符串。只需使用 JUnit 运行它。

/* Copyright (c) 2010 the authors listed at the following URL, and/or
the authors of referenced articles or incorporated external code:
http://en.literateprograms.org/Suffix_tree_(Java)?action=history&offset=20100123220137

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Retrieved from: http://en.literateprograms.org/Suffix_tree_(Java)?oldid=16641
*/

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.junit.Test;

public class SuffixTree {
@Test
public void sampleUsage() {

AbstractSuffixTree tree = new SimpleSuffixTree(
"LLLLLLLLAATAALLLLALLLLLLAATAALLLATLLLLLAATAALLAALLLLLAATAALL");
System.out.println("Longest repeating substring "
+ tree.best.printResult() + " repetitions=" + tree.best.visits
+ " length=" + tree.best.stringDepth);
}
}

abstract class AbstractSuffixTree {

SuffixTreeNode best;
String text = null;
SuffixTreeNode root = null;
int inputAlphabetSize = -1;

AbstractSuffixTree(String text) {
if (text.length() > 0 && text.charAt(text.length() - 1) == '$') {
this.text = text;
} else {
this.text = text + "$";
}
}

}

class SimpleSuffixTree extends AbstractSuffixTree {

public SimpleSuffixTree(String text) {
super(text);
constructTree();
}

private void constructTree() {
super.root = new SuffixTreeNode(this);
best = root;
char[] s = super.text.toCharArray();
for (int i = 0; i < s.length; i++) {
List<String> suffixList = new ArrayList<String>();
for (int k = i; k < s.length; k++) {
suffixList.add(s[k] + "");
}
super.root.addSuffix(suffixList, i + 1);
}
}

}

class CompactSuffixTree extends AbstractSuffixTree {

public CompactSuffixTree(SimpleSuffixTree simpleSuffixTree) {
super(simpleSuffixTree.text);
super.root = compactNodes(simpleSuffixTree.root, 0);
super.best = simpleSuffixTree.best;
}

private SuffixTreeNode compactNodes(SuffixTreeNode node, int nodeDepth) {
node.nodeDepth = nodeDepth;
for (SuffixTreeNode child : node.children) {
while (child.children.size() == 1) {
SuffixTreeNode grandchild = child.children.iterator().next();
child.incomingEdge.label += ", "
+ grandchild.incomingEdge.label;
child.stringDepth += grandchild.incomingEdge.label.length();
child.children = grandchild.children;
// for (SuffixTreeNode grandchild : child.children)
grandchild.parent = node;
}
child = compactNodes(child, nodeDepth + 1);
}
return node;
}

}

class SuffixTreeNode {

AbstractSuffixTree tree;
SuffixTreeEdge incomingEdge = null;
int nodeDepth = -1;
int label = -1;
Collection<SuffixTreeNode> children = null;
SuffixTreeNode parent = null;
int stringDepth;
int id = 0;
public static int c;
public int visits = 1;

public SuffixTreeNode(AbstractSuffixTree tree, SuffixTreeNode parent,
String incomingLabel, int depth, int label, int id) {
children = new ArrayList<SuffixTreeNode>();
incomingEdge = new SuffixTreeEdge(incomingLabel, label);
nodeDepth = depth;
this.label = label;
this.parent = parent;
stringDepth = parent.stringDepth + incomingLabel.length();
this.id = id;
this.tree = tree;
}

public SuffixTreeNode(AbstractSuffixTree tree) {
children = new ArrayList<SuffixTreeNode>();
nodeDepth = 0;
label = 0;
this.tree = tree;
}

public void addSuffix(List<String> suffix, int pathIndex) {
SuffixTreeNode insertAt = this;
insertAt = search(this, suffix);
insert(insertAt, suffix, pathIndex);
}

private SuffixTreeNode search(SuffixTreeNode startNode, List<String> suffix) {
if (suffix.isEmpty()) {
throw new IllegalArgumentException(
"Empty suffix. Probably no valid simple suffix tree exists for the input.");
}
Collection<SuffixTreeNode> children = startNode.children;
for (SuffixTreeNode child : children) {
if (child.incomingEdge.label.equals(suffix.get(0))) {
suffix.remove(0);
child.visits++;
if (child.visits > 1
&& child.stringDepth > tree.best.stringDepth) {
tree.best = child;
}
if (suffix.isEmpty()) {
return child;
}
return search(child, suffix);
}
}
return startNode;
}

private void insert(SuffixTreeNode insertAt, List<String> suffix,
int pathIndex) {
for (String x : suffix) {
SuffixTreeNode child = new SuffixTreeNode(tree, insertAt, x,
insertAt.nodeDepth + 1, pathIndex, id);
insertAt.children.add(child);
insertAt = child;
}
}

public String toString() {
StringBuilder result = new StringBuilder();
String incomingLabel = this.isRoot() ? "" : this.incomingEdge.label;
for (int i = 1; i <= this.nodeDepth; i++)
result.append("\t");
if (this.isRoot()) {
c = 1;
this.id = 1;
} else {
this.id = c;
result.append(this.parent.id + " -> ");
result.append(this.id + "[label=\"" + incomingLabel + "\"]" + "("
+ visits + "," + (stringDepth) + ")" + ";\n");
}
for (SuffixTreeNode child : children) {
c++;
child.id = c;
result.append(child.toString());
}
return result.toString();
}

public String printResult() {
if (parent == null) {
return "";
} else {
return this.parent.printResult() + this.incomingEdge.label;
}
}

public boolean isRoot() {
return this.parent == null;
}

public boolean isLeaf() {
return children.size() == 0;
}

}

class SuffixTreeEdge {

String label = null;
@SuppressWarnings("unused")
private int branchIndex = -1;

public SuffixTreeEdge(String label, int branchIndex) {
this.label = label;
this.branchIndex = branchIndex;
}

}

输出:

Longest repeating substring LLLLLLAATAALLL repetitions=2 length=14

编辑:对您的评论的回应。

目前我只是跟踪最长的重复 SuffixTreeNode(它是 AbstractSuffixTree 中的一个字段)。您可以修改它,以便它跟踪节点的 SortedQueue,按它们的 stringDepth 排序。

关于Java 字符串模式识别,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/3664861/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com