gpt4 book ai didi

java - 从 arraylist 中删除重复的字符串
转载 作者:行者123 更新时间:2023-12-02 03:47:52 25 4
gpt4 key购买 nike

我的程序正在打开一个文件,然后保存其单词及其距文件开头的字节距离。尽管该文件有太多我不想要的重复单词。我还希望我的列表按字母顺序排列。问题是,当我修复顺序时,重复项会被弄乱,反之亦然。这是我的代码:

import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;

class MyMain {
public static void main(String[] args) throws IOException {
ArrayList<DictPage> listOfWords = new ArrayList<DictPage>();
LinkedList<Page> Eurethrio = new LinkedList<Page>();
File file = new File("C:\\Kennedy.txt");
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
//This will reference one line at a time...
String line = null;
int line_count=0;
int byte_count;
int total_byte_count=0;
int fromIndex;

int kat = 0;
while( (line = br.readLine())!= null ){
line_count++;
fromIndex=0;
String [] tokens = line.split(",\\s+|\\s*\\\"\\s*|\\s+|\\.\\s*|\\s*\\:\\s*");
String line_rest=line;
for (int i=1; i <= tokens.length; i++) {
byte_count = line_rest.indexOf(tokens[i-1]);
//if ( tokens[i-1].length() != 0)
//System.out.println("\n(line:" + line_count + ", word:" + i + ", start_byte:" + (total_byte_count + fromIndex) + "' word_length:" + tokens[i-1].length() + ") = " + tokens[i-1]);
fromIndex = fromIndex + byte_count + 1 + tokens[i-1].length();
if (fromIndex < line.length())
line_rest = line.substring(fromIndex);
if(!listOfWords.contains(tokens[i-1])){//Na mhn apothikevetai h idia leksh
//listOfWords.add(tokens[i-1]);
listOfWords.add(new DictPage(tokens[i-1],kat));
kat++;
}

Eurethrio.add(new Page("Kennedy",fromIndex));
}
total_byte_count += fromIndex;
Eurethrio.add(new Page("Kennedy", total_byte_count));
}

Set<DictPage> hs = new HashSet<DictPage>();
hs.addAll(listOfWords);
listOfWords.clear();
listOfWords.addAll(hs);

if (listOfWords.size() > 0) {
Collections.sort(listOfWords, new Comparator<DictPage>() {
@Override
public int compare(final DictPage object1, final DictPage object2) {
return object1.getWord().compareTo(object2.getWord());
}
} );
}
//Ektypwsh leksewn...
for (int i = 0; i<listOfWords.size();i++){
System.out.println(""+listOfWords.get(i).getWord()+" "+listOfWords.get(i).getPage());
}
for (int i = 0;i<Eurethrio.size();i++){
System.out.println(""+Eurethrio.get(i).getFile()+" "+Eurethrio.get(i).getBytes());
}
}
}

最佳答案

使用 TreeSet 而不是 ArrayList,您将获得自动排序且没有重复。

关于java - 从 arraylist<Object> 中删除重复的字符串,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36155632/

25 4 0