Java 8 使用 Streams 算法搜索 ArrayList 失败

我们正在使用 Stream 搜索一个字符串 ArrayList。字典文件已排序,包含 307107 个小写单词。
我们正在使用 findFirst 从 TextArea 中的文本中查找匹配项
只要拼写错误出现在单词的第 3 个字符之后,搜索就能得到比较理想的结果。
如果拼写错误的单词是“Charriage”这样的,则结果与正确的匹配相去甚远。
明显的目标是在不需要查看大量单词的情况下获得尽可能接近正确的结果

这是我们正在测试的文本
Tak acheive it hommaker 和 aparent as Chariage NOT ME Charriag 添加缺失的元音到 Cjarroage

我们对流搜索过滤器进行了一些重大更改,并进行了合理的改进
我们将编辑已发布的代码,只保留搜索失败的那部分代码,并在其后附上对流过滤器所做的代码更改。
在代码更改之前,如果 searchString 在位置 1 处有一个拼写错误的字符,在字典中就找不到结果;新的搜索过滤器修复了这个问题。
我们还通过增加endsWith 的字符数量添加了更多搜索信息
但搜索仍有失败的情况:如果 searchString(拼写错误的单词)在单词末尾缺少一个字符,并且该单词在位置 1 到 4 之间有一个不正确的字符,则搜索失败。
我们正在尝试添加和删除字符,但不确定这是否是可行的解决方案。

如果您想要我们将在 GitHub 上发布的完整项目,请在评论中询问,我们将不胜感激。

问题仍然是当拼写错误的单词中缺少多个字符时如何修复此搜索过滤器?

经过几个小时的免费 txt 词典搜索后,这是最好的
顺带一个小事实:该词典有 115726 个长度 > 5 且以元音结尾的单词,这意味着它有 252234 个不以元音结尾的单词。
这是否意味着我们有 32% 的机会通过在 searchString 的末尾添加元音来解决问题?不是一个问题,只是一个奇怪的事实!

这是字典的下载链接;请将 words_alpha.txt 文件放到 C 盘的 C:/A_WORDS/words_alpha.txt。下载链接:words_alpha.txt


一只甜甜圈
浏览 48回答 3
3回答

qq_遁去的一_1

我正在添加 JavaFX 答案。这个应用程序使用 Levenshtein Distance。您必须单击 Check Spelling 才能开始。您可以从列表中选择一个单词来替换当前正在检查的单词。我注意到 Levenshtein Distance 返回了很多单词,因此您可能需要找到其他方法来进一步减少列表。

主类

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javafx.application.Application;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.ListView;
import javafx.scene.control.TextArea;
import javafx.scene.control.TextField;
import javafx.scene.layout.VBox;
import javafx.stage.Stage;

/**
 * Small JavaFX spell-check demo.
 *
 * <p>The text area holds the text to check. "Check Spelling" splits it on single spaces and
 * walks through the words one at a time; for each word the list view shows the dictionary
 * words returned by {@link HandleLevenshteinDistance}. "Replace Word" swaps the current word
 * for the content of the replacement field and moves on to the next word.
 */
public class App extends Application
{
    public static void main(String[] args)
    {
        launch(args);
    }

    TextArea taWords = new TextArea("Tak Carrage thiss on hoemaker answe");
    TextField tfCurrentWordBeingChecked = new TextField();
    ListView<String> lvReplacementWords = new ListView<>();
    TextField tfReplacementWord = new TextField();
    Button btnCheckSpelling = new Button("Check Spelling");
    Button btnReplaceWord = new Button("Replace Word");

    /** Words still waiting to be checked; the head is removed each time a word is loaded. */
    List<String> wordList = new ArrayList<>();
    /** Full word list of the text, updated in place as words are replaced. */
    List<String> returnList = new ArrayList<>();
    HandleLevenshteinDistance handleLevenshteinDistance = new HandleLevenshteinDistance();
    ObservableList<String> listViewData = FXCollections.observableArrayList();

    @Override
    public void start(Stage primaryStage)
    {
        setupListView();
        handleBtnCheckSpelling();
        handleBtnReplaceWord();

        VBox root = new VBox(taWords, tfCurrentWordBeingChecked, lvReplacementWords,
                tfReplacementWord, btnCheckSpelling, btnReplaceWord);
        root.setSpacing(5);
        Scene scene = new Scene(root);
        primaryStage.setScene(scene);
        primaryStage.show();
    }

    /**
     * Wires the check button. The first click snapshots the words of the text area and loads
     * the first one; the button is then relabelled and every further click loads the next word.
     */
    public void handleBtnCheckSpelling()
    {
        btnCheckSpelling.setOnAction(actionEvent -> {
            if (btnCheckSpelling.getText().equals("Check Spelling")) {
                wordList = new ArrayList<>(Arrays.asList(taWords.getText().split(" ")));
                returnList = new ArrayList<>(wordList);
                loadWord();
                btnCheckSpelling.setText("Check Next Word");
            }
            else if (btnCheckSpelling.getText().equals("Check Next Word")) {
                loadWord();
            }
        });
    }

    /**
     * Wires the replace button: replaces the first occurrence of the current word in
     * {@code returnList}, writes the joined text back to the text area and advances to the
     * next word.
     */
    public void handleBtnReplaceWord()
    {
        btnReplaceWord.setOnAction(actionEvent -> {
            int indexOfWordToReplace = returnList.indexOf(tfCurrentWordBeingChecked.getText());
            if (indexOfWordToReplace < 0) {
                // Nothing to replace (e.g. the button was pressed before any word was loaded);
                // List.set(-1, ...) would throw IndexOutOfBoundsException.
                return;
            }

            returnList.set(indexOfWordToReplace, tfReplacementWord.getText());
            taWords.setText(String.join(" ", returnList));
            btnCheckSpelling.fire();
        });
    }

    /** Binds the list view to its backing data and mirrors the selection into the replacement field. */
    public void setupListView()
    {
        lvReplacementWords.setItems(listViewData);
        lvReplacementWords.getSelectionModel().selectedItemProperty().addListener(
                (obs, oldSelection, newSelection) -> tfReplacementWord.setText(newSelection));
    }

    /** Moves the next pending word into the "current word" field and shows its suggestions. */
    private void loadWord()
    {
        if (!wordList.isEmpty()) {
            tfCurrentWordBeingChecked.setText(wordList.remove(0));
            showPotentialCorrectSpellings();
        }
    }

    /** Fills the list view with the suggestions for the (trimmed) current word. */
    private void showPotentialCorrectSpellings()
    {
        List<String> potentialCorrectSpellings = handleLevenshteinDistance
                .getPotentialCorretSpellings(tfCurrentWordBeingChecked.getText().trim());
        listViewData.setAll(potentialCorrectSpellings);
    }
}

CustomWord 类

/**
 * A dictionary word paired with its edit distance to the word being checked.
 *
 * @author blj0011
 */
public class CustomWord
{
    private int distance;
    private String word;

    public CustomWord(int distance, String word)
    {
        this.distance = distance;
        this.word = word;
    }

    public String getWord()
    {
        return word;
    }

    public void setWord(String word)
    {
        this.word = word;
    }

    public int getDistance()
    {
        return distance;
    }

    public void setDistance(int distance)
    {
        this.distance = distance;
    }

    @Override
    public String toString()
    {
        return "CustomWord{" + "distance=" + distance + ", word=" + word + '}';
    }
}

HandleLevenshteinDistance 类

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
// NOTE(review): the original snippet did not show its imports; LevenshteinDistance is assumed
// to be the Apache Commons Text implementation - confirm against the project's dependencies.
import org.apache.commons.text.similarity.LevenshteinDistance;

/**
 * Loads a word list and suggests dictionary words that are within a small Levenshtein
 * distance of a given (possibly misspelled) word.
 *
 * @author blj0011
 */
public class HandleLevenshteinDistance
{
    private static final Logger LOGGER = Logger.getLogger(HandleLevenshteinDistance.class.getName());
    private static final String DICTIONARY_FILE = "alpha.txt";
    private static final String DICTIONARY_URL =
            "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt";
    /** Connection and read timeout for the dictionary download, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;
    /** Largest edit distance at which a dictionary word is still offered as a suggestion. */
    private static final int MAX_DISTANCE = 2;

    private final List<String> dictionary;

    /**
     * Loads the dictionary from {@code alpha.txt} in the working directory, downloading it
     * from GitHub first if the file does not exist. If loading fails the error is logged and
     * the dictionary is left empty, so lookups simply return no suggestions.
     */
    public HandleLevenshteinDistance()
    {
        List<String> loaded = new ArrayList<>();
        try {
            // See if the dictionary file exists. If it doesn't, download it from GitHub.
            File file = new File(DICTIONARY_FILE);
            if (!file.exists()) {
                FileUtils.copyURLToFile(new URL(DICTIONARY_URL), file, TIMEOUT_MILLIS, TIMEOUT_MILLIS);
            }

            // Load file content to a List of Strings, one entry per line.
            loaded = FileUtils.readLines(file, StandardCharsets.UTF_8);
        }
        catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Could not load dictionary " + DICTIONARY_FILE, ex);
        }
        dictionary = loaded;
    }

    /**
     * Returns every dictionary word whose Levenshtein distance to {@code misspelledWord} is
     * at most 2, ordered by ascending distance (words with equal distance keep dictionary
     * order). Each match is also printed to standard output as {@code "<distance> - <word>"}.
     *
     * @param misspelledWord the word to find suggestions for
     * @return the suggested words, closest first; empty if nothing is close enough
     */
    public List<String> getPotentialCorretSpellings(String misspelledWord)
    {
        LevenshteinDistance levenshteinDistance = new LevenshteinDistance();
        List<CustomWord> customWords = new ArrayList<>();

        for (String wordInDictionary : dictionary) {
            int distance = levenshteinDistance.apply(misspelledWord, wordInDictionary);
            if (distance <= MAX_DISTANCE) {
                customWords.add(new CustomWord(distance, wordInDictionary));
            }
        }

        // comparingInt avoids the subtraction idiom; List.sort is stable, like Collections.sort.
        customWords.sort(Comparator.comparingInt(CustomWord::getDistance));

        List<String> returnList = new ArrayList<>(customWords.size());
        for (CustomWord item : customWords) {
            System.out.println(item.getDistance() + " - " + item.getWord());
            returnList.add(item.getWord());
        }

        return returnList;
    }
}

梦里花落0921

您只需要在词典上再进一步即可。我们确定您从词典中得到了很多建议的单词吧?我们测试了您的代码,有时它发现了 3000 个或更多可能的匹配项。哇,所以这是一个很大的改进。它仍然需要大量的测试,我们使用下面这行文本进行测试,获得了 100% 良好的结果。

Tske Charriage 到 hommaker 以及 hommake 作为 hommaer

我们担心的是,如果拼写者真的把这个词弄乱了,这项改进可能只能解决一定程度的拼写错误。我们确信您知道,如果第一个字母是错误的,这将不起作用,就像把 xenophobia(仇外心理)的首字母拼错的情况一样。这是重大改进:

     cs.stream().filter(s -> s.startsWith(strSF)
            || s.startsWith(nF, 0)
            && s.length() > 1 && s.length() <= W+3 // <== HERE
            && s.endsWith(nE)
            && s.startsWith(nF)
            && s.contains(nM))
    .forEach(list :: add);

您可以将支票发送到我的地址 55 48 196 195

桃花长相依

我认为你应该使用类似于 Levenshtein Distance 或 Jaro Winkler Distance 的东西。如果你可以使用 Apache Commons,我建议使用 Apache Commons Lang,它有一个 Levenshtein Distance 的实现。该示例演示了此实现。如果将距离设置为 (distance <= 2),您可能会获得更多结果。

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;

/**
 * Prints every dictionary word within a small Levenshtein distance of a misspelled word.
 *
 * @author blj0011
 */
public class Main
{
    private static final String DICTIONARY_FILE = "alpha.txt";
    private static final String DICTIONARY_URL =
            "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt";
    /** Connection and read timeout for the dictionary download, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;
    /** The misspelled word to look up. */
    private static final String MISSPELLED_WORD = "zorilta";
    /** Largest edit distance still reported; raise to 2 for more suggestions. */
    private static final int MAX_DISTANCE = 1;

    /**
     * Downloads the word list to {@code alpha.txt} if it is not already present, then prints
     * a "Did you mean" line for every word within {@code MAX_DISTANCE} edits of the
     * misspelled word. I/O failures are logged at SEVERE level.
     */
    public static void main(String[] args)
    {
        try {
            System.out.println("Hello World!");

            File file = new File(DICTIONARY_FILE);
            if (!file.exists()) {
                FileUtils.copyURLToFile(new URL(DICTIONARY_URL), file, TIMEOUT_MILLIS, TIMEOUT_MILLIS);
            }

            List<String> lines = FileUtils.readLines(file, StandardCharsets.UTF_8);
            for (String line : lines) {
                int distance = StringUtils.getLevenshteinDistance(line, MISSPELLED_WORD);
                if (distance <= MAX_DISTANCE) {
                    System.out.println("Did you mean: " + line);
                }
            }
        }
        catch (IOException ex) {
            Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}

输出(距离 <= 1)

Building JavaTestingGround 1.0
------------------------------------------------------------------------
--- exec-maven-plugin:1.5.0:exec (default-cli) @ JavaTestingGround ---
Hello World!
Did you mean: zorilla
------------------------------------------------------------------------
BUILD SUCCESS
------------------------------------------------------------------------
Total time: 1.329 s
Finished at: 2019-11-01T11:02:48-05:00
Final Memory: 7M/30M

输出(距离 <= 2)

Hello World!
Did you mean: corita
Did you mean: gorilla
Did you mean: zoril
Did you mean: zorilla
Did you mean: zorillas
Did you mean: zorille
Did you mean: zorillo
Did you mean: zorils
------------------------------------------------------------------------
BUILD SUCCESS
------------------------------------------------------------------------
Total time: 1.501 s
Finished at: 2019-11-01T14:03:33-05:00
Final Memory: 7M/34M
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Java