我想用 Java 创建一个数据源，让其中的单词（取自字典）服从 Zipf 分布。为此我找到了 Apache Commons Math 库中的 ZipfDistribution 和 NormalDistribution 类。不幸的是，关于如何使用这些类的资料很少。我做了一些测试，但不确定自己的用法是否正确；我只是按照每个构造函数文档中的说明来传参。然而采样结果看起来并不符合预期的分布。
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.ZipfDistribution;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
public class ZipfDistributionDataSource extends RichSourceFunction<String> {
private static final String DISTINCT_WORDS_URL = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt";
/**
 * Demo entry point: loads the word list, then prints ten words picked with a
 * normal distribution followed by ten words picked with a Zipf distribution.
 *
 * @param args command-line arguments (unused)
 * @throws Exception propagated from loading the word list
 */
public static void main(String[] args) throws Exception {
    ZipfDistributionDataSource zipfDistributionDataSource = new ZipfDistributionDataSource();
    String[] words = String.valueOf(zipfDistributionDataSource.readDataFromResource()).split("\n");
    System.out.println("size: " + words.length);

    System.out.println("Normal Distribution");
    // A standard deviation of 1 would keep every sample within a few indices of the
    // middle word. Scale it to the array so that +/- 3 sigma spans the whole list.
    double mean = words.length / 2.0;
    double standardDeviation = Math.max(words.length / 6.0, Double.MIN_NORMAL);
    NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation);
    for (int i = 0; i < 10; i++) {
        // The normal distribution is unbounded, so clamp the sample to a valid index.
        int index = (int) normalDistribution.sample();
        index = Math.max(0, Math.min(words.length - 1, index));
        System.out.print("sample[" + index + "]: ");
        System.out.println(words[index]);
    }

    System.out.println();
    System.out.println("Zipf Distribution");
    // ZipfDistribution samples ranks in [1, numberOfElements]; use one rank per word
    // and shift by one so that rank 1 maps to words[0].
    ZipfDistribution zipfDistribution = new ZipfDistribution(words.length, 1);
    for (int i = 0; i < 10; i++) {
        int index = zipfDistribution.sample() - 1;
        System.out.print("sample[" + index + "]: ");
        System.out.println(words[index]);
    }
}
青春有我
相关分类