如何在时域计算音高基频 f( 0) )？

这是一个正确的实现。不是很健壮，但肯定有效。为了验证这一点，我们可以生成一个已知频率的信号，看看我们会得到什么结果：import numpy as npfrom scipy.io import wavfilefrom scipy.signal import correlate, fftconvolvefrom scipy.interpolate import interp1dfs = 44100frequency = 440length = 0.01 # in secondst = np.linspace(0, length, int(fs * length)) y = np.sin(frequency * 2 * np.pi * t)def parabolic(f, x):    xv = 1/2. * (f[x-1] - f[x+1]) / (f[x-1] - 2 * f[x] + f[x+1]) + x    yv = f[x] - 1/4. * (f[x-1] - f[x+1]) * (xv - x)    return (xv, yv)def freq_from_autocorr(sig, fs):    """    Estimate frequency using autocorrelation    """    corr = correlate(sig, sig, mode='full')    corr = corr[len(corr)//2:]    d = np.diff(corr)    start = np.nonzero(d > 0)[0][0]    peak = np.argmax(corr[start:]) + start    px, py = parabolic(corr, peak)    return fs / px结果运行freq_from_autocorr(y, fs)得到我们~442.014 Hz，大约 0.45% 的错误。奖金 - 我们可以改进我们可以通过稍微多一点的编码使它更精确和健壮：def indexes(y, thres=0.3, min_dist=1, thres_abs=False):    """Peak detection routine borrowed from     https://bitbucket.org/lucashnegri/peakutils/src/master/peakutils/peak.py    """    if isinstance(y, np.ndarray) and np.issubdtype(y.dtype, np.unsignedinteger):        raise ValueError("y must be signed")    if not thres_abs:        thres = thres * (np.max(y) - np.min(y)) + np.min(y)    min_dist = int(min_dist)    # compute first order difference    dy = np.diff(y)    # propagate left and right values successively to fill all plateau pixels (0-value)    zeros, = np.where(dy == 0)    # check if the signal is totally flat    if len(zeros) == len(y) - 1:        return np.array([])    if len(zeros):        # compute first order difference of zero indexes        zeros_diff = np.diff(zeros)        # check when zeros are not chained together        zeros_diff_not_one, = np.add(np.where(zeros_diff != 1), 1)        # make an array of the chained zero indexes        zero_plateaus = np.split(zeros, zeros_diff_not_one)        # fix if leftmost value in dy is zero        if zero_plateaus[0][0] == 0:            dy[zero_plateaus[0]] = dy[zero_plateaus[0][-1] + 1]            zero_plateaus.pop(0)        # fix if rightmost value of dy is zero        if len(zero_plateaus) and zero_plateaus[-1][-1] == len(dy) - 1:            dy[zero_plateaus[-1]] = dy[zero_plateaus[-1][0] - 1]            zero_plateaus.pop(-1)        # for each chain of zero indexes        for plateau in zero_plateaus:            median = np.median(plateau)            # set leftmost values to leftmost non zero values            dy[plateau[plateau < median]] = dy[plateau[0] - 1]            # set rightmost and middle values to rightmost non zero values            dy[plateau[plateau >= median]] = dy[plateau[-1] + 1]    # find the peaks by using the first order difference    peaks = np.where(        (np.hstack([dy, 0.0]) < 0.0)        & (np.hstack([0.0, dy]) > 0.0)        & (np.greater(y, thres))    )[0]    # handle multiple peaks, respecting the minimum distance    if peaks.size > 1 and min_dist > 1:        highest = peaks[np.argsort(y[peaks])][::-1]        rem = np.ones(y.size, dtype=bool)        rem[peaks] = False        for peak in highest:            if not rem[peak]:                sl = slice(max(0, peak - min_dist), peak + min_dist + 1)                rem[sl] = True                rem[peak] = False        peaks = np.arange(y.size)[~rem]    return peaksdef freq_from_autocorr_improved(signal, fs):    signal -= np.mean(signal)  # Remove DC offset    corr = fftconvolve(signal, signal[::-1], mode='full')    corr = corr[len(corr)//2:]    # Find the first peak on the left    i_peak = indexes(corr, thres=0.8, min_dist=5)[0]    i_interp = parabolic(corr, i_peak)[0]    return fs / i_interp, corr, i_interp运行freq_from_autocorr_improved(y, fs)产量~441.825 Hz，大约 0.41% 的误差。这种方法在更复杂的情况下会表现得更好，并且需要两倍的时间来计算。通过更长的采样时间（即设置length为例如 0.1 秒），我们将获得更准确的结果。

如何在时域计算音高基频 f( 0) )？

1回答