使用 Go 语言获取 CAF 音频文件的时长

// Background (English rendering of the answer text that precedes the code):
//
// The doc comments in the file you linked are taken directly from Apple's
// specification. Two statements in that spec matter here. First: "The
// duration of the audio in the file is [the number of valid frames] divided
// by the sample rate specified in the file's Audio Description chunk."
// That leaves the question of how many valid frames there are, and there
// are two ways to find out:
//
//   - If the CAF has a packet table, the table must include the number of
//     valid frames.
//   - The only CAFs allowed to omit the packet table are those with a
//     constant packet size: "Note that, as long as the format has a constant
//     number of frames per packet, you can calculate the duration of each
//     packet by dividing the mSampleRate [frames per second] value by the
//     mFramesPerPacket value." Because every packet has the same size, the
//     number of packets is simply audioDataSize / bytesPerPacket. The latter
//     value is part of the audio description. The former is usually stored
//     in the file, but it may be -1 when the audio data is the last chunk,
//     in which case its size is totalFileSize - startOfAudioData.
//
// It breaks down like this:
//
//	with a packet table chunk:  seconds = validFrames / sampleRate
//	otherwise (constant size):  frames  = audioDataSize * framesPerPacket / bytesPerPacket
//	                            seconds = frames / sampleRate
//
// (The answer as posted multiplied frames-per-byte by the data size and
// called the result seconds; that product is a frame count and still has to
// be divided by the sample rate.)
//
// The library you have reads the audio description chunk, but I don't think
// it reads the packet table, and I'm not sure it computes the audio data size
// when the chunk size is -1. If it does both, you can use the information
// above. If not, you can parse the file yourself, especially if you only care
// about the duration. The file starts with a short header and is then split
// into "chunks" (a.k.a. TLVs). Below is a sample implementation you can use
// as a starting point, or to modify the library you linked.

// readCAF parses a small in-memory CAF sample and prints its duration to
// standard output as "seconds: <value>". It panics if the sample cannot be
// parsed.
func readCAF() {
	buf := []byte{
		// file header
		'c', 'a', 'f', 'f', // file type
		0x0, 0x1, 0x0, 0x0, // file version, flags

		// audio description
		'd', 'e', 's', 'c', // chunk type
		0x0, 0x0, 0x0, 0x0,
		0x0, 0x0, 0x0, 0x20, // CAFAudioFormat size

		0x40, 0xe5, 0x88, 0x80,
		0x00, 0x00, 0x00, 0x00, // sample rate
		'l', 'p', 'c', 'm', // fmt id
		0x0, 0x0, 0x0, 0x0, // fmt flags
		0x0, 0x0, 0x0, 0x1, // bytes per packet
		0x0, 0x0, 0x0, 0x1, // frames per packet
		0x0, 0x0, 0x0, 0x2, // channels per frame
		0x0, 0x0, 0x0, 0x3, // bits per channel

		// audio data
		'd', 'a', 't', 'a', // chunk type
		0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, // size of data section (-1 = til EOF)

		// actual audio packets (in theory, anyway)
		0x0,
		0x0,
		0x0,
		0x0,
		0x0,
		0x0,
	}

	br := bufio.NewReader(bytes.NewReader(buf))
	seconds, err := cafDuration(br, int64(len(buf)))
	if err != nil {
		panic(err)
	}
	fmt.Printf("seconds: %f\n", seconds)
}

// cafDuration walks the chunks of a CAF stream read from r and returns the
// length of its audio in seconds.
//
// fileSize is the total number of bytes in the stream; it is only needed to
// size a trailing data chunk whose header declares a size of -1.
//
// An error is returned when the file header is not "caff" version 1, when a
// chunk declares an invalid size, when the stream ends in the middle of a
// chunk, or when the audio description cannot yield a duration.
func cafDuration(r io.Reader, fileSize int64) (float64, error) {
	type fileHeader struct {
		Typ     [4]byte
		Version uint16
		_       uint16 // flags, not needed here
	}
	type chunkHeader struct {
		Typ [4]byte
		Sz  int64
	}
	type audioDescription struct {
		FramesPerSec     float64
		FmtID            uint32
		FmtFlags         uint32
		BytesPerPacket   uint32
		FramesPerPacket  uint32
		ChannelsPerFrame uint32
		BitsPerChannel   uint32
	}
	// The packet table header is 24 bytes on disk: two 64-bit counters
	// followed by two 32-bit ones. The struct must match that size exactly,
	// otherwise binary.Read consumes part of the bytes that follow it.
	type packetTable struct {
		NPackets, NValidFrames   int64
		NPrimingFr, NRemainingFr int32
	}
	const (
		fileHeaderSz  = 8
		chunkHeaderSz = 12
		audioDescSz   = 32
		packetHdrSz   = 24
	)

	var fh fileHeader
	if err := binary.Read(r, binary.BigEndian, &fh); err != nil {
		return 0, fmt.Errorf("reading file header: %w", err)
	}
	if fh.Typ != [4]byte{'c', 'a', 'f', 'f'} || fh.Version != 1 {
		return 0, fmt.Errorf("unknown file format")
	}

	var (
		desc        audioDescription
		pakt        packetTable
		audioDataSz int64
	)
	remaining := fileSize - fileHeaderSz

chunks:
	for {
		var hdr chunkHeader
		if err := binary.Read(r, binary.BigEndian, &hdr); err != nil {
			if err == io.EOF {
				// No bytes left on a chunk boundary: normal end of file.
				break chunks
			}
			return 0, fmt.Errorf("reading chunk header: %w", err)
		}
		remaining -= chunkHeaderSz

		switch hdr.Typ {
		case [4]byte{'d', 'e', 's', 'c'}: // audio description
			if err := binary.Read(r, binary.BigEndian, &desc); err != nil {
				return 0, fmt.Errorf("reading audio description: %w", err)
			}
			hdr.Sz -= audioDescSz
			remaining -= audioDescSz
		case [4]byte{'p', 'a', 'k', 't'}: // packet table
			if err := binary.Read(r, binary.BigEndian, &pakt); err != nil {
				return 0, fmt.Errorf("reading packet table: %w", err)
			}
			hdr.Sz -= packetHdrSz
			remaining -= packetHdrSz
		case [4]byte{'d', 'a', 't', 'a'}: // audio data
			// NOTE(review): the CAF spec appears to place a 4-byte
			// mEditCount at the start of this chunk; those bytes are
			// counted as audio here. Confirm and subtract if needed.
			if hdr.Sz == -1 {
				// Size unknown: the data runs to the end of the file.
				audioDataSz = remaining
				break chunks
			}
			if hdr.Sz > 0 {
				audioDataSz = hdr.Sz
			}
		}

		if hdr.Sz < 0 {
			return 0, fmt.Errorf("invalid header size")
		}
		remaining -= hdr.Sz

		// Skip whatever is left of this chunk. io.CopyN takes an int64, so
		// large chunks cannot overflow an int on 32-bit platforms.
		if _, err := io.CopyN(io.Discard, r, hdr.Sz); err != nil {
			return 0, fmt.Errorf("skipping %q chunk: %w", hdr.Typ[:], err)
		}
	}

	// Both formulas divide by the sample rate, so it must be positive
	// (this also rejects NaN and a missing description chunk).
	if !(desc.FramesPerSec > 0) {
		return 0, fmt.Errorf("bad data")
	}

	// If the data included a packet table, its valid-frame count determines
	// the duration.
	if pakt.NValidFrames > 0 {
		return float64(pakt.NValidFrames) / desc.FramesPerSec, nil
	}

	// Without a packet table the packet size must be constant.
	if desc.BytesPerPacket == 0 || desc.FramesPerPacket == 0 {
		return 0, fmt.Errorf("bad data")
	}
	frames := float64(audioDataSz) * float64(desc.FramesPerPacket) / float64(desc.BytesPerPacket)
	return frames / desc.FramesPerSec, nil
}

使用 Go 语言获取 CAF 音频文件的时长

1回答