猿问

使用 Go 语言获取 CAF 音频文件的持续时间

我想使用 Go 获取 .caf 音频文件的持续时间。我发现了一些解码器,但它们的 Duration() 方法只返回 0,并附有似乎在提示如何计算持续时间的注释。有没有人知道这些注释是否靠谱?如果靠谱,我该如何计算持续时间?如果没有简单的解决方案,我也接受“不可能”作为答案。


// Duration is a stub: it unconditionally returns 0 and never inspects the
// decoder. The two commented-out expressions below sketch candidate formulas
// (byte size / average bytes per second, and sample frames / sample rate),
// but neither is active and the identifier p they reference is not declared
// anywhere in this method.
func (d *Decoder) Duration() time.Duration {

    //duration := time.Duration((float64(p.Size) / float64(p.AvgBytesPerSec)) * float64(time.Second))

    //duration := time.Duration(float64(p.NumSampleFrames) / float64(p.SampleRate) * float64(time.Second))


    // No duration is computed yet; callers always receive the zero Duration.
    return 0

}

一个实现示例,尽管我很乐意使用任何易于安装的实现:https://github.com/mattetti/audio/blob/master/caf/decoder.go


鸿蒙传说
浏览 138回答 1
1回答

桃花长相依

您链接的该文件中的文档注释直接取自 Apple 的规范。在这些文档中,您会发现这两件重要的事情:

“文件中音频的持续时间是 [有效帧数] 除以文件的音频描述块中指定的采样率。”

好的,很酷,但是有多少有效帧?有两种可能的了解方式:

- 如果 CAF 有一个数据包表,它必须包括有效帧的数量。完美。
- 唯一允许没有数据包表的 CAF 是那些具有恒定数据包大小的 CAF:

“请注意,只要格式具有每个数据包的恒定帧数,您可以通过将 mSampleRate [每秒帧数] 值除以 mFramesPerPacket 值来计算每个数据包的持续时间。”

这告诉您每个数据包的持续时间,但是由于数据包的大小是恒定的,因此数据包的数量只是 audioDataSize / bytesPerPacket。后一个值包含在音频描述中。前者通常直接嵌入到文件中,但当音频数据是文件的最后一个块时,允许其大小写为 -1,在这种情况下,它的大小为 totalFileSize - startOfAudioData。

它像这样分解:

- 如果有数据包表块,请使用它和音频描述:

      seconds = validFrames / sampleRate

- 否则,数据包必须具有恒定大小:

      framesPerByte = framesPerPacket / bytesPerPacket
      frames = framesPerByte * audioDataSize
      seconds = frames / sampleRate

您拥有的库读取音频描述块,但我认为它不会读取数据包表。另外,如果块大小为 -1,我不确定它是否会计算音频数据大小。也许两者它都能做到,在这种情况下,您可以使用上面的信息。

如果不能,您可以自己解析文件,特别是如果您只关心持续时间。该文件以一个短标题开始,然后被分成“块”(又名 TLV)。这是一个示例实现,您可以将其用作起点,或据此修改您链接的库:

func readCAF() {
    buf := []byte{
        // file header
        'c', 'a', 'f', 'f', // file type
        0x0, 0x1, 0x0, 0x0, // file version, flags

        // audio description
        'd', 'e', 's', 'c', // chunk type
        0x0, 0x0, 0x0, 0x0,
        0x0, 0x0, 0x0, 0x20, // CAFAudioFormat size

        0x40, 0xe5, 0x88, 0x80,
        0x00, 0x00, 0x00, 0x00, // sample rate
        'l', 'p', 'c', 'm', // fmt id
        0x0, 0x0, 0x0, 0x0, // fmt flags
        0x0, 0x0, 0x0, 0x1, // bytes per packet
        0x0, 0x0, 0x0, 0x1, // frames per packet
        0x0, 0x0, 0x0, 0x2, // channels per frame
        0x0, 0x0, 0x0, 0x3, // bits per channel

        // audio data
        'd', 'a', 't', 'a', // chunk type
        0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, // size of data section (-1 = til EOF)

        // actual audio packets (in theory, anyway)
        0x0,
        0x0,
        0x0,
        0x0,
        0x0,
        0x0,
    }

    fileSize := len(buf)
    br := bufio.NewReader(bytes.NewBuffer(buf))

    type cafHdr struct {
        Typ     [4]byte
        Version uint16
        _       uint16
    }

    type chunkHdr struct {
        Typ [4]byte
        Sz  int64
    }

    type audioDescription struct {
        FramesPerSec     float64
        FmtId            uint32
        FmtFlags         uint32
        BytesPerPacket   uint32
        FramesPerPacket  uint32
        ChannelsPerFrame uint32
        BitsPerChannel   uint32
    }

    type packetTable struct {
        NPackets, NValidFrames, NPrimingFr, NRemainingFr int64
    }

    const FileHeaderSz = 8
    const ChunkHeaderSz = 12
    const AudioDescSz = 32
    const PacketHdrSz = 24

    fileHdr := cafHdr{}
    if err := binary.Read(br, binary.BigEndian, &fileHdr); err != nil {
        panic(err)
    }
    if fileHdr.Typ != [4]byte{'c', 'a', 'f', 'f'} || fileHdr.Version != 1 {
        panic("unknown file format")
    }
    remaining := int64(fileSize) - FileHeaderSz

    audioDesc := audioDescription{}
    packetTab := packetTable{}
    var audioDataSz int64

readChunks:
    for {
        hdr := chunkHdr{}
        if err := binary.Read(br, binary.BigEndian, &hdr); err != nil {
            panic(err)
        }
        remaining -= ChunkHeaderSz

        switch hdr.Typ {
        case [4]byte{'d', 'e', 's', 'c'}: // audio description
            if err := binary.Read(br, binary.BigEndian, &audioDesc); err != nil {
                panic(err)
            }
            hdr.Sz -= AudioDescSz
            remaining -= AudioDescSz

        case [4]byte{'p', 'a', 'k', 't'}: // packet table
            if err := binary.Read(br, binary.BigEndian, &packetTab); err != nil {
                panic(err)
            }
            hdr.Sz -= PacketHdrSz
            remaining -= PacketHdrSz

        case [4]byte{'d', 'a', 't', 'a'}: // audio data
            if hdr.Sz > 0 {
                audioDataSz = hdr.Sz
            } else if hdr.Sz == -1 {
                // if needed, read to EOF to determine byte size
                audioDataSz = remaining
                break readChunks
            }
        }

        if hdr.Sz < 0 {
            panic("invalid header size")
        }
        remaining -= hdr.Sz

        // Skip to the next chunk. On 32 bit machines, Sz can overflow,
        // so you should check for that (or use Seek if you're reading a file).
        if n, err := br.Discard(int(hdr.Sz)); err != nil {
            if err == io.EOF && int64(n) == hdr.Sz {
                break
            }
            panic(err)
        }
    }

    var seconds float64

    // If the data included a packet table, the frames determines duration.
    if packetTab.NValidFrames > 0 {
        seconds = float64(packetTab.NValidFrames) / audioDesc.FramesPerSec
    } else {
        // If there no packet table, it must have a constant packet size.
        if audioDesc.BytesPerPacket == 0 || audioDesc.FramesPerPacket == 0 {
            panic("bad data")
        }
        framesPerByte := float64(audioDesc.FramesPerPacket) / float64(audioDesc.BytesPerPacket)
        seconds = framesPerByte * float64(audioDataSz) / audioDesc.FramesPerSec
    }

    fmt.Printf("seconds: %f\n", seconds)
}
随时随地看视频慕课网APP

相关分类

Go
我要回答