"moov atom not found": 不完全で再生できないMP4をPythonで復元する

画面にソースコードが表示されているノートPCのイラスト

MP4ファイルを保存中に、「なんだこれ。時間かかるな。」と思ってcontrol + Cで中断してしまいました。動画の記録自体は終わっていたので、m2tsファイルみたいにぶった切っても平気だと思っていたのです。完全に愚かでした。

ffmpegに食わせてみると“moov atom not found”と表示されます。調べてみると、MP4ファイルは動画と音声が細切れにごちゃ混ぜに保存されており、それを読み解くテーブルが別に記録されているそうです。私はそのテーブル作成中にアボートしてしまったので、動画自体は残っていますが、どこが動画でどこが音声かわからない代物が出来上がってしまったわけです。

検索の結果、似たような事象に対してPythonスクリプトで解読して復元に成功した例がありました。

qiita.com

とはいえ、そのようなデータを扱うにはフォーマットについて勉強が必要なので、しばらく考えないことにしました。最近、ようやく重い腰を上げて取り組んだところ、バイナリデータに不慣れなため時間はかかりましたが成功したので、後続の方のためにコードを公開します。

AACに絶望

上の例(finsta360.py)では、H.264はNAL Unit、AACはADTSフォーマットで格納されていました。つまり、それぞれのヘッダを見つけることでパースできます。しかしながら、私の場合はH.264がNAL Unitというのは同じでしたが、AACがADTSヘッダのついていない生データ(RDB: Raw Data Block)でした（しかもH.264の間は1ブロックとは限らない）。RDBには以下のようにヘッダらしきものはついていますが、AACはブロックが可変長のため、ブロック境界の判別は簡単な条件分岐では行えません。

[ID Syn Ele (3 bit)][Ele Instance Tag (4)][Bitstream][ID TERM (3)]

RDBの構成（参考）

例えば、ステレオの場合は0x20 or 0x21で始まりますが、その後はいろんなパターンがあり、高々8 bitの目印ではビットストリーム中に容易に現れるため、単純にはいきません。

チャンクの判別

いったんH.264チャンク、AACチャンクの判別について整理します。（説明の順番が変ですね。わかりにくいかも。すみません。）

finsta360.pyでは、ADTSヘッダのあるなしでH.264、AACどちらかを判別しています。私の動画の場合、ADTSヘッダは判別に使えませんが、幸運にもH.264チャンクに特徴的なパターンがありました。

H.264データは以下のようなNAL Unitに格納されています。

[Bitstreamのサイズ(4 Byte)][Bitstream]

このUnitの塊（チャンク）は必ず[0x00000002][0x09F0]で始まっていました！　というわけで以下の戦略で判別を行います。

6バイト読み込み、0x0000000209F0（上記の[0x00000002][0x09F0]）だったらH.264チャンク、そうでなかったらAACチャンク
H.264チャンクの場合、次のNAL Unitまでどんどん飛ぶ。飛んだ先が0x20 or 0x21だったらAACチャンクなので打ち切り、AACチャンクへ。
AACチャンクの場合、次に0x0000000209F0が現れる位置まで探す。見つかったらそこで打ち切り、H.264チャンクへ。

AACチャンクは1チャンクに1サンプル（＝ブロック）とは限りませんが、その判別は後で考えることにします。（finsta360.pyをもとに）コードにしたものが以下です。

recover_sample_tables_from_mdat_fast()改

def recover_sample_tables_from_mdat_fast(filename, verbose=False):
    """Rebuild the H.264 and AAC chunk tables by scanning the ``mdat`` atom.

    The payload is treated as an interleave of two kinds of chunks:

    * H.264 chunks, which start with the six bytes ``00 00 00 02 09 F0``
      and continue as a run of NAL units, each prefixed by its 4-byte
      big-endian length;
    * AAC chunks, which are everything up to the next H.264 chunk start.

    Args:
        filename: path of the MP4 file (typically one without ``moov``).
        verbose: print one line per H.264 chunk when true.

    Returns:
        ``(mov_table, aac_table)``: two lists of ``(offset, length)``
        tuples, with offsets counted from the beginning of the file.

    Raises:
        ValueError: if no top-level ``mdat`` atom can be reached.
    """
    marker = b'\x00\x00\x00\x02\x09\xf0'

    mov_table = []
    aac_table = []

    with open(filename, 'rb') as f_in:
        f_in.seek(0, 2)
        file_size = f_in.tell()

        # look for 'mdat' among the top-level atoms
        src_cur = 0
        while True:
            if src_cur + 8 > file_size:
                raise ValueError(f"'mdat' atom not found in {filename}")
            f_in.seek(src_cur)
            if f_in.tell() != src_cur: raise ValueError(f'seek failed? {f_in.tell()} != {src_cur}')

            n, atom_type = read_atom_head(f_in)
            if atom_type == 'mdat': break
            if n < 8:
                # a zero or undersized atom would never move the cursor forward
                raise ValueError(
                    f"'mdat' atom not found: atom {atom_type!r} at {src_cur} has size {n}")
            src_cur += n

        # read_atom_head() leaves the file just past the 8- or 16-byte header,
        # i.e. at the first payload byte
        cur = f_in.tell()
        if n == 0:
            # size 0: mdat of an incomplete mp4 file, it runs to the end of the file
            mdat_end = file_size
        else:
            # never scan past the real end of a truncated file
            mdat_end = min(src_cur + n, file_size)

        n = 0
        while cur < mdat_end:
            f_in.seek(cur)
            if f_in.read(6) == marker:
                # h264 chunk: hop from NAL unit to NAL unit
                frame_length = 6
                while cur + frame_length < mdat_end:
                    f_in.seek(cur + frame_length)
                    buf = f_in.read(4)
                    # a short read means the data ends inside a length field
                    if len(buf) < 4: break
                    # 0x20 / 0x21 in the first byte is taken as the start of AAC data
                    if (buf[0] & 0b11111110) == 0x20: break
                    frame_length += struct.unpack('>I', buf)[0] + 4
                # a truncated last NAL unit must not reach beyond mdat
                frame_length = min(frame_length, mdat_end - cur)

                if verbose: print(f'{n}: [mov] {cur}, {frame_length}')
                mov_table.append((cur, frame_length))
            else:
                # aac chunk: everything up to the next h264 chunk start
                # (searched from cur + 6 on), or up to the end of mdat
                hit = _find_bytes(f_in, marker, cur + 6, mdat_end)
                frame_length = (mdat_end if hit < 0 else hit) - cur
                aac_table.append((cur, frame_length))

            cur += frame_length
            n += 1

    return mov_table, aac_table


def _find_bytes(f, pattern, start, stop, block_size=4096):
    """Return the lowest offset p, start <= p < stop, where pattern begins in f, or -1."""
    overlap = len(pattern) - 1
    pos = start
    while pos < stop:
        f.seek(pos)
        # read `overlap` extra bytes so a match straddling two blocks is seen
        data = f.read(min(block_size, stop - pos) + overlap)
        hit = data.find(pattern)
        if hit >= 0:
            return pos + hit
        if len(data) <= overlap:
            # end of file reached
            break
        pos += len(data) - overlap
    return -1

サンプルとは、MP4のstscに記録されるデータ単位のことです。既存の動画を解析したところ、H.264はチャンクにNAL Unitが複数あっても1サンプルみたいですが、AACはブロックそれ自体が1サンプルのようです。（MP4の構造について大変有用なページが以下です。）

github.com

また、BoxについてはAppleのAtomのページも役に立ちます。

https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-SW1

AACブロック境界の判別

AACも複数のブロックを1サンプルとしても（つまりstscを(1, 1, 1)とする）再生できたりするのでは？と思いました。しかしながら、sample_durationが整数のため、stscの変更に辻褄を合わせた数字（小数になりました）にしても音ズレしてしまうでしょう。

なんとか判別できないかと調べていたら、壊れたAACファイルを修復する製品のページに詳しい資料がありました。AACブロックには6種類のパターンがあるようです。

aeroquartet.com

それをもとに以下のコードを書いてみました。

is_aac_header()

def is_aac_header(buf, frame_length):
    """Heuristically decide whether *buf* looks like the start of an AAC block.

    Only a stereo element (element id 0b001 with instance tag 0) is
    accepted.  The remaining bits are compared against six hand-tuned
    patterns; when one matches, its name is printed and True is returned.

    Args:
        buf: indexable sequence of byte values; up to index 5 is read.
        frame_length: only used by the 'degenerated block' pattern, which
            requires it to be exactly 7.

    Returns:
        True when one of the patterns matches, otherwise False.
    """
    first = buf[0]
    element_id = (first >> 5) & 0b111
    instance_tag = (first >> 1) & 0b1111
    if element_id != 0b001 or instance_tag != 0b0000:
        # not stereo
        return False

    matched = None
    if first & 0b1:
        # common_window == 1
        window_sequence = (buf[1] >> 5) & 0b11
        if window_sequence != 0b10:
            always_0 = (buf[1] >> 7) & 0b1
            maxsfb = ((buf[1] & 0b1111) << 2) | ((buf[2] >> 6) & 0b11)
            predictor = (buf[2] >> 5) & 0b1
            gain = ((buf[2] & 0b111) << 5) | ((buf[3] >> 3) & 0b11111)
            inc = (buf[4] >> 2) & 0b11111
            if (always_0 == 0 and 40 <= maxsfb <= 51 and predictor == 0
                    and 100 <= gain <= 228 and inc != 0):
                matched = 'common case'
        else:
            # window_sequence is 0b10 on this path
            maxsfb = buf[1] & 0b1111
            gain = ((buf[3] & 0b1111111) << 1) | ((buf[4] >> 7) & 0b1)
            if maxsfb >= 8 and 100 <= gain <= 200:
                matched = 'eight-short-sequence'
    else:
        # common_window == 0
        # pulse_data / tns_data / gain_control_data are bits 3, 2, 1 of buf[3]
        tool_flags = buf[3] & 0b00001110
        if tool_flags != 0:
            maxsfb = ((buf[2] & 0b1111) << 2) | ((buf[3] >> 6) & 0b11)
            if maxsfb == 0:
                if frame_length == 7:
                    matched = 'degenerated block'
            else:
                gain = buf[1]
                always_0 = (buf[2] >> 7) & 0b1
                if 100 <= gain <= 200 and always_0 == 0:
                    if ((buf[2] >> 5) & 0b11) != 0b10:
                        if 48 <= maxsfb <= 51:
                            matched = 'multiple windows'
                    elif (buf[2] & 0b1111) >= 8:
                        matched = 'multiple windows and eight-short-sequence'
        else:
            maxsfb = ((buf[2] & 0b111) << 3) | ((buf[3] >> 5) & 0b111)
            maxsfb2 = buf[5] & 0b11111
            if maxsfb == 0 and maxsfb2 == 0:
                matched = 'degenerated block with padding'

    if matched is None:
        return False
    print(matched)
    return True

ところが、資料を正しく読み解けていないかもしれませんが、どうしてもパターン判別の誤爆が免れませんでした。詰んだなと思いつつ、壊れたAACの復元について調べていたらFAAD2というデコーダーがraw AACファイルをADTSフォーマットのAACファイルに変換できるとのこと！　AACのテーブル作成は諦め、H.264、AACそれぞれを抽出・復元し、あとでffmpegなりを使ってmuxする方針に切り替えることにしました。

AACの復元

というわけで、mdatからH.264チャンクを取り除いたものをファイルヘッダもなしにそのままバイナリに垂れ流すコードを書いてみました（付録）。FAAD2に食わせてみると神！　復元できました（ADTSフォーマットの.aac）。音程が変でしたが、サンプリングレートを指定するオプションをつけてみたら解消しました。

H.264の復元

H.264の復元には苦労しました。Boxの内容は複数のファイルをリファレンスに、ファイル固有のところだけ修正し、あとはコピペで済ませました。それでも、finsta360.pyの元記事にもあるように、親ボックスのサイズを間違えると全然内容がズレちゃうので大変でした。

Boxの構成もInsta360 ONE Xと違っていたので修正しています。mvhd_durationとか、その他の部分も（私の思うように）直してあります。

というわけで、やっとffmpegに怒られない（video onlyの）ファイルが出来上がりました。

復元結果

上で復元したH.264とAACをmuxしたところ、ほぼ完璧に元動画が復元できました！　別々に処理したせいか、少し音ズレしています（2時間くらい再生すると目立つ程度）。また、数か所画像が緑に乱れるところがあります。疲れたのでそれらは追いませんでした。

感想

AACのフレーム境界の判別に躍起になっていたため、時間がかかってしまいました。たぶんさらに時間をかけても私には無理だったと思います。早くFAAD2を知っておきたかった。

付録

こちらはGPL 3.0を継承し、その下で公開します。

mov.py

#!/usr/bin/env python
# coding: utf-8

# finsta360 - python script to finalize incomplete MP4 of Insta360 ONE-X
# https://github.com/kichiki/finsta360

import sys
import os.path

import struct
from datetime import datetime, timedelta
import gc
#from tqdm import tqdm


# ## parsing mp4

def parse_mvhd(buf):
    """Print the fields of a Movie Header ('mvhd') atom payload.

    Args:
        buf: payload bytes starting at the version byte; offsets 0..99
            are read.

    The two timestamps are printed as dates counted in seconds from
    1904-01-01.  Nothing is returned.
    """
    mac_epoch = datetime(1904, 1, 1)

    version = buf[0]
    # bytes 1..3 (flags) are not shown
    (creation_time, modification_time, time_scale,
     duration, preferred_rate) = struct.unpack('>5I', buf[4:24])
    (preferred_volume,) = struct.unpack('>H', buf[24:26])
    # bytes 26..35 are reserved
    matrix_structure = struct.unpack('>9I', buf[36:72])
    (preview_time, preview_duration, poster_time, selection_time,
     selection_duration, current_time, next_track_id) = struct.unpack('>7I', buf[72:100])

    created = mac_epoch + timedelta(seconds=creation_time)
    modified = mac_epoch + timedelta(seconds=modification_time)

    print(f'version            : {version}')
    print(f'creation time      : {created}')
    print(f'modification_time  : {modified}')
    print(f'time scale         : {time_scale}')
    # the divisions are evaluated here, after the lines above were printed
    print(f'duration           : {duration} / {duration/time_scale} sec / {duration/time_scale/60} min')
    print(f'preferred_rate     : {preferred_rate}')
    print(f'preferred_volume   : {preferred_volume}')
    print(f'matrix_structure   : {matrix_structure}')
    print(f'preview_time       : {preview_time}')
    print(f'preview_duration   : {preview_duration}')
    print(f'poster_time        : {poster_time}')
    print(f'selection_time     : {selection_time}')
    print(f'selection_duration : {selection_duration}')
    print(f'current_time       : {current_time}')
    print(f'next_track_id      : {next_track_id}')

def parse_tkhd(buf):
    """Print the fields of a Track Header ('tkhd') atom payload.

    Args:
        buf: payload bytes starting at the version byte; offsets 0..83
            are read.

    Nothing is returned.
    """
    mac_epoch = datetime(1904, 1, 1)

    version = buf[0]
    flags = buf[1:4]
    creation_time, modification_time, track_id = struct.unpack('>3I', buf[4:16])
    # bytes 16..19 are reserved
    (duration,) = struct.unpack('>I', buf[20:24])
    # bytes 24..31 are reserved
    layer, alternate_group, volume = struct.unpack('>3H', buf[32:38])
    # bytes 38..39 are reserved
    tail = struct.unpack('>11I', buf[40:84])
    matrix_structure = tail[:9]
    track_width, track_height = tail[9:]

    created = mac_epoch + timedelta(seconds=creation_time)
    modified = mac_epoch + timedelta(seconds=modification_time)

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'creation time     : {created}')
    print(f'modification_time : {modified}')
    print(f'track_id          : {track_id}')
    print(f'duration          : {duration}')
    print(f'layer             : {layer}')
    print(f'alternate_group   : {alternate_group}')
    print(f'volume            : {volume}')
    print(f'matrix_structure  : {matrix_structure}')
    print(f'track_width       : {track_width}')
    print(f'track_height      : {track_height}')

def parse_mdhd(buf):
    """Print the fields of a Media Header ('mdhd') atom payload.

    Args:
        buf: payload bytes starting at the version byte; offsets 0..23
            are read.

    Nothing is returned.
    """
    mac_epoch = datetime(1904, 1, 1)

    version = buf[0]
    flags = buf[1:4]
    (creation_time, modification_time,
     time_scale, duration) = struct.unpack('>4I', buf[4:20])
    language, quality = struct.unpack('>2H', buf[20:24])

    created = mac_epoch + timedelta(seconds=creation_time)
    modified = mac_epoch + timedelta(seconds=modification_time)

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'creation time     : {created}')
    print(f'modification_time : {modified}')
    print(f'time scale        : {time_scale}')
    # the divisions are evaluated here, after the lines above were printed
    print(f'duration          : {duration} / {duration/time_scale} sec / {duration/time_scale/60} min')
    print(f'language          : {language}')
    print(f'quality           : {quality}')


def parse_stsd(buf):
    """Print a Sample Description ('stsd') atom payload.

    The payload is first dumped as hex, then the version/flags/entry
    count and one summary line per sample description entry are printed.

    Each entry starts with its own 4-byte size, a 4-byte data format,
    6 reserved bytes and a 2-byte data reference index, and entries are
    laid out back to back.  The walk therefore advances by the size stored
    in each entry and stops as soon as an entry header does not fit in
    *buf* (callers may pass a truncated payload).

    Args:
        buf: payload bytes starting at the version byte.

    Nothing is returned.
    """
    print('DATA:')
    print_binaries(buf)
    print(f'size of buf: {len(buf)}')

    version   = buf[0]
    flags     = buf[1:4]
    n_entries = struct.unpack('>I', buf[4:8])[0]

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    entry_start = 8
    for i in range(n_entries):
        # the 16-byte fixed part of the entry must be fully present
        if len(buf) < entry_start + 16: break
        sample_description_size = struct.unpack('>I', buf[entry_start:entry_start+4])[0]
        data_format = str(buf[entry_start+4:entry_start+8], 'utf-8')
        data_reference_index = struct.unpack('>H', buf[entry_start+14:entry_start+16])[0]
        print('%d: size: 0x%X, format: %s, ref_index: 0x%X' % (
            i, sample_description_size, data_format, data_reference_index))
        # a size smaller than the fixed part cannot be followed reliably
        if sample_description_size < 16: break
        entry_start += sample_description_size

def parse_stsz(buf):
    """Print a Sample Size ('stsz') atom payload.

    Args:
        buf: payload bytes starting at the version byte.  Entries that
            are not completely contained in *buf* are not printed.

    Nothing is returned.
    """
    version = buf[0]
    flags = buf[1:4]
    sample_size, n_entries = struct.unpack('>II', buf[4:12])

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'sample_size       : {sample_size}')
    print(f'number of entries : {n_entries}')

    # number of whole 4-byte entries that follow the 12-byte header
    available = max(0, (len(buf) - 12) // 4)
    for i in range(min(n_entries, available)):
        (size,) = struct.unpack_from('>I', buf, 12 + 4 * i)
        print(f'  {i}: {size}')

def parse_stsc(buf):
    """Print a Sample-to-Chunk ('stsc') atom payload.

    Each entry is shown as a tuple
    ``(first_chunk, samples_per_chunk, sample_desc_id)``.

    Args:
        buf: payload bytes starting at the version byte.  Entries that
            are not completely contained in *buf* are not printed.

    Nothing is returned.
    """
    version = buf[0]
    flags = buf[1:4]
    (n_entries,) = struct.unpack('>I', buf[4:8])

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    record = struct.Struct('>III')
    # number of whole 12-byte entries that follow the 8-byte header
    available = max(0, (len(buf) - 8) // record.size)
    for i in range(min(n_entries, available)):
        entry = record.unpack_from(buf, 8 + i * record.size)
        print(f'  {i}: {entry}')

def parse_stco(buf):
    """Print a Chunk Offset ('stco') atom payload (32-bit offsets).

    Args:
        buf: payload bytes starting at the version byte.  Entries that
            are not completely contained in *buf* are not printed.

    Nothing is returned.
    """
    version = buf[0]
    flags = buf[1:4]
    (n_entries,) = struct.unpack('>I', buf[4:8])

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    # number of whole 4-byte entries that follow the 8-byte header
    available = max(0, (len(buf) - 8) // 4)
    for i in range(min(n_entries, available)):
        (offset,) = struct.unpack_from('>I', buf, 8 + 4 * i)
        print(f'  {i}: {offset}')

def parse_co64(buf):
    """Print a 64-bit Chunk Offset ('co64') atom payload.

    Args:
        buf: payload bytes starting at the version byte.  Entries that
            are not completely contained in *buf* are not printed.

    Nothing is returned.
    """
    version = buf[0]
    flags = buf[1:4]
    (n_entries,) = struct.unpack('>I', buf[4:8])

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    # number of whole 8-byte entries that follow the 8-byte header
    available = max(0, (len(buf) - 8) // 8)
    for i in range(min(n_entries, available)):
        (offset,) = struct.unpack_from('>Q', buf, 8 + 8 * i)
        print(f'  {i}: {offset}')

def parse_stts(buf):
    """Print a Time-to-Sample ('stts') atom payload.

    Each entry is shown as a tuple ``(sample_count, sample_duration)``.

    Args:
        buf: payload bytes starting at the version byte.  Entries that
            are not completely contained in *buf* are not printed.

    Nothing is returned.
    """
    version = buf[0]
    flags = buf[1:4]
    (n_entries,) = struct.unpack('>I', buf[4:8])

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    record = struct.Struct('>II')
    # number of whole 8-byte entries that follow the 8-byte header
    available = max(0, (len(buf) - 8) // record.size)
    for i in range(min(n_entries, available)):
        entry = record.unpack_from(buf, 8 + i * record.size)
        print(f'  {i}: {entry}')

def parse_stss(buf):
    """Print a Sync Sample ('stss') atom payload.

    Args:
        buf: payload bytes starting at the version byte.  Entries that
            are not completely contained in *buf* are not printed.

    Nothing is returned.
    """
    version = buf[0]
    flags = buf[1:4]
    (n_entries,) = struct.unpack('>I', buf[4:8])

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    # number of whole 4-byte entries that follow the 8-byte header
    available = max(0, (len(buf) - 8) // 4)
    for i in range(min(n_entries, available)):
        (sample,) = struct.unpack_from('>I', buf, 8 + 4 * i)
        print(f'  {i}: {sample}')

def parse_uuid(buf):
    """Dump a 'uuid' atom payload: the first 16 bytes as hex, the rest as UTF-8 text."""
    uuid_bytes, text_bytes = buf[:16], buf[16:]
    print_binaries(uuid_bytes)
    print('%s' % str(text_bytes, 'utf-8'))


def print_binaries(buf, cur=None):
    """Print a hex dump of *buf*, eight bytes per row.

    Every row has the form ``<offset> : <hex bytes> : <text>``.  In the
    text column only printable ASCII (0x20..0x7E) is shown as is; every
    other byte is shown as ``.`` so that control bytes such as NUL, LF or
    ESC cannot break the row layout or disturb the terminal.

    Args:
        buf: bytes-like object to dump.
        cur: offset to show for the first byte; ``None`` means 0.
    """
    base = 0 if cur is None else cur
    for i in range(0, len(buf), 8):
        row = buf[i:i+8]
        hex_column = ' '.join('%02X' % b for b in row)
        text_column = ''.join(chr(b) if 0x20 <= b <= 0x7E else '.' for b in row)
        print('%010X : %s : %s' % (i + base, hex_column, text_column))


def print_atom_headers(f, verbose=False, pre_label=''):
    """Print the atom that starts at the current position of *f*.

    The atom type and size are printed first.  Container atoms ('moov',
    'trak', 'mdia', 'minf', 'edts', 'dinf', 'stbl') are then walked
    recursively; for any other atom larger than 8 bytes the payload is
    read and handed to the matching ``parse_*`` function, or hex-dumped
    when there is none.

    Args:
        f: binary file object positioned at the first byte of an atom.
        verbose: also hex-dump the most recently read header bytes.
        pre_label: text printed in front of the atom type (used to show
            the chain of parent atoms during recursion).

    Returns:
        ``(n, atom_type)``: the atom size taken from the header (the
        64-bit extended size when the 32-bit field is 1) and the 4-byte
        type decoded as UTF-8.
    """
    atom_start = f.tell()
    buf = f.read(8)

    n = struct.unpack('>I', buf[:4])[0]
    atom_type = str(buf[4:], 'utf-8')

    if n == 1:
        # decode 64-bit size
        # NOTE(review): buf is overwritten with the 8 extended-size bytes, so
        # the verbose dump below shows those bytes, not the first 8 header bytes.
        buf = f.read(8)
        n = struct.unpack('>Q', buf)[0]
    # a size of 0 is not treated specially here; callers stop when they get n == 0

    # NOTE(review): with the default pre_label='' this test is always true; passing
    # None would reach the `pre_label+atom_type` concatenation below for containers.
    if not pre_label is None:
        print('%s%s (size: 0x%X)' % (pre_label, atom_type, n))
    else:
        print('%s (size: 0x%X)' % (atom_type, n))
    data_start = f.tell()
    if verbose: print_binaries(buf, atom_start)


    if not atom_type in ('moov', 'trak', 'mdia', 'minf', 'edts', 'dinf', 'stbl'):
        # leaf atom: read (part of) the payload and print it
        if n > 8:
            if atom_type == 'uuid':
                # uuid payloads are read in full
                n_ = n
            else:
                # everything else is capped at 128 bytes (header included), so
                # long tables are only shown up to what fits in that window
                n_ = min(n, 128)

            # NOTE(review): 8 is subtracted even when a 16-byte header
            # (64-bit size) was consumed above.
            buf = f.read(n_-8)
            if atom_type == 'mvhd':
                parse_mvhd(buf)
            elif atom_type == 'tkhd':
                parse_tkhd(buf)
            elif atom_type == 'mdhd':
                parse_mdhd(buf)
            elif atom_type == 'stsd':
                parse_stsd(buf)
            elif atom_type == 'stsz':
                parse_stsz(buf)
            elif atom_type == 'stsc':
                parse_stsc(buf)
            elif atom_type == 'stco':
                parse_stco(buf)
            elif atom_type == 'co64':
                parse_co64(buf)
            elif atom_type == 'stts':
                parse_stts(buf)
            elif atom_type == 'stss':
                parse_stss(buf)
            elif atom_type == 'uuid':
                parse_uuid(buf)
            else:
                print('DATA:')
                print_binaries(buf, cur=data_start)
    else:
        # sub Atoms: walk the children until the end of this container
        sub_end = atom_start + n
        sub_cur = data_start
        while True:
            f.seek(sub_cur)
            if f.tell() != sub_cur: raise ValueError(f'seek failed? {f.tell()} != {sub_cur}')
            # children are always printed non-verbosely, prefixed with the parent chain
            sub_n, sub_type = print_atom_headers(f, verbose=False, pre_label=pre_label+atom_type+ ' / ')
            # a child of size 0 would never advance sub_cur, so stop there
            if sub_n == 0: break
            sub_cur += sub_n
            if sub_cur >= sub_end: break
            print('')

    return n, atom_type


def print_atoms(filename, verbose=False):
    """Print the file size and then every top-level atom of *filename*.

    Atoms are visited one after another with ``print_atom_headers``; the
    walk ends at an atom of size 0 or once the offset reaches the end of
    the file.

    Args:
        filename: path of the MP4 file to inspect.
        verbose: forwarded to ``print_atom_headers`` for top-level atoms.
    """
    with open(filename, 'rb') as stream:
        stream.seek(0, 2)
        total = stream.tell()
        print('file size : 0x%010X' % (total))
        print('')

        offset = 0
        finished = False
        while not finished:
            stream.seek(offset)
            if stream.tell() != offset: raise ValueError(f'seek failed? {stream.tell()} != {offset}')
            atom_size, _ = print_atom_headers(stream, verbose=verbose)
            print('size : 0x%X' % (atom_size))
            offset += atom_size
            # stop on a zero-size atom or when the whole file has been covered
            finished = atom_size == 0 or offset >= total
            if not finished:
                print('')


# ## extracting `moov` as a reference

def read_atom_head(f):
    """Read one atom header at the current position of *f*.

    Args:
        f: binary file object positioned at the first byte of an atom.

    Returns:
        ``(n, atom_type)``.  ``n`` is the 32-bit size field, or the 64-bit
        extended size when that field is 1; a value of 0 is returned as is.
        ``atom_type`` is the 4-byte type as text.  On return *f* is
        positioned just past the header (8 bytes, or 16 with an extended
        size).

    Raises:
        struct.error: if the file ends inside the header.
    """
    buf = f.read(8)
    if len(buf) < 8:
        raise struct.error(f'truncated atom header: expected 8 bytes, got {len(buf)}')

    n = struct.unpack('>I', buf[:4])[0]
    # the type is four arbitrary bytes (e.g. 0xA9 in '\xa9nam'); latin-1 maps
    # every byte to one character and equals ASCII for the usual types
    atom_type = buf[4:].decode('latin-1')

    if n == 1:
        # decode 64-bit size
        buf2 = f.read(8)
        if len(buf2) < 8:
            raise struct.error(f'truncated 64-bit atom size: expected 8 bytes, got {len(buf2)}')
        n = struct.unpack('>Q', buf2)[0]

    return n, atom_type


def extract_moov(src_filename, dst_filename, n_chunk=65536, verbose=False):
    """Copy the top-level ``moov`` atom and everything after it to a new file.

    The top-level atoms of *src_filename* are walked until ``moov`` is
    found; the bytes from the start of that atom up to the end of the
    source file are then written to *dst_filename*.

    Args:
        src_filename: path of a complete MP4 file used as reference.
        dst_filename: path of the file to create.
        n_chunk: number of bytes copied per read/write.
        verbose: show a progress bar, provided a module-level ``tqdm`` is
            available; otherwise the copy runs silently.

    Raises:
        ValueError: if no top-level ``moov`` atom can be reached.
    """
    with open(src_filename, 'rb') as f_src, open(dst_filename, 'wb') as f_dst:

        f_src.seek(0, 2)
        src_end = f_src.tell()

        # look for 'moov'
        src_cur = 0
        while True:
            if src_cur + 8 > src_end:
                raise ValueError(f"'moov' atom not found in {src_filename}")
            f_src.seek(src_cur)
            if f_src.tell() != src_cur: raise ValueError(f'seek failed? {f_src.tell()} != {src_cur}')

            n, atom_type = read_atom_head(f_src)
            if atom_type == 'moov': break
            if n < 8:
                # a zero or undersized atom would never move the cursor forward
                raise ValueError(
                    f"'moov' atom not found: atom {atom_type!r} at {src_cur} has size {n}")
            src_cur += n

        # 'moov' is found
        moov_start = src_cur

        # copy from the start of moov to the end of the file
        f_src.seek(moov_start)
        if f_src.tell() != moov_start: raise ValueError(f'seek failed? {f_src.tell()} != {moov_start}')

        offsets = range(moov_start, src_end, n_chunk)
        # the tqdm import of this file is optional; only use it when present
        progress = globals().get('tqdm') if verbose else None
        if progress is not None:
            offsets = progress(offsets)
        for _ in offsets:
            f_dst.write(f_src.read(n_chunk))


# ## regenerating sample tables from `mdat`

def is_aac_header(buf, frame_length):
    """Heuristically decide whether *buf* looks like the start of an AAC block.

    Only a stereo element (element id 0b001 with instance tag 0) is
    accepted.  The remaining bits are compared against six hand-tuned
    patterns; when one matches, its name is printed and True is returned.

    Args:
        buf: indexable sequence of byte values; up to index 5 is read.
        frame_length: only used by the 'degenerated block' pattern, which
            requires it to be exactly 7.

    Returns:
        True when one of the patterns matches, otherwise False.
    """
    first = buf[0]
    element_id = (first >> 5) & 0b111
    instance_tag = (first >> 1) & 0b1111
    if element_id != 0b001 or instance_tag != 0b0000:
        # not stereo
        return False

    matched = None
    if first & 0b1:
        # common_window == 1
        window_sequence = (buf[1] >> 5) & 0b11
        if window_sequence != 0b10:
            always_0 = (buf[1] >> 7) & 0b1
            maxsfb = ((buf[1] & 0b1111) << 2) | ((buf[2] >> 6) & 0b11)
            predictor = (buf[2] >> 5) & 0b1
            gain = ((buf[2] & 0b111) << 5) | ((buf[3] >> 3) & 0b11111)
            inc = (buf[4] >> 2) & 0b11111
            if (always_0 == 0 and 40 <= maxsfb <= 51 and predictor == 0
                    and 100 <= gain <= 228 and inc != 0):
                matched = 'common case'
        else:
            # window_sequence is 0b10 on this path
            maxsfb = buf[1] & 0b1111
            gain = ((buf[3] & 0b1111111) << 1) | ((buf[4] >> 7) & 0b1)
            if maxsfb >= 8 and 100 <= gain <= 200:
                matched = 'eight-short-sequence'
    else:
        # common_window == 0
        # pulse_data / tns_data / gain_control_data are bits 3, 2, 1 of buf[3]
        tool_flags = buf[3] & 0b00001110
        if tool_flags != 0:
            maxsfb = ((buf[2] & 0b1111) << 2) | ((buf[3] >> 6) & 0b11)
            if maxsfb == 0:
                if frame_length == 7:
                    matched = 'degenerated block'
            else:
                gain = buf[1]
                always_0 = (buf[2] >> 7) & 0b1
                if 100 <= gain <= 200 and always_0 == 0:
                    if ((buf[2] >> 5) & 0b11) != 0b10:
                        if 48 <= maxsfb <= 51:
                            matched = 'multiple windows'
                    elif (buf[2] & 0b1111) >= 8:
                        matched = 'multiple windows and eight-short-sequence'
        else:
            maxsfb = ((buf[2] & 0b111) << 3) | ((buf[3] >> 5) & 0b111)
            maxsfb2 = buf[5] & 0b11111
            if maxsfb == 0 and maxsfb2 == 0:
                matched = 'degenerated block with padding'

    if matched is None:
        return False
    print(matched)
    return True


def recover_sample_tables_from_mdat_fast(filename, verbose=False):
    """Rebuild the H.264 and AAC chunk tables by scanning the ``mdat`` atom.

    The payload is treated as an interleave of two kinds of chunks:

    * H.264 chunks, which start with the six bytes ``00 00 00 02 09 F0``
      and continue as a run of NAL units, each prefixed by its 4-byte
      big-endian length;
    * AAC chunks, which are everything up to the next H.264 chunk start.

    Args:
        filename: path of the MP4 file (typically one without ``moov``).
        verbose: print one line per H.264 chunk when true.

    Returns:
        ``(mov_table, aac_table)``: two lists of ``(offset, length)``
        tuples, with offsets counted from the beginning of the file.

    Raises:
        ValueError: if no top-level ``mdat`` atom can be reached.
    """
    marker = b'\x00\x00\x00\x02\x09\xf0'

    mov_table = []
    aac_table = []

    with open(filename, 'rb') as f_in:
        f_in.seek(0, 2)
        file_size = f_in.tell()

        # look for 'mdat' among the top-level atoms
        src_cur = 0
        while True:
            if src_cur + 8 > file_size:
                raise ValueError(f"'mdat' atom not found in {filename}")
            f_in.seek(src_cur)
            if f_in.tell() != src_cur: raise ValueError(f'seek failed? {f_in.tell()} != {src_cur}')

            n, atom_type = read_atom_head(f_in)
            if atom_type == 'mdat': break
            if n < 8:
                # a zero or undersized atom would never move the cursor forward
                raise ValueError(
                    f"'mdat' atom not found: atom {atom_type!r} at {src_cur} has size {n}")
            src_cur += n

        # read_atom_head() leaves the file just past the 8- or 16-byte header,
        # i.e. at the first payload byte
        cur = f_in.tell()
        if n == 0:
            # size 0: mdat of an incomplete mp4 file, it runs to the end of the file
            mdat_end = file_size
        else:
            # never scan past the real end of a truncated file
            mdat_end = min(src_cur + n, file_size)

        n = 0
        while cur < mdat_end:
            f_in.seek(cur)
            if f_in.read(6) == marker:
                # h264 chunk: hop from NAL unit to NAL unit
                frame_length = 6
                while cur + frame_length < mdat_end:
                    f_in.seek(cur + frame_length)
                    buf = f_in.read(4)
                    # a short read means the data ends inside a length field
                    if len(buf) < 4: break
                    # 0x20 / 0x21 in the first byte is taken as the start of AAC data
                    if (buf[0] & 0b11111110) == 0x20: break
                    frame_length += struct.unpack('>I', buf)[0] + 4
                # a truncated last NAL unit must not reach beyond mdat
                frame_length = min(frame_length, mdat_end - cur)

                if verbose: print(f'{n}: [mov] {cur}, {frame_length}')
                mov_table.append((cur, frame_length))
            else:
                # aac chunk: everything up to the next h264 chunk start
                # (searched from cur + 6 on), or up to the end of mdat
                hit = _find_bytes(f_in, marker, cur + 6, mdat_end)
                frame_length = (mdat_end if hit < 0 else hit) - cur
                aac_table.append((cur, frame_length))

            cur += frame_length
            n += 1

    return mov_table, aac_table


def _find_bytes(f, pattern, start, stop, block_size=4096):
    """Return the lowest offset p, start <= p < stop, where pattern begins in f, or -1."""
    overlap = len(pattern) - 1
    pos = start
    while pos < stop:
        f.seek(pos)
        # read `overlap` extra bytes so a match straddling two blocks is seen
        data = f.read(min(block_size, stop - pos) + overlap)
        hit = data.find(pattern)
        if hit >= 0:
            return pos + hit
        if len(data) <= overlap:
            # end of file reached
            break
        pos += len(data) - overlap
    return -1


# ## rebuilding `moov` from sample tables

def copy_atom_box(target_type, target_size, f_src, f_dst, only_header=True):
    """Copy one atom header, and optionally its body, from f_src to f_dst.

    The next atom header is taken from `f_src` via read_atom_head() and must
    be of type `target_type`.  A fresh 8-byte header (32-bit big-endian size
    followed by the four-character type) is then written to `f_dst`.

    Args:
        target_type: expected four-character atom type, e.g. 'moov'.
        target_size: size to store in the written header; None means
            "reuse the size found in the source header".
        f_src: binary file object to read from.
        f_dst: binary file object to write to.
        only_header: when False the body is copied as well.  Note that the
            number of body bytes read is derived from the size that was
            *written*, not from the size found in the source.

    Returns:
        The atom size found in the source header.

    Raises:
        ValueError: the atom read from f_src is not of type `target_type`.
    """
    src_size, atom_type = read_atom_head(f_src)
    if atom_type != target_type:
        raise ValueError(f'{target_type} not found but {atom_type}')

    written_size = src_size if target_size is None else target_size

    # size field first, then the type tag
    f_dst.write(struct.pack('>I', written_size))
    f_dst.write(target_type.encode('utf-8'))

    if only_header:
        return src_size

    body = f_src.read(written_size - 8)
    f_dst.write(body)
    return src_size


def recover_moov_from_sample_tables(
    moov_const,
    ref_filename, dst_filename,
    mov_table, aac_table,
    full_copy=True, n_chunk=65536,
    verbose=False,
    ):
    """Write a new `moov` atom whose video sample tables come from `mov_table`.

    The reference moov in `ref_filename` is walked box by box in a fixed
    order (moov, mvhd, trak, tkhd, edts, elst, mdia, mdhd, hdlr, minf, vmhd,
    dinf, stbl, stsd, stts, stss, stsc, stsz, stco).  Each box is either
    copied verbatim, patched with a recomputed duration, or regenerated from
    `mov_table`.  Only the video track is rebuilt: the atom that follows the
    video trak in the reference is skipped, and the `udta` atom after it is
    copied.

    Args:
        moov_const: indexable holding, in this order, the video sample
            duration, audio sample duration, mvhd timescale, video timescale
            and audio timescale.
        ref_filename: file that begins with the reference `moov` atom.
        dst_filename: output file (overwritten).
        mov_table: list of (offset, size) pairs, one per video sample.
        aac_table: list of audio entries; only its length is used, and only
            to compute the movie duration.
        full_copy: when true, everything that follows `udta` in the
            reference file is appended unchanged.
        n_chunk: read size in bytes for that trailing copy.
        verbose: wrap the trailing copy in a tqdm progress bar (tqdm must
            then be available in the module namespace).

    Raises:
        ValueError: a reference box has an unexpected type or size.
        struct.error: a size, offset or duration does not fit in 32 bits
            (chunk offsets are written as 32-bit `stco` entries).
    """
    mov_sample_duration = moov_const[0]
    aac_sample_duration = moov_const[1]
    mvhd_timescale = moov_const[2]
    mov_timescale = moov_const[3]
    aac_timescale = moov_const[4]

    n_mov_table = len(mov_table)
    n_aac_table = len(aac_table)

    # mdhd durations are in the track's timescale, tkhd/mvhd durations in
    # the movie (mvhd) timescale.
    mov_mdhd_duration = n_mov_table * mov_sample_duration
    aac_mdhd_duration = n_aac_table * aac_sample_duration
    mov_tkhd_duration = int(mov_mdhd_duration * mvhd_timescale / mov_timescale)
    aac_tkhd_duration = int(aac_mdhd_duration * mvhd_timescale / aac_timescale)
    mvhd_duration = max(mov_tkhd_duration, aac_tkhd_duration)

    mov_sample_sizes = [s for o, s in mov_table]
    mov_chunk_offsets = [o for o, s in mov_table]

    # Every 150th sample, starting with sample 1, is declared a sync sample.
    # NOTE(review): 150 is a fixed key-frame interval taken over from the
    # previous implementation - confirm it matches the recording.
    sync_interval = 150
    mov_stss_entries = (n_mov_table - 1)//sync_interval + 1
    sync_samples = [1 + sync_interval*k for k in range(mov_stss_entries)]

    # Track dimensions written into tkhd, as 16.16 fixed-point values.
    track_width = 1280 << 16    # 83886080
    track_height = 720 << 16    # 47185920

    # Box sizes.  The moov structure is assumed to be in a fixed format:
    # 0x6C (mvhd), 0x5C (tkhd), 0x24 (edts incl. a 28-byte elst) and 0x20
    # (mdhd) are verified below; 0xAB (stsd), 0x1C (stsc), 0x14 (vmhd),
    # 0x24 (dinf), 0x2D (hdlr) and 0x62 (udta) are presumably the sizes of
    # those boxes in the author's reference file - verify against yours.
    stts_size = 24  # header 8 + version/flags 4 + count 4 + one 8-byte entry
    mov_stsz_size = len(mov_sample_sizes)*4 + 20
    mov_stco_size = len(mov_chunk_offsets)*4 + 16
    mov_stss_size = mov_stss_entries*4 + 16

    mov_stbl_size = (8 + 0xAB + stts_size + 0x1C
                     + mov_stsz_size + mov_stco_size + mov_stss_size)
    mov_minf_size = 8 + 0x14 + 0x24 + mov_stbl_size
    mov_mdia_size = 8 + 0x20 + 0x2D + mov_minf_size
    mov_trak_size = 8 + 0x5C + 0x24 + mov_mdia_size
    # the audio trak is not written, so it does not count towards moov
    moov_size = 8 + 0x6C + 0x62 + mov_trak_size

    def u32(value):
        return struct.pack('>I', value)

    with open(ref_filename, 'rb') as f_moov, open(dst_filename, 'wb') as f_dst:

        # moov
        copy_atom_box('moov', moov_size, f_moov, f_dst, only_header=True)

        # mvhd : timescale and duration replaced, everything else kept
        n = copy_atom_box('mvhd', None, f_moov, f_dst, only_header=True)
        if n != (100+8): raise ValueError(f'ERROR: mvhd box size is not 108 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:12])
        f_dst.write(u32(mvhd_timescale))
        f_dst.write(u32(mvhd_duration))
        f_dst.write(buf[20:])

        # movie track
        # trak
        copy_atom_box('trak', mov_trak_size, f_moov, f_dst, only_header=True)

        # tkhd : duration and the two trailing dimension fields replaced
        n = copy_atom_box('tkhd', None, f_moov, f_dst, only_header=True)
        if n != (84+8): raise ValueError(f'ERROR: mov tkhd box size is not 92 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:20])
        f_dst.write(u32(mov_tkhd_duration))
        f_dst.write(buf[24:76])
        f_dst.write(u32(track_width))
        f_dst.write(u32(track_height))

        # edts / elst : bytes 8..12 of the elst body replaced by the duration
        copy_atom_box('edts', None, f_moov, f_dst, only_header=True)

        n = copy_atom_box('elst', None, f_moov, f_dst, only_header=True)
        if n != (20+8): raise ValueError(f'ERROR: mov elst box size is not 28 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:8])
        f_dst.write(u32(mov_tkhd_duration))
        f_dst.write(buf[12:])

        # mdia
        copy_atom_box('mdia', mov_mdia_size, f_moov, f_dst, only_header=True)

        # mdhd : duration = mov_mdhd_duration
        n = copy_atom_box('mdhd', None, f_moov, f_dst, only_header=True)
        if n != (24+8): raise ValueError(f'ERROR: mov mdhd box size is not 32 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:16])
        f_dst.write(u32(mov_mdhd_duration))
        f_dst.write(buf[20:])

        copy_atom_box('hdlr', None, f_moov, f_dst, only_header=False)

        # minf
        copy_atom_box('minf', mov_minf_size, f_moov, f_dst, only_header=True)
        copy_atom_box('vmhd', None, f_moov, f_dst, only_header=False)
        copy_atom_box('dinf', None, f_moov, f_dst, only_header=False)

        # stbl
        copy_atom_box('stbl', mov_stbl_size, f_moov, f_dst, only_header=True)
        copy_atom_box('stsd', None, f_moov, f_dst, only_header=False)

        # stts : a single entry (all samples share one duration).  The box
        # size is written explicitly so the header always matches the 16-byte
        # body, even if the reference stts holds more entries.
        n = copy_atom_box('stts', stts_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(u32(1)) # n_entries
        f_dst.write(u32(n_mov_table)) # sample_count
        f_dst.write(u32(mov_sample_duration)) # sample_duration

        # stss
        n = copy_atom_box('stss', mov_stss_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(u32(mov_stss_entries)) # n_entries
        f_dst.write(struct.pack(f'>{len(sync_samples)}I', *sync_samples))

        copy_atom_box('stsc', None, f_moov, f_dst, only_header=False)

        # stsz
        n = copy_atom_box('stsz', mov_stsz_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(u32(0)) # sample_size (0: sizes are listed per sample)
        f_dst.write(u32(len(mov_sample_sizes))) # n_entries
        f_dst.write(struct.pack(f'>{len(mov_sample_sizes)}I', *mov_sample_sizes))

        # stco (32-bit offsets; one chunk per sample)
        n = copy_atom_box('stco', mov_stco_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(u32(len(mov_chunk_offsets))) # n_entries
        f_dst.write(struct.pack(f'>{len(mov_chunk_offsets)}I', *mov_chunk_offsets))

        # The audio trak is intentionally not rebuilt here; the reference's
        # next atom is stepped over instead.
        # NOTE(review): this relies on print_atom_headers() leaving the read
        # position at the end of the atom it reports - confirm.
        print_atom_headers(f_moov)

        copy_atom_box('udta', None, f_moov, f_dst, only_header=False)

        if not full_copy: return

        # just copy the rest of the reference moov file
        ref_pos = f_moov.tell()
        f_moov.seek(0, 2)
        ref_end = f_moov.tell()
        f_moov.seek(ref_pos)
        if f_moov.tell() != ref_pos: raise ValueError(f'seek failed? {f_moov.tell()} != {ref_pos}')

        chunk_starts = range(ref_pos, ref_end, n_chunk)
        for _ in (tqdm(chunk_starts) if verbose else chunk_starts):
            f_dst.write(f_moov.read(n_chunk))


# ## merging the recovered `moov`

def merge_moov(
    src_filename,
    moov_filename,
    dst_filename,
    n_chunk=65536,
    verbose=False):
    """Combine the damaged source file with a rebuilt moov into a new file.

    The source must start with `ftyp`, `free` and `mdat` atoms, in that
    order.  `ftyp` and `free` are copied as they are, the rest of the source
    (the mdat header and payload) is copied in chunks, the mdat header is
    then rewritten with a 32-bit size, and finally the content of
    `moov_filename` is appended.

    Args:
        src_filename: damaged file without a moov atom.
        moov_filename: file starting with the moov atom to append.
        dst_filename: output file (overwritten).
        n_chunk: chunk size in bytes for the bulk copies.
        verbose: dump the copied ftyp/free atoms, print the moov size and
            show tqdm progress bars (tqdm must then be available in the
            module namespace).

    Raises:
        ValueError: an expected atom is not found where it should be.
        struct.error: the resulting mdat size does not fit in 32 bits.
    """
    with open(src_filename, 'rb') as f_src, \
         open(moov_filename, 'rb') as f_moov, \
         open(dst_filename, 'wb') as f_dst:

        f_src.seek(0, 2)
        file_size = f_src.tell()

        # ftyp and free are copied verbatim
        cur = 0
        for expected in ('ftyp', 'free'):
            f_src.seek(cur)
            n, atom_type = read_atom_head(f_src)
            if atom_type != expected: raise ValueError(f'{expected} not found')

            f_src.seek(cur)
            buf = f_src.read(n)
            f_dst.write(buf)
            if verbose:
                print_binaries(buf)
            cur += n

        # mdat
        f_src.seek(cur)
        n, atom_type = read_atom_head(f_src)
        if atom_type != 'mdat': raise ValueError('mdat not found')
        # Remember where the mdat header lives instead of assuming that
        # ftyp + free is always 40 bytes long.
        mdat_offset = cur

        # copy the mdat header and everything after it
        f_src.seek(mdat_offset)
        chunk_starts = range(mdat_offset, file_size, n_chunk)
        for _ in (tqdm(chunk_starts) if verbose else chunk_starts):
            f_dst.write(f_src.read(n_chunk))
        print('')

        # Rewrite the mdat header now that the payload length is known.
        # NOTE(review): the payload is followed by 8 zero bytes that are
        # counted as part of mdat; this is kept from the previous behaviour
        # (seek to end + 8), its purpose is not evident from this file.
        payload_end = f_dst.tell()
        mdat_end = payload_end + 8
        f_dst.seek(mdat_offset)
        f_dst.write(struct.pack('>I', mdat_end - mdat_offset) + b'mdat')
        f_dst.seek(payload_end)
        f_dst.write(b'\x00' * 8)

        # the moov file must start with a moov atom
        f_moov.seek(0, 2)
        moov_size = f_moov.tell()
        if verbose:
            print(f'moov_size: {moov_size}')

        f_moov.seek(0)
        n, atom_type = read_atom_head(f_moov)
        if atom_type != 'moov': raise ValueError('something is wrong...')

        # append the whole moov file
        f_moov.seek(0)
        chunk_starts = range(0, moov_size, n_chunk)
        for _ in (tqdm(chunk_starts) if verbose else chunk_starts):
            f_dst.write(f_moov.read(n_chunk))


# # main program to recover corrupted MP4

def finsta360(
    moov_const,
    src_filename,
    ref_filename=None,
    dst_filename=None,
    keep_temp=False,
    verbose=False):
    """Run the recovery pipeline on a damaged MP4.

    Modes, selected by which file names are given:
      * no reference file : only print the atoms of the source (check mode)
      * no output file    : rebuild the moov but do not merge it (test mode)
      * all three         : rebuild the moov and write the merged output

    Args:
        moov_const: (video sample duration, audio sample duration,
            mvhd timescale, video timescale, audio timescale).
        src_filename: damaged file.
        ref_filename: complete file whose moov serves as a template.
        dst_filename: recovered output file.
        keep_temp: keep the two temporary moov files in the working
            directory instead of deleting them at the end.
        verbose: print atom listings and duration estimates.
    """
    if ref_filename is None:
        # check mode
        print_atoms(src_filename)
        return

    # temporary files (created in the current working directory)
    ref_moov_filename = 'finsta360_ref.moov'
    new_moov_filename = 'finsta360_new.moov'

    def announce(headline):
        print('')
        print('########################################')
        print(headline)

    def report_duration(prefix, seconds):
        print(f'{prefix}{seconds} sec / {seconds/60} min')

    # 1) extract reference moov
    announce(f'# 1) extracting reference moov from\n\t{ref_filename}')
    extract_moov(ref_filename, ref_moov_filename)
    if verbose:
        print_atoms(ref_moov_filename)

    # 2) regenerate sample tables from mdat
    announce(f'# 2) regenerate sample tables from mdat in\n\t{src_filename}')
    mov_table, aac_table = recover_sample_tables_from_mdat_fast(
        src_filename,
        verbose=verbose)
    if verbose:
        video_count = len(mov_table)
        audio_count = len(aac_table)
        print(f'number of samples (movie) : {video_count}')
        print(f'number of samples (audio) : {audio_count}')

        mov_sample_duration = moov_const[0]
        aac_sample_duration = moov_const[1]
        mvhd_timescale = moov_const[2]
        mov_timescale = moov_const[3]
        aac_timescale = moov_const[4]

        video_ticks = video_count * mov_sample_duration
        audio_ticks = audio_count * aac_sample_duration
        # video duration expressed in the movie timescale
        movie_ticks = int(video_ticks * mvhd_timescale / mov_timescale)

        report_duration('mvhd duration  : ', movie_ticks / mvhd_timescale)
        report_duration('movie duration : ', video_ticks / mov_timescale)
        report_duration('audio duration : ', audio_ticks / aac_timescale)

    # 3) rebuilding moov from the sample tables
    announce('# 3) rebuilding moov from the sample tables')
    recover_moov_from_sample_tables(
        moov_const,
        ref_moov_filename,
        new_moov_filename,
        mov_table, aac_table,
        full_copy=True,
    )
    if verbose:
        print_atoms(new_moov_filename)

    # 4) merging the rebuilt moov into the source (skipped in test mode)
    if dst_filename is not None:
        announce(f'# 4) merging the rebuilt moov into\n\t{src_filename}\nas\n\t{dst_filename}')
        merge_moov(
            src_filename,
            new_moov_filename,
            dst_filename,
        )

    if keep_temp:
        return
    for temporary in (ref_moov_filename, new_moov_filename):
        os.remove(temporary)


def usage():
    """Print the command-line help text and terminate via sys.exit()."""
    help_lines = (
        'finsta360.py : to finalize incomplete MP4 of Insta360 ONE-X',
        'https://github.com/kichiki/finsta360',
        'USAGE: finsta360.py [options]',
        '\t-s file : source file, that is, corrupted mp4 (insv) file',
        '\t-r file : complete mp4 (insv) file as a reference',
        '\t-o file : output recovered mp4 (insv) file',
        '\t-v      : to set verbose mode',
        '\t-k      : to keep temporary files',
        '\t          (reference and recovered moov files, finsta360*.moov)',
        'If you provide only source file (-s), program prints the metadata',
        'If you dont provide output file (-o), program just runs without writing',
    )
    print('\n'.join(help_lines))
    sys.exit()


if __name__ == '__main__':
    # Command-line entry point: parse the options, validate the file names,
    # then hand over to finsta360().
    src_filename = None
    ref_filename = None
    dst_filename = None
    verbose = False
    keep_temp = False

    i = 1
    while i < len(sys.argv):
        option = sys.argv[i]
        if option in ('-s', '-r', '-o'):
            # these options need a file name right after them; without this
            # check a trailing option ended in an IndexError traceback
            if i + 1 >= len(sys.argv):
                print(f'option {option} requires a file name')
                usage()
                sys.exit()
            value = sys.argv[i+1]
            if option == '-s':
                src_filename = value
            elif option == '-r':
                ref_filename = value
            else:
                dst_filename = value
            i += 2
        elif option == '-v':
            verbose = True
            i += 1
        elif option == '-k':
            keep_temp = True
            i += 1
        else:
            # unknown option; usage() terminates the program
            usage()
            break

    if src_filename is None:
        print(f'you must provie source file {src_filename}')
        usage()
        sys.exit()
    if not os.path.exists(src_filename):
        print(f'source file {src_filename} does not exist')
        sys.exit()
    if ref_filename is not None and not os.path.exists(ref_filename):
        print(f'reference file {ref_filename} does not exist')
        sys.exit()
    # never overwrite an existing output file
    if dst_filename is not None and os.path.exists(dst_filename):
        print(f'output file {dst_filename} already exists')
        sys.exit()


    # constants describing the recording; they are passed on as a tuple
    # (video sample duration, audio sample duration, mvhd timescale,
    #  video timescale, audio timescale)
    mov_sample_duration = 3000
    aac_sample_duration = 1024

    mvhd_timescale = 90000
    mov_timescale = 90000
    aac_timescale = 48000

    moov_const = (mov_sample_duration,
                  aac_sample_duration,
                  mvhd_timescale,
                  mov_timescale,
                  aac_timescale)

    finsta360(
        moov_const,
        src_filename,
        ref_filename,
        dst_filename,
        keep_temp,
        verbose)


    sys.exit()

rawaac.py

#!/usr/bin/env python
# coding: utf-8

# finsta360 - python script to finalize incomplete MP4 of Insta360 ONE-X
# https://github.com/kichiki/finsta360

import sys
import os.path

import struct
from datetime import datetime, timedelta
import gc
#from tqdm import tqdm


# ## parsing mp4

def parse_mvhd(buf):
    """Decode a Movie Header Atom (`mvhd`) payload and print every field.

    `buf` is the atom body without the 8-byte atom header.  All fields are
    read as big-endian 32-bit values (the preferred volume as 16-bit), and
    the two timestamps are shown as seconds since 1904-01-01.
    """
    epoch = datetime(1904, 1, 1)

    version = buf[0]
    (creation_time, modification_time, time_scale, duration,
     preferred_rate, preferred_volume) = struct.unpack_from('>5IH', buf, 4)
    # bytes 26..36 are reserved
    matrix_structure = struct.unpack_from('>9I', buf, 36)
    (preview_time, preview_duration, poster_time, selection_time,
     selection_duration, current_time, next_track_id) = struct.unpack_from('>7I', buf, 72)

    print('version            :', version)
    print('creation time      :', epoch + timedelta(seconds=creation_time))
    print('modification_time  :', epoch + timedelta(seconds=modification_time))
    print('time scale         :', time_scale)
    print('duration           :', duration, '/',
          duration/time_scale, 'sec /', duration/time_scale/60, 'min')
    print('preferred_rate     :', preferred_rate)
    print('preferred_volume   :', preferred_volume)
    print('matrix_structure   :', matrix_structure)
    print('preview_time       :', preview_time)
    print('preview_duration   :', preview_duration)
    print('poster_time        :', poster_time)
    print('selection_time     :', selection_time)
    print('selection_duration :', selection_duration)
    print('current_time       :', current_time)
    print('next_track_id      :', next_track_id)

def parse_tkhd(buf):
    """Decode a Track Header Atom (`tkhd`) payload and print every field.

    `buf` is the atom body without the 8-byte atom header.  Values are read
    big-endian; the timestamps are shown as seconds since 1904-01-01.
    """
    epoch = datetime(1904, 1, 1)

    version = buf[0]
    flags = buf[1:4]
    creation_time, modification_time, track_id = struct.unpack_from('>3I', buf, 4)
    # bytes 16..20 are reserved
    (duration,) = struct.unpack_from('>I', buf, 20)
    # bytes 24..32 are reserved
    layer, alternate_group, volume = struct.unpack_from('>3H', buf, 32)
    # bytes 38..40 are reserved
    matrix_structure = struct.unpack_from('>9I', buf, 40)
    track_width, track_height = struct.unpack_from('>2I', buf, 76)

    print('version           :', version)
    print('flags             :', flags)
    print('creation time     :', epoch + timedelta(seconds=creation_time))
    print('modification_time :', epoch + timedelta(seconds=modification_time))
    print('track_id          :', track_id)
    print('duration          :', duration)
    print('layer             :', layer)
    print('alternate_group   :', alternate_group)
    print('volume            :', volume)
    print('matrix_structure  :', matrix_structure)
    print('track_width       :', track_width)
    print('track_height      :', track_height)

def parse_mdhd(buf):
    """Decode a Media Header Atom (`mdhd`) payload and print every field.

    `buf` is the atom body without the 8-byte atom header.  Values are read
    big-endian; the timestamps are shown as seconds since 1904-01-01 and
    the duration is additionally converted to seconds and minutes using the
    time scale.
    """
    epoch = datetime(1904, 1, 1)

    version = buf[0]
    flags = buf[1:4]
    (creation_time, modification_time, time_scale, duration,
     language, quality) = struct.unpack_from('>4I2H', buf, 4)

    print('version           :', version)
    print('flags             :', flags)
    print('creation time     :', epoch + timedelta(seconds=creation_time))
    print('modification_time :', epoch + timedelta(seconds=modification_time))
    print('time scale        :', time_scale)
    print('duration          :', duration, '/',
          duration/time_scale, 'sec /', duration/time_scale/60, 'min')
    print('language          :', language)
    print('quality           :', quality)


def parse_stsd(buf):
    """Dump a Sample Description Atom (`stsd`) payload and list its entries.

    `buf` is the atom body without the 8-byte atom header.  After a hex dump
    of the whole payload, the version, flags and entry count are printed,
    followed by one line per sample description with its size, four-character
    data format and data reference index.

    Each sample description starts with a 16-byte general header
    (size 4, format 4, reserved 6, data reference index 2) and is `size`
    bytes long in total, so the next entry is found by advancing by that
    size.  Listing stops early when the buffer ends or an entry declares a
    size smaller than its own header.
    """
    print('DATA:')
    print_binaries(buf)
    print(f'size of buf: {len(buf)}')

    version   = buf[0]
    flags     = buf[1:4]
    n_entries = struct.unpack('>I', buf[4:8])[0]

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    entry_header_size = 16
    sample_description_table = []
    i0 = 8
    for i in range(n_entries):
        # the whole 16-byte entry header must be available
        if len(buf) < i0 + entry_header_size: break
        sample_description_size = struct.unpack('>I', buf[i0:i0+4])[0]
        data_format = str(buf[i0+4:i0+8], 'utf-8')
        data_reference_index = struct.unpack('>H', buf[i0+14:i0+16])[0]
        sample_description_table.append(
            (sample_description_size, data_format, data_reference_index))
        print('%d: size: 0x%X, format: %s, ref_index: 0x%X' % (
            i, sample_description_size, data_format, data_reference_index))
        # a size below the header size cannot be a valid entry and would
        # keep the cursor from moving forward
        if sample_description_size < entry_header_size: break
        i0 += sample_description_size

def parse_stsz(buf):
    """Print a Sample Size Atom (`stsz`) payload.

    `buf` is the atom body without the 8-byte atom header: version/flags,
    a common sample size, the entry count and then one 32-bit size per
    entry.  Only entries that are completely contained in `buf` are listed.
    """
    version = buf[0]
    flags = buf[1:4]
    sample_size, n_entries = struct.unpack_from('>2I', buf, 4)

    print('version           :', version)
    print('flags             :', flags)
    print('sample_size       :', sample_size)
    print('number of entries :', n_entries)

    # the table starts at byte 12; ignore a truncated tail
    listed = min(n_entries, (len(buf) - 12)//4)
    sizes = struct.unpack_from(f'>{listed}I', buf, 12)
    for index, size in enumerate(sizes):
        print(f'  {index}: {size}')

def parse_stsc(buf):
    """Print a Sample-to-Chunk Atom (`stsc`) payload.

    `buf` is the atom body without the 8-byte atom header.  Each entry is a
    triple of 32-bit values (first chunk, samples per chunk, sample
    description id); entries cut off by the end of `buf` are not listed.
    """
    version = buf[0]
    flags = buf[1:4]
    (n_entries,) = struct.unpack_from('>I', buf, 4)

    print('version           :', version)
    print('flags             :', flags)
    print('number of entries :', n_entries)

    entry = struct.Struct('>3I')
    for index in range(n_entries):
        start = 8 + index*entry.size
        if start + entry.size > len(buf):
            break
        triple = entry.unpack_from(buf, start)
        print(f'  {index}: {triple}')

def parse_stco(buf):
    """Print a Chunk Offset Atom (`stco`) payload with 32-bit offsets.

    `buf` is the atom body without the 8-byte atom header.  Offsets that
    are not completely contained in `buf` are not listed.
    """
    version = buf[0]
    flags = buf[1:4]
    (n_entries,) = struct.unpack_from('>I', buf, 4)

    print('version           :', version)
    print('flags             :', flags)
    print('number of entries :', n_entries)

    # walk the table that starts right after the entry count
    position = 8
    index = 0
    while index < n_entries and position + 4 <= len(buf):
        (offset,) = struct.unpack_from('>I', buf, position)
        print(f'  {index}: {offset}')
        position += 4
        index += 1

def parse_co64(buf):
    """Print a 64-bit chunk offset atom (`co64`) payload.

    `buf` is the atom body without the 8-byte atom header.  Each entry is
    one big-endian 64-bit offset; a truncated tail is ignored.
    """
    version = buf[0]
    flags = buf[1:4]
    (n_entries,) = struct.unpack_from('>I', buf, 4)

    print('version           :', version)
    print('flags             :', flags)
    print('number of entries :', n_entries)

    # number of 8-byte offsets that fit after the 8-byte table header
    listed = min(n_entries, (len(buf) - 8)//8)
    offsets = struct.unpack_from(f'>{listed}Q', buf, 8)
    for index, offset in enumerate(offsets):
        print(f'  {index}: {offset}')

def parse_stts(buf):
    """Print and return the entries of a time-to-sample ('stts') atom payload.

    Layout read from ``buf``: byte 0 is the version, bytes 1-3 the flags,
    bytes 4-7 the big-endian entry count, followed by 8-byte entries of two
    big-endian 32-bit integers.

    Args:
        buf: payload bytes of the atom; it may be shorter than the entry
            count implies.

    Returns:
        List of ``(sample_count, sample_duration)`` tuples. Parsing stops at
        the first entry that does not fit completely in ``buf``.
    """
    version   = buf[0]
    flags     = buf[1:4]
    n_entries = struct.unpack('>I', buf[4:8])[0]

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    time_to_sample_table = []
    for i in range(n_entries):
        i0 = 8 + i*8
        # stop as soon as an entry is only partially present
        if len(buf) < i0 + 8: break
        entry = struct.unpack_from('>II', buf, i0)
        time_to_sample_table.append(entry)
        print(f'  {i}: {entry}')

    # the table used to be built and thrown away; hand it to the caller
    return time_to_sample_table

def parse_stss(buf):
    """Print and return the entries of a sync-sample ('stss') atom payload.

    Layout read from ``buf``: byte 0 is the version, bytes 1-3 the flags,
    bytes 4-7 the big-endian entry count, followed by one big-endian 32-bit
    sample number per entry.

    Args:
        buf: payload bytes of the atom; it may be shorter than the entry
            count implies.

    Returns:
        List of sample numbers. Parsing stops at the first entry that does
        not fit completely in ``buf``.
    """
    version   = buf[0]
    flags     = buf[1:4]
    n_entries = struct.unpack('>I', buf[4:8])[0]

    print(f'version           : {version}')
    print(f'flags             : {flags}')
    print(f'number of entries : {n_entries}')

    sync_sample_table = []
    for i in range(n_entries):
        i0 = 8 + i*4
        # stop as soon as an entry is only partially present
        if len(buf) < i0 + 4: break
        sample = struct.unpack_from('>I', buf, i0)[0]
        sync_sample_table.append(sample)
        print(f'  {i}: {sample}')

    # the table used to be built and thrown away; hand it to the caller
    return sync_sample_table

def parse_uuid(buf):
    """Dump a 'uuid' atom payload: first 16 bytes as hex, the rest as text.

    Args:
        buf: payload bytes of the atom.
    """
    print_binaries(buf[:16])
    # The remainder is not guaranteed to be valid UTF-8; substitute undecodable
    # bytes instead of aborting the whole dump with UnicodeDecodeError.
    print('%s' % str(buf[16:], 'utf-8', errors='replace'))


def print_binaries(buf, cur=None):
    """Print ``buf`` as a hex dump, eight bytes per row.

    Each row has the form ``<address> : <hex bytes> : <text>``. The address
    is the row's index in ``buf`` plus ``cur``, printed as 10 upper-case hex
    digits.

    Args:
        buf: bytes to dump.
        cur: address to show for the first byte; ``None`` means 0.
    """
    if cur is None: cur = 0
    for i in range(0, len(buf), 8):
        row = buf[i:i+8]
        hex_part = ' '.join('%02X' % b for b in row)
        # Only printable ASCII is echoed; writing raw NUL / newline / escape
        # bytes to the terminal would garble the dump.
        text_part = ''.join(chr(b) if 0x20 <= b <= 0x7E else '.' for b in row)
        print('%010X : %s : %s' % (i + cur, hex_part, text_part))


def print_atom_headers(f, verbose=False, pre_label=''):
    """Print the atom at the current position of ``f`` and, for containers, its children.

    The header is a big-endian 32-bit size followed by a 4-character type;
    a size of 1 means a 64-bit size follows. Atoms of type moov, trak, mdia,
    minf, edts, dinf and stbl are treated as containers and their children
    are printed recursively. For any other atom at most 128 bytes (the whole
    atom for 'uuid') are read and passed to the matching ``parse_*``
    function, or hex-dumped when there is none.

    Args:
        f: seekable binary file positioned at the start of an atom.
        verbose: also hex-dump the header bytes of this atom (children are
            always printed with ``verbose=False``).
        pre_label: text printed in front of the atom type, used to show the
            path of parent containers.

    Returns:
        ``(size, atom_type)`` of the atom that was read.

    Raises:
        ValueError: if seeking to a child atom did not reach the requested
            position.
    """
    atom_start = f.tell()
    header = f.read(8)

    n = struct.unpack('>I', header[:4])[0]
    atom_type = str(header[4:], 'utf-8')

    if n == 1:
        # decode 64-bit size
        ext = f.read(8)
        n = struct.unpack('>Q', ext)[0]
        # keep the complete 16-byte header so the verbose dump below shows
        # the bytes that really start at atom_start
        header += ext

    if pre_label is not None:
        print('%s%s (size: 0x%X)' % (pre_label, atom_type, n))
    else:
        print('%s (size: 0x%X)' % (atom_type, n))
    data_start = f.tell()
    if verbose: print_binaries(header, atom_start)

    if atom_type not in ('moov', 'trak', 'mdia', 'minf', 'edts', 'dinf', 'stbl'):
        if n > 8:
            if atom_type == 'uuid':
                n_ = n
            else:
                # only a preview of large atoms is read
                n_ = min(n, 128)

            buf = f.read(n_-8)
            if atom_type == 'mvhd':
                parse_mvhd(buf)
            elif atom_type == 'tkhd':
                parse_tkhd(buf)
            elif atom_type == 'mdhd':
                parse_mdhd(buf)
            elif atom_type == 'stsd':
                parse_stsd(buf)
            elif atom_type == 'stsz':
                parse_stsz(buf)
            elif atom_type == 'stsc':
                parse_stsc(buf)
            elif atom_type == 'stco':
                parse_stco(buf)
            elif atom_type == 'co64':
                parse_co64(buf)
            elif atom_type == 'stts':
                parse_stts(buf)
            elif atom_type == 'stss':
                parse_stss(buf)
            elif atom_type == 'uuid':
                parse_uuid(buf)
            else:
                print('DATA:')
                print_binaries(buf, cur=data_start)
    else:
        # sub Atoms
        sub_end = atom_start + n
        sub_cur = data_start
        # A container with no room for children (size == header size) must not
        # read the following sibling as its child. A size of 0 keeps the
        # previous behaviour of reading exactly one child.
        while n == 0 or sub_cur < sub_end:
            f.seek(sub_cur)
            if f.tell() != sub_cur: raise ValueError(f'seek failed? {f.tell()} != {sub_cur}')
            sub_n, sub_type = print_atom_headers(f, verbose=False, pre_label=pre_label+atom_type+ ' / ')
            if sub_n == 0: break
            sub_cur += sub_n
            if sub_cur >= sub_end: break
            print('')

    return n, atom_type


def print_atoms(filename, verbose=False):
    """Print the file size and every top-level atom of ``filename``.

    Atoms are walked by adding each atom's size to the current offset; the
    walk ends at the end of the file or at an atom whose size is 0.

    Args:
        filename: path of the file to inspect.
        verbose: passed to ``print_atom_headers`` for the top-level atoms.

    Raises:
        ValueError: if seeking to an atom did not reach the requested
            position.
    """
    with open(filename, 'rb') as f:
        f.seek(0, 2)
        file_size = f.tell()
        print('file size : 0x%010X' % (file_size))
        print('')

        cur = 0
        # testing before the first read keeps an empty file from being parsed
        while cur < file_size:
            f.seek(cur)
            if f.tell() != cur: raise ValueError(f'seek failed? {f.tell()} != {cur}')
            n, _ = print_atom_headers(f, verbose=verbose)
            print('size : 0x%X' % (n))
            if n == 0: break
            cur += n
            if cur >= file_size: break
            print('')


# ## extracting `moov` as a reference

def read_atom_head(f):
    """Read one atom header from ``f`` and return ``(size, atom_type)``.

    The header is a big-endian 32-bit size followed by a 4-byte type decoded
    as UTF-8. When the 32-bit size is 1, the next 8 bytes are read as the
    big-endian 64-bit size and returned instead. The file is left positioned
    right after the header (8 or 16 bytes past where it started).

    Args:
        f: binary file positioned at the start of an atom.

    Returns:
        Tuple ``(size, atom_type)``.
    """
    head = f.read(8)

    n = struct.unpack('>I', head[:4])[0]
    atom_type = str(head[4:], 'utf-8')

    if n == 1:
        # decode 64-bit size
        n = struct.unpack('>Q', f.read(8))[0]

    # No explicit del / gc.collect() here: the header buffers are a few bytes
    # each and a full collection per header read only costs time.
    return n, atom_type


def extract_moov(src_filename, dst_filename, n_chunk=65536, verbose=False):
    """Copy everything from the start of the top-level 'moov' atom to the end
    of ``src_filename`` into ``dst_filename``.

    Args:
        src_filename: file to search for a top-level 'moov' atom.
        dst_filename: file to create; it is only opened once 'moov' is found.
        n_chunk: number of bytes copied per read.
        verbose: show a tqdm progress bar while copying.

    Raises:
        ValueError: if a seek did not reach the requested position, or if no
            'moov' atom can be reached by walking the top-level atoms (end of
            file, or an atom whose size is smaller than its own header).
    """
    with open(src_filename, 'rb') as f_src:

        f_src.seek(0, 2)
        src_end = f_src.tell()

        # look for 'moov'
        src_cur = 0
        while True:
            if src_cur + 8 > src_end:
                raise ValueError('moov not found')
            f_src.seek(src_cur)
            if f_src.tell() != src_cur: raise ValueError(f'seek failed? {f_src.tell()} != {src_cur}')

            n, atom_type = read_atom_head(f_src)
            if atom_type == 'moov': break
            if n < 8:
                # A size of 0 (atom runs to the end of the file) or any size
                # below the header size cannot be stepped over; the walk used
                # to spin forever on it.
                raise ValueError(f'moov not found: {atom_type} atom at {src_cur} has size {n}')
            src_cur += n

        # 'moov' is found
        moov_start = src_cur

        # copy moov
        f_src.seek(moov_start)
        if f_src.tell() != moov_start: raise ValueError(f'seek failed? {f_src.tell()} != {moov_start}')

        if verbose:
            it_moov = tqdm(range(moov_start, src_end, n_chunk))
        else:
            it_moov = range(moov_start, src_end, n_chunk)

        with open(dst_filename, 'wb') as f_dst:
            # one read per chunk start; the last read simply comes back short,
            # so no separate tail copy is needed
            for _ in it_moov:
                f_dst.write(f_src.read(n_chunk))


# ## regenerating sample tables from `mdat`

def is_aac_header(buf, frame_length):
    """Heuristically decide whether ``buf`` starts like a stereo AAC raw data block.

    The first byte must carry element id 0b001 in its top three bits and an
    instance tag of 0 in the next four. The lowest bit (common window) then
    selects which bit-field checks are applied to the following bytes. The
    name of the matching pattern is printed whenever the result is True.

    Args:
        buf: indexable sequence of byte values; up to index 5 is read,
            depending on the branch taken.
        frame_length: only consulted for the 'degenerated block' pattern,
            which requires it to be 7.

    Returns:
        True if one of the known patterns matches, otherwise False.
    """
    lead = buf[0]
    element_id = (lead >> 5) & 0b111
    instance_tag = (lead >> 1) & 0b1111
    shares_window = lead & 0b1

    # anything but a channel pair with tag 0 is rejected (not stereo)
    if not (element_id == 0b001 and instance_tag == 0b0000):
        return False

    if shares_window == 1:
        second = buf[1]

        if ((second >> 5) & 0b11) == 0b10:
            # eight-short-sequence
            sfb = second & 0b1111
            level = ((buf[3] & 0b1111111) << 1) | ((buf[4] >> 7) & 0b1)
            if sfb >= 8 and 100 <= level <= 200:
                print('eight-short-sequence')
                return True
            return False

        # common case
        reserved = (second >> 7) & 0b1
        sfb = ((second & 0b1111) << 2) | ((buf[2] >> 6) & 0b11)
        predictor_flag = (buf[2] >> 5) & 0b1
        level = ((buf[2] & 0b111) << 5) | ((buf[3] >> 3) & 0b11111)
        step = (buf[4] >> 2) & 0b11111
        matched = (
            reserved == 0
            and 40 <= sfb <= 51
            and predictor_flag == 0
            and 100 <= level <= 228
            and step != 0
        )
        if matched:
            print('common case')
        return matched

    fourth = buf[3]

    # bits 3..1 of the fourth byte: pulse / tns / gain-control flags
    if (fourth & 0b00001110) == 0:
        # degenerated block with padding
        sfb = ((buf[2] & 0b111) << 3) | ((fourth >> 5) & 0b111)
        sfb_second = buf[5] & 0b11111
        if sfb == 0 and sfb_second == 0:
            print('degenerated block with padding')
            return True
        return False

    sfb = ((buf[2] & 0b1111) << 2) | ((fourth >> 6) & 0b11)
    if sfb == 0:
        # degenerated block
        if frame_length == 7:
            print('degenerated block')
            return True
        return False

    level = buf[1]
    reserved = (buf[2] >> 7) & 0b1
    if not (100 <= level <= 200) or reserved != 0:
        return False

    if ((buf[2] >> 5) & 0b11) != 0b10:
        # multiple windows
        if 48 <= sfb <= 51:
            print('multiple windows')
            return True
        return False

    # multiple windows and eight-short-sequence
    if (buf[2] & 0b1111) >= 8:
        print('multiple windows and eight-short-sequence')
        return True
    return False


def recover_sample_tables_from_mdat_fast(filename, verbose=False):
    """Scan the 'mdat' atom of ``filename`` and rebuild video/audio sample tables.

    The payload is split into alternating pieces:

    * A piece that begins with the six bytes ``00 00 00 02 09 F0`` is taken
      as an H.264 sample. Starting after those six bytes, 4-byte big-endian
      length prefixes are followed from unit to unit until the byte at the
      next unit position is 0x20 or 0x21, or the end of 'mdat' is reached.
    * Any other piece is taken as AAC data and extends, byte by byte, until
      the next occurrence of the six-byte H.264 marker (searched from six
      bytes after the piece start) or the end of 'mdat'.

    Args:
        filename: MP4 file to scan. Its 'mdat' atom may have size 0 (an
            incomplete file); the payload is then taken to run to the end of
            the file.
        verbose: print one line per AAC piece that is found.

    Returns:
        ``(mov_table, aac_table)``: two lists of ``(offset, size)`` tuples,
        where ``offset`` is the absolute position in the file. The size of
        the last H.264 entry can reach past the end of 'mdat' when its final
        length prefix points beyond it.

    Raises:
        ValueError: if a seek did not reach the requested position, or if no
            'mdat' atom can be reached by walking the top-level atoms.
    """
    # marker that opens every H.264 piece: 32-bit length 2, then 09 F0
    h264_head = b'\x00\x00\x00\x02\x09\xF0'

    mov_table = []
    aac_table = []

    with open(filename, 'rb') as f_in:

        f_in.seek(0, 2)
        file_end = f_in.tell()

        # look for 'mdat'
        src_cur = 0
        while True:
            if src_cur + 8 > file_end:
                raise ValueError('mdat not found')
            f_in.seek(src_cur)
            if f_in.tell() != src_cur: raise ValueError(f'seek failed? {f_in.tell()} != {src_cur}')

            n, atom_type = read_atom_head(f_in)
            if atom_type == 'mdat': break
            if n < 8:
                # cannot step over an atom of size 0 or below its header size;
                # the walk used to loop forever on it
                raise ValueError(f'mdat not found: {atom_type} atom at {src_cur} has size {n}')
            src_cur += n

        # 'mdat' is found
        if n == 0:
            # mdat from incomplete mp4 file: payload runs to the end of file
            mdat_end = file_end
            # The header is 8 bytes, plus 8 more when the 32-bit size field is
            # 1 (space reserved for a 64-bit size).
            f_in.seek(src_cur)
            head = struct.unpack('>I', f_in.read(4))[0]
            f_in.seek(src_cur + (16 if head == 1 else 8))
        else:
            # read_atom_head left the file right after the header
            mdat_end = src_cur + n

        index = 0
        while True:
            cur = f_in.tell()
            if cur >= mdat_end: break

            # A short read at the very end of the file compares unequal and is
            # handled as AAC data instead of raising from struct/indexing.
            if f_in.read(6) == h264_head:
                # h264 chunk
                frame_length = 6
                while True:
                    buf = f_in.read(4)
                    # truncated length prefix: keep what has been measured so far
                    if len(buf) < 4: break
                    # 0x20 / 0x21 here means an AAC block follows
                    if (buf[0] & 0b11111110) == 0x20: break
                    frame_length += struct.unpack('>I', buf)[0] + 4
                    if cur+frame_length >= mdat_end: break
                    f_in.seek(cur + frame_length)

                mov_table.append((cur, frame_length))
            else:
                # aac chunk: advance one byte at a time until the next H.264 marker
                frame_length = 6
                while True:
                    if f_in.read(6) == h264_head: break
                    frame_length += 1
                    if cur + frame_length >= mdat_end: break
                    f_in.seek(cur + frame_length)

                # never report more bytes than are left in mdat
                frame_length = min(frame_length, mdat_end - cur)

                if verbose: print(f'{index}: [aac] {cur}, {frame_length}')
                aac_table.append((cur, frame_length))

            f_in.seek(cur+frame_length)
            index += 1

    return mov_table, aac_table


# ## rebuilding `moov` from sample tables

def copy_atom_box(target_type, target_size, f_src, f_dst, only_header=True):
    """Read the next atom header from ``f_src`` and write a header to ``f_dst``.

    The written header is a 32-bit big-endian size followed by
    ``target_type``. With ``only_header=False`` the following
    ``size - 8`` bytes are copied from ``f_src`` as well, where ``size`` is
    the size that was written.

    Args:
        target_type: atom type that must be found at the current position.
        target_size: size to write into the header; ``None`` keeps the size
            read from ``f_src``.
        f_src: binary file positioned at the start of an atom.
        f_dst: binary file to write to.
        only_header: when True, nothing beyond the header is copied.

    Returns:
        The size read from the source header.

    Raises:
        ValueError: if the atom found in ``f_src`` is not ``target_type``.
    """
    src_size, found_type = read_atom_head(f_src)
    if found_type != target_type:
        raise ValueError(f'{target_type} not found but {found_type}')

    out_size = src_size if target_size is None else target_size

    f_dst.write(struct.pack('>I', out_size))
    f_dst.write(target_type.encode('utf-8'))

    if only_header:
        return src_size

    payload = f_src.read(out_size-8)
    f_dst.write(payload)
    return src_size


def _patch_box_size(f_dst, box_start):
    """Set the 32-bit size at ``box_start`` to the number of bytes written since."""
    box_end = f_dst.tell()
    f_dst.seek(box_start)
    f_dst.write(struct.pack('>I', box_end - box_start))
    f_dst.seek(box_end)


def recover_moov_from_sample_tables(
    moov_const,
    ref_filename, dst_filename,
    mov_table, aac_table,
    full_copy=True, n_chunk=65536,
    verbose=False,
    ):
    """Write a new 'moov' atom built from a reference 'moov' and recovered tables.

    The reference file is read strictly in order and must contain, from its
    first byte on: moov, mvhd, a video trak (tkhd, edts/elst, mdia with mdhd,
    hdlr, minf with vmhd, dinf, stbl with stsd, stts, stss, stsc, stsz, stco),
    an audio trak (tkhd, edts/elst, mdia with mdhd, hdlr, minf with smhd,
    dinf, stbl with stsd, stts, stsc, stsz, stco, sgpd, sbgp) and udta.
    Durations, stts, stss, stsz, stco and the audio stsc are regenerated;
    everything else is copied from the reference.

    Every enclosing box (moov, trak, mdia, minf, stbl) gets its size from the
    bytes actually written into it, so the result does not depend on the
    child sizes of one particular reference file.

    Args:
        moov_const: indexable with five values, in this order: video sample
            duration, audio sample duration, mvhd timescale, video media
            timescale, audio media timescale.
        ref_filename: file that starts with the reference 'moov' atom.
        dst_filename: file to create.
        mov_table: list of ``(offset, size)`` for the video samples.
        aac_table: list of ``(offset, size)`` for the audio samples.
        full_copy: when True, whatever follows 'udta' in the reference file
            is appended unchanged.
        n_chunk: number of bytes copied per read in that final copy.
        verbose: show a tqdm progress bar during the final copy.

    Raises:
        ValueError: if a seek did not reach the requested position, if an
            expected atom is not found in the reference, or if mvhd, tkhd,
            elst or mdhd do not have the expected size.
    """
    mov_sample_duration = moov_const[0]
    aac_sample_duration = moov_const[1]
    mvhd_timescale = moov_const[2]
    mov_timescale = moov_const[3]
    aac_timescale = moov_const[4]

    n_mov_table = len(mov_table)
    n_aac_table = len(aac_table)

    # mdhd durations are in the media timescale, tkhd/mvhd/elst durations in
    # the movie (mvhd) timescale
    mov_mdhd_duration = n_mov_table * mov_sample_duration
    aac_mdhd_duration = n_aac_table * aac_sample_duration
    mov_tkhd_duration = int(mov_mdhd_duration * mvhd_timescale / mov_timescale)
    aac_tkhd_duration = int(aac_mdhd_duration * mvhd_timescale / aac_timescale)
    mvhd_duration = max(mov_tkhd_duration, aac_tkhd_duration)

    mov_sizes = [s for o, s in mov_table]
    aac_sizes = [s for o, s in aac_table]
    mov_offsets = [o for o, s in mov_table]
    aac_offsets = [o for o, s in aac_table]

    # sizes of the regenerated leaf boxes: 8-byte header + payload
    stts_size = 8 + 4 + 4 + 8       # version/flags, entry count, one entry
    aac_stsc_size = 8 + 4 + 4 + 12  # version/flags, entry count, one entry
    mov_stsz_size = len(mov_sizes)* 4 + 20
    aac_stsz_size = len(aac_sizes)* 4 + 20
    mov_stco_size = len(mov_offsets)* 4 + 16
    aac_stco_size = len(aac_offsets)* 4 + 16

    # one sync sample is written for every 150 video samples
    mov_stss_entries = (len(mov_sizes)-1)//150 + 1
    mov_stss_size = mov_stss_entries* 4 + 16

    with open(ref_filename, 'rb') as f_moov, open(dst_filename, 'wb') as f_dst:

        cur = 0
        f_moov.seek(cur)
        if f_moov.tell() != cur: raise ValueError(f'seek failed? {f_moov.tell()} != {cur}')

        # moov (size is patched once all children are written)
        moov_start = f_dst.tell()
        copy_atom_box('moov', None, f_moov, f_dst, only_header=True)

        # mvhd : timescale and duration are replaced
        n = copy_atom_box('mvhd', None, f_moov, f_dst, only_header=True)
        if n != (100+8): raise ValueError(f'ERROR: mvhd box size is not 108 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:12])
        f_dst.write(struct.pack('>I', mvhd_timescale))
        f_dst.write(struct.pack('>I', mvhd_duration))
        f_dst.write(buf[20:])

        # ---- movie track ----
        trak_start = f_dst.tell()
        copy_atom_box('trak', None, f_moov, f_dst, only_header=True)

        # tkhd : duration = mov_tkhd_duration
        n = copy_atom_box('tkhd', None, f_moov, f_dst, only_header=True)
        if n != (84+8): raise ValueError(f'ERROR: mov tkhd box size is not 92 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:20])
        f_dst.write(struct.pack('>I', mov_tkhd_duration))
        # Copy the complete remainder. Stopping at byte 76 left the box 8 bytes
        # shorter than the size in its header.
        f_dst.write(buf[24:])

        copy_atom_box('edts', None, f_moov, f_dst, only_header=True)

        # elst : payload bytes 8..12 are replaced by the track duration
        n = copy_atom_box('elst', None, f_moov, f_dst, only_header=True)
        if n != (20+8): raise ValueError(f'ERROR: mov elst box size is not 28 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:8])
        f_dst.write(struct.pack('>I', mov_tkhd_duration))
        f_dst.write(buf[12:])

        # mdia
        mdia_start = f_dst.tell()
        copy_atom_box('mdia', None, f_moov, f_dst, only_header=True)

        # mdhd : duration = mov_mdhd_duration
        n = copy_atom_box('mdhd', None, f_moov, f_dst, only_header=True)
        if n != (24+8): raise ValueError(f'ERROR: mov mdhd box size is not 32 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:16])
        f_dst.write(struct.pack('>I', mov_mdhd_duration))
        f_dst.write(buf[20:])

        copy_atom_box('hdlr', None, f_moov, f_dst, only_header=False)

        # minf
        minf_start = f_dst.tell()
        copy_atom_box('minf', None, f_moov, f_dst, only_header=True)
        copy_atom_box('vmhd', None, f_moov, f_dst, only_header=False)
        copy_atom_box('dinf', None, f_moov, f_dst, only_header=False)

        # stbl
        stbl_start = f_dst.tell()
        copy_atom_box('stbl', None, f_moov, f_dst, only_header=True)
        copy_atom_box('stsd', None, f_moov, f_dst, only_header=False)

        # stts : a single entry (n_mov_table, mov_sample_duration)
        n = copy_atom_box('stts', stts_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(struct.pack('>I', 1)) # n_entries
        f_dst.write(struct.pack('>I', n_mov_table)) # sample_count
        f_dst.write(struct.pack('>I', mov_sample_duration)) # sample_duration

        # stss : samples 1, 151, 301, ...
        n = copy_atom_box('stss', mov_stss_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(struct.pack('>I', mov_stss_entries)) # n_entries
        for i_ss in range(mov_stss_entries):
            f_dst.write(struct.pack('>I', 1 + 150*i_ss))

        copy_atom_box('stsc', None, f_moov, f_dst, only_header=False)

        # stsz
        n = copy_atom_box('stsz', mov_stsz_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(struct.pack('>I', 0)) # sample_size
        f_dst.write(struct.pack('>I', len(mov_sizes))) # n_entries
        for sz in mov_sizes:
            f_dst.write(struct.pack('>I', sz))

        # stco (32-bit offsets)
        n = copy_atom_box('stco', mov_stco_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(struct.pack('>I', len(mov_offsets))) # n_entries
        for co in mov_offsets:
            f_dst.write(struct.pack('>I', co))

        # stco is the last child of stbl / minf / mdia / trak
        for box_start in (stbl_start, minf_start, mdia_start, trak_start):
            _patch_box_size(f_dst, box_start)

        # ---- audio track ----
        trak_start = f_dst.tell()
        copy_atom_box('trak', None, f_moov, f_dst, only_header=True)

        # tkhd : duration in the movie timescale, like the video tkhd and the
        # audio elst below (it used to get the media-timescale value)
        n = copy_atom_box('tkhd', None, f_moov, f_dst, only_header=True)
        if n != (84+8): raise ValueError(f'ERROR: audio tkhd box size is not 92 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:20])
        f_dst.write(struct.pack('>I', aac_tkhd_duration))
        f_dst.write(buf[24:])

        copy_atom_box('edts', None, f_moov, f_dst, only_header=True)

        # elst : payload bytes 12..16 are replaced by the track duration
        n = copy_atom_box('elst', None, f_moov, f_dst, only_header=True)
        if n != (32+8): raise ValueError(f'ERROR: audio elst box size is not 40 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:12])
        f_dst.write(struct.pack('>I', aac_tkhd_duration))
        f_dst.write(buf[16:])

        # mdia
        mdia_start = f_dst.tell()
        copy_atom_box('mdia', None, f_moov, f_dst, only_header=True)

        # mdhd : duration = aac_mdhd_duration
        n = copy_atom_box('mdhd', None, f_moov, f_dst, only_header=True)
        if n != (24+8): raise ValueError(f'ERROR: audio mdhd box size is not 32 but {n}')
        buf = f_moov.read(n-8)
        f_dst.write(buf[:16])
        f_dst.write(struct.pack('>I', aac_mdhd_duration))
        f_dst.write(buf[20:])

        copy_atom_box('hdlr', None, f_moov, f_dst, only_header=False)

        # minf
        minf_start = f_dst.tell()
        copy_atom_box('minf', None, f_moov, f_dst, only_header=True)
        copy_atom_box('smhd', None, f_moov, f_dst, only_header=False)
        copy_atom_box('dinf', None, f_moov, f_dst, only_header=False)

        # stbl
        stbl_start = f_dst.tell()
        copy_atom_box('stbl', None, f_moov, f_dst, only_header=True)
        copy_atom_box('stsd', None, f_moov, f_dst, only_header=False)

        # stts : a single entry (n_aac_table, aac_sample_duration)
        n = copy_atom_box('stts', stts_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(struct.pack('>I', 1)) # n_entries
        f_dst.write(struct.pack('>I', n_aac_table)) # sample_count
        f_dst.write(struct.pack('>I', aac_sample_duration)) # sample_duration

        # stsc : the reference table is dropped and replaced by the single
        # entry (1, 1, 1), i.e. one sample per chunk
        n = copy_atom_box('stsc', aac_stsc_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(struct.pack('>I', 1)) # n_entries
        f_dst.write(struct.pack('>I', 1)) # first_chunk
        f_dst.write(struct.pack('>I', 1)) # samples_per_chunk
        f_dst.write(struct.pack('>I', 1)) # sample_desc_id

        # stsz
        n = copy_atom_box('stsz', aac_stsz_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(struct.pack('>I', 0)) # sample_size
        f_dst.write(struct.pack('>I', len(aac_sizes))) # n_entries
        for sz in aac_sizes:
            f_dst.write(struct.pack('>I', sz))

        # stco (32-bit offsets)
        n = copy_atom_box('stco', aac_stco_size, f_moov, f_dst, only_header=True)
        buf = f_moov.read(n-8)
        f_dst.write(buf[:4]) # version + flags
        f_dst.write(struct.pack('>I', len(aac_offsets))) # n_entries
        for co in aac_offsets:
            f_dst.write(struct.pack('>I', co))

        copy_atom_box('sgpd', None, f_moov, f_dst, only_header=False)

        copy_atom_box('sbgp', None, f_moov, f_dst, only_header=False)

        # sbgp is the last child of stbl / minf / mdia / trak
        for box_start in (stbl_start, minf_start, mdia_start, trak_start):
            _patch_box_size(f_dst, box_start)

        # udta is the last child of moov
        copy_atom_box('udta', None, f_moov, f_dst, only_header=False)
        _patch_box_size(f_dst, moov_start)

        if not full_copy: return

        # just copy the rest of reference moov file
        rest_start = f_moov.tell()
        f_moov.seek(0, 2)
        ref_end = f_moov.tell()
        f_moov.seek(rest_start)
        if f_moov.tell() != rest_start: raise ValueError(f'seek failed? {f_moov.tell()} != {rest_start}')
        if verbose:
            it_moov = tqdm(range(rest_start, ref_end, n_chunk))
        else:
            it_moov = range(rest_start, ref_end, n_chunk)
        # one read per chunk start; the last read simply comes back short
        for _ in it_moov:
            f_dst.write(f_moov.read(n_chunk))


# ## merging the recovered `moov`

def merge_moov(
    src_filename,
    moov_filename,
    dst_filename,
    n_chunk=65536,
    verbose=False):
    """Assemble a new file from the source's media data and a rebuilt ``moov``.

    The output consists of, in order: the source's ``ftyp`` atom, a freshly
    written 16-byte ``mdat`` header (32-bit size field 1, then a 64-bit
    length of ``file_size - 0x20``), the source bytes starting 32 bytes
    after the end of ``ftyp``, and the whole content of ``moov_filename``.

    Args:
        src_filename: path of the incomplete source file.
        moov_filename: path of a file that starts with a ``moov`` atom.
        dst_filename: path of the file to create (opened before any check).
        n_chunk: number of bytes moved per read/write.
        verbose: dump the written headers and show ``tqdm`` progress bars.

    Raises:
        ValueError: if a seek lands elsewhere, if the expected ``ftyp`` /
            ``mdat`` / ``moov`` atoms are not found where expected, or if
            the source ``mdat`` size is not reported as 0.
    """
    with open(src_filename, 'rb') as f_src, \
            open(moov_filename, 'rb') as f_moov, \
            open(dst_filename, 'wb') as f_dst:

        def seek_checked(fh, pos):
            # Absolute seek followed by a sanity check of the position.
            fh.seek(pos)
            if fh.tell() != pos:
                raise ValueError(f'seek failed? {fh.tell()} != {pos}')

        def size_of(fh):
            # Seeking to the end reveals the total length of the file.
            fh.seek(0, 2)
            return fh.tell()

        def copy_to_dst(fh, start, stop):
            # Move [start, stop) from fh (already positioned at start) to
            # f_dst in n_chunk-sized pieces.
            steps = range(start, stop, n_chunk)
            if verbose:
                steps = tqdm(steps)
            last = start
            for last in steps:
                f_dst.write(fh.read(n_chunk))
            # Kept from the original: one more read for whatever might
            # remain after the last chunk start.
            if stop - last > 0:
                f_dst.write(fh.read(stop - last))

        file_size = size_of(f_src)

        # ftyp: copied verbatim from the very beginning of the source
        seek_checked(f_src, 0)
        ftyp_size, atom_type = read_atom_head(f_src)
        if atom_type != 'ftyp':
            raise ValueError('ftyp not found')

        seek_checked(f_src, 0)
        ftyp_bytes = f_src.read(ftyp_size)
        f_dst.write(ftyp_bytes)
        if verbose:
            print_binaries(ftyp_bytes)

        # mdat: its head is read from wherever the ftyp copy left f_src
        mdat_size, atom_type = read_atom_head(f_src)
        if atom_type != 'mdat':
            raise ValueError('mdat not found')
        if mdat_size != 0:
            raise ValueError('size would be zero...')

        # fixed mdat header
        mdat_tag = struct.pack('>Icccc', 1, b'm', b'd', b'a', b't')
        mdat_len = struct.pack('>Q', file_size - 0x20)
        if verbose:
            print_binaries(mdat_tag)
            print_binaries(mdat_len)
        f_dst.write(mdat_tag)
        f_dst.write(mdat_len)

        # The payload copy starts after two 16-byte skips past ftyp.
        # NOTE(review): presumably one of them is the source's own 16-byte
        # mdat header; the reason for the other is not visible here - confirm.
        payload_start = ftyp_size + 16 + 16
        seek_checked(f_src, payload_start)
        copy_to_dst(f_src, payload_start, file_size)
        print('')

        # moov: validate the first atom, then append the whole file
        moov_size = size_of(f_moov)
        if verbose:
            print(f'moov_size: {moov_size}')

        seek_checked(f_moov, 0)
        _, atom_type = read_atom_head(f_moov)
        if atom_type != 'moov':
            raise ValueError('something is wrong...')

        seek_checked(f_moov, 0)
        copy_to_dst(f_moov, 0, moov_size)


# # main program to recover corrupted MP4

def finsta360(
    moov_const,
    src_filename,
    ref_filename=None,
    dst_filename=None,
    keep_temp=False,
    verbose=False):
    """Recover an incomplete MP4 by rebuilding its ``moov`` from the ``mdat``.

    Modes, selected by which file names are given:

    * only ``src_filename``: print the atoms of the source and return;
    * ``ref_filename`` too: extract the reference ``moov``, regenerate the
      sample tables from the source and rebuild a new ``moov`` (test mode);
    * ``dst_filename`` too: additionally merge the rebuilt ``moov`` with
      the source into ``dst_filename``.

    Args:
        moov_const: sequence ``(mov_sample_duration, aac_sample_duration,
            mvhd_timescale, mov_timescale, aac_timescale)``.
        src_filename: path of the corrupted file.
        ref_filename: path of a complete file used as reference, or None.
        dst_filename: path of the recovered file to write, or None.
        keep_temp: keep the temporary ``finsta360_*.moov`` files.
        verbose: print atom listings, sample counts and durations.
    """
    if ref_filename is None:
        # check mode
        print_atoms(src_filename)
        return

    # temporary files
    ref_moov_filename = 'finsta360_ref.moov'
    new_moov_filename = 'finsta360_new.moov'

    # 1) extract reference moov
    print('')
    print('########################################')
    print(f'# 1) extracting reference moov from\n\t{ref_filename}')
    extract_moov(ref_filename, ref_moov_filename)
    if verbose:
        print_atoms(ref_moov_filename)

    # 2) regenerate sample tables from mdat
    print('')
    print('########################################')
    print(f'# 2) regenerate sample tables from mdat in\n\t{src_filename}')
    # The caller's verbosity is honoured here, and the stray sys.exit()
    # that used to follow this call is gone: it made steps 3) and 4)
    # unreachable and terminated the calling process.
    mov_table, aac_table = recover_sample_tables_from_mdat_fast(
        src_filename,
        verbose=verbose)
    if verbose:
        n_mov_table = len(mov_table)
        n_aac_table = len(aac_table)
        print(f'number of samples (movie) : {n_mov_table}')
        print(f'number of samples (audio) : {n_aac_table}')

        # constants (only needed for the duration report below)
        mov_sample_duration = moov_const[0]
        aac_sample_duration = moov_const[1]
        mvhd_timescale = moov_const[2]
        mov_timescale = moov_const[3]
        aac_timescale = moov_const[4]

        mov_mdhd_duration = n_mov_table * mov_sample_duration
        aac_mdhd_duration = n_aac_table * aac_sample_duration
        mov_tkhd_duration = int(mov_mdhd_duration * mvhd_timescale / mov_timescale)

        # mvhd
        mvhd_duration_sec = mov_tkhd_duration / mvhd_timescale
        print(f'mvhd duration  : {mvhd_duration_sec} sec / {mvhd_duration_sec/60} min')
        # movie mdhd
        mov_duration_sec = mov_mdhd_duration / mov_timescale
        print(f'movie duration : {mov_duration_sec} sec / {mov_duration_sec/60} min')
        # audio mdhd
        aac_duration_sec = aac_mdhd_duration / aac_timescale
        print(f'audio duration : {aac_duration_sec} sec / {aac_duration_sec/60} min')

    # 3) rebuilding moov from the sample tables
    print('')
    print('########################################')
    print(f'# 3) rebuilding moov from the sample tables')
    recover_moov_from_sample_tables(
        moov_const,
        ref_moov_filename,
        new_moov_filename,
        mov_table, aac_table,
        full_copy=True,
    )
    if verbose:
        print_atoms(new_moov_filename)

    # Without an output file this is test mode: nothing is merged.
    if dst_filename is not None:
        # 4) merging the rebuilt moov into the source
        print('')
        print('########################################')
        print(f'# 4) merging the rebuilt moov into\n\t{src_filename}\nas\n\t{dst_filename}')
        merge_moov(
            src_filename,
            new_moov_filename,
            dst_filename,
        )

    if not keep_temp:
        os.remove(ref_moov_filename)
        os.remove(new_moov_filename)


def usage():
    """Print the command-line help text and terminate via ``sys.exit()``."""
    help_lines = (
        'finsta360.py : to finalize incomplete MP4 of Insta360 ONE-X',
        'https://github.com/kichiki/finsta360',
        'USAGE: finsta360.py [options]',
        '\t-s file : source file, that is, corrupted mp4 (insv) file',
        '\t-r file : complete mp4 (insv) file as a reference',
        '\t-o file : output recovered mp4 (insv) file',
        '\t-v      : to set verbose mode',
        '\t-k      : to keep temporary files',
        '\t          (reference and recovered moov files, finsta360*.moov)',
        'If you provide only source file (-s), program prints the metadata',
        'If you dont provide output file (-o), program just runs without writing',
    )
    for text in help_lines:
        print(text)
    sys.exit()


if __name__ == '__main__':
    # NOTE: the original finsta360 command-line driver is kept below,
    # commented out.  This variant of the script instead writes the AAC
    # samples located by recover_sample_tables_from_mdat_fast() to a file.

    # src_filename = None
    # ref_filename = None
    # dst_filename = None
    # verbose = False
    # keep_temp = False
    # i = 1
    # while i < len(sys.argv):
    #     if sys.argv[i] == '-s':
    #         src_filename = sys.argv[i+1]
    #         i += 2
    #     elif sys.argv[i] == '-r':
    #         ref_filename = sys.argv[i+1]
    #         i += 2
    #     elif sys.argv[i] == '-o':
    #         dst_filename = sys.argv[i+1]
    #         i += 2
    #     elif sys.argv[i] == '-v':
    #         verbose = True
    #         i += 1
    #     elif sys.argv[i] == '-k':
    #         keep_temp = True
    #         i += 1
    #     else:
    #         usage()
    #         break

    # if src_filename is None:
    #     print(f'you must provie source file {src_filename}')
    #     usage()
    #     sys.exit()
    # if not os.path.exists(src_filename):
    #     print(f'source file {src_filename} does not exist')
    #     sys.exit()
    # if not ref_filename is None and not os.path.exists(ref_filename):
    #     print(f'reference file {ref_filename} does not exist')
    #     sys.exit()
    # if not dst_filename is None and os.path.exists(dst_filename):
    #     print(f'output file {dst_filename} already exists')
    #     sys.exit()


    # # constants
    # mov_sample_duration = 3000
    # aac_sample_duration = 1024

    # mvhd_timescale = 1000
    # mov_timescale = 90000
    # aac_timescale = 48000

    # moov_const = (mov_sample_duration,
    #               aac_sample_duration,
    #               mvhd_timescale,
    #               mov_timescale,
    #               aac_timescale)


    # finsta360(
    #     moov_const,
    #     src_filename,
    #     ref_filename,
    #     dst_filename,
    #     keep_temp,
    #     verbose)


    # sys.exit()

    if len(sys.argv) != 3:
        print(f'Usage: python {sys.argv[0]} in.mp4 out.aac')
        sys.exit(1)

    filename_in = sys.argv[1]
    filename_out = sys.argv[2]

    _, aac_table = recover_sample_tables_from_mdat_fast(filename_in, verbose=True)

    with open(filename_in, 'rb') as f_in, open(filename_out, 'wb') as f_out:
        # Each table entry is (offset, length); the samples are written
        # back to back, one read/write per sample instead of per byte.
        for offset, length in aac_table:
            f_in.seek(offset)
            f_out.write(f_in.read(length))

        # MACHIGAI (Japanese for "mistake"): what follows appears to be an
        # abandoned attempt to reserve 7 bytes at the start of the output
        # (f_out.seek(7) before the loop, num counting the bytes written)
        # and to fill them in afterwards with a single ADTS-style header.
        # f_out.seek(7)
        # num = 0
        #         num += 1
        # frame_length = 7 + num
        # buf_int = [0] * 7
        # buf_int[0] = 0xFF
        # buf_int[1] = 0xF1
        # buf_int[2] = 0x4C
        # buf_int[3] = 0b10000000 | ((frame_length & 0b11_00000000_000) >> 11)
        # buf_int[4] = ((frame_length & 0b00_11111111_000) >> 3)
        # buf_int[5] = 0b00011111 | ((frame_length & 0b00_00000000_111) << 5)
        # buf_int[6] = 0b11111100
        # f_out.seek(0)
        # f_out.write(bytes(buf_int))

付録2

（正常な）MP4からサンプルの最初の6バイトをビット列で表示するコードです。AACのフレーム境界を判別できないかと作りました。stscを読み解く部分が大変でした。

chunk.py

#!/usr/bin/env python
from datetime import datetime
from datetime import timedelta
import os.path
import struct
import sys


def parse_box(f):
    """Read one box header from *f* and return ``(box_size, box_type)``.

    Eight bytes are consumed: a big-endian 32-bit size followed by the
    four-character type.  When the 32-bit size is 1, eight more bytes are
    consumed and interpreted as a big-endian 64-bit size, which is returned
    instead.  A returned size of 0 is passed through unchanged for the
    caller to interpret.

    The type is decoded as Latin-1 so that every byte value maps to a
    character; four-character codes containing a high byte (for example
    ``b'\\xa9nam'``) would make a UTF-8 decode raise ``UnicodeDecodeError``.

    Raises:
        struct.error: if *f* ends before the header is complete.
    """
    header = f.read(8)
    (box_size,) = struct.unpack('>I', header[:4])
    box_type = header[4:].decode('latin-1')

    if box_size == 1:
        # extended header: the real size follows as a 64-bit integer
        (box_size,) = struct.unpack('>Q', f.read(8))

    return box_size, box_type


def _unpack_records(buf, start, fmt, count):
    """Unpack up to *count* consecutive *fmt* records from *buf* at *start*.

    Only complete records are returned, so a truncated table simply yields
    fewer entries than its header announces.
    """
    record = struct.Struct(fmt)
    available = max(0, len(buf) - start) // record.size
    return [record.unpack_from(buf, start + i * record.size)
            for i in range(min(count, available))]


def _dump_samples(f, sc_table, sz_table, co_table):
    """Print offset, size and the first six bytes (as bits) of each sample."""
    sample = 0
    for idx, (first_chunk, samples_per_chunk, _desc_id) in enumerate(sc_table):
        # An stsc entry covers the chunks from its own first_chunk (1-based)
        # up to the chunk before the next entry's first_chunk; the last
        # entry runs through the final chunk of the offset table.
        if idx + 1 < len(sc_table):
            end_chunk = sc_table[idx + 1][0]
        else:
            end_chunk = len(co_table) + 1

        for chunk in range(first_chunk - 1, min(end_chunk - 1, len(co_table))):
            offset = co_table[chunk]
            for _ in range(samples_per_chunk):
                if sample >= len(sz_table):
                    # size table exhausted (e.g. truncated stsz)
                    return
                size = sz_table[sample]
                f.seek(offset)
                # Pad short reads near the end of the file so that the
                # value always represents six bytes.
                head = f.read(6).ljust(6, b'\x00')
                binary = int.from_bytes(head, 'big')
                mark = 'v' if size < 100 else ' '
                print(f'{mark}{offset:10d} {size:6d} {binary:059_b}')
                offset += size
                sample += 1


def main(filename_in):
    """Dump the leading six bytes of every sample of an MP4 file as bits.

    The box tree is walked through the containers listed below.  For each
    track, once its ``stsc``, ``stsz`` and ``stco``/``co64`` tables have
    all been read, one line per sample is printed: a marker (``v`` when the
    sample is smaller than 100 bytes), the sample's file offset, its size
    and its first six bytes in binary.

    Args:
        filename_in: path of the MP4 file to inspect.

    Raises:
        ValueError: if a box announces a size smaller than its own header.
    """
    containers = ('moov', 'trak', 'edts', 'mdia', 'minf', 'dinf', 'stbl')

    with open(filename_in, 'rb') as f_in:
        f_in.seek(0, 2)
        file_size = f_in.tell()

        cur = 0
        sc_table = []
        sz_table = []
        co_table = []
        # A box needs at least an 8-byte header; stop when less is left.
        while file_size - cur >= 8:
            f_in.seek(cur)

            box_size, box_type = parse_box(f_in)
            # 8 bytes normally, 16 when the 64-bit size field was read
            header_size = f_in.tell() - cur
            if box_size == 0:
                # size 0: the box extends to the end of the file
                box_size = file_size - cur
            if box_size < header_size:
                raise ValueError(
                    f'invalid size {box_size} for box {box_type!r} at offset {cur}')

            if box_type in containers:
                if box_type == 'trak':
                    # A new track starts: drop tables left over from a
                    # previous track that never became complete.
                    sc_table, sz_table, co_table = [], [], []
                # Descend: step over the header only, so that every child
                # (including the first one) is examined by this loop.
                cur += header_size
                continue

            if box_type in ('stsc', 'stsz', 'stco', 'co64'):
                # payload layout: version (1 byte), flags (3 bytes), then
                # the table-specific fields
                buf = f_in.read(box_size - header_size)

                if box_type == 'stsc':
                    # Sample-to-Chunk Atoms
                    (n_entries,) = struct.unpack_from('>I', buf, 4)
                    sc_table.extend(_unpack_records(buf, 8, '>III', n_entries))
                elif box_type == 'stsz':
                    # Sample Size Atoms
                    sample_size, n_entries = struct.unpack_from('>II', buf, 4)
                    if sample_size != 0:
                        # All samples share one size and no per-sample
                        # table follows.
                        sz_table.extend([sample_size] * n_entries)
                    else:
                        sz_table.extend(
                            size for (size,) in
                            _unpack_records(buf, 12, '>I', n_entries))
                elif box_type == 'stco':
                    # Chunk Offset Atoms
                    (n_entries,) = struct.unpack_from('>I', buf, 4)
                    co_table.extend(
                        offset for (offset,) in
                        _unpack_records(buf, 8, '>I', n_entries))
                else:
                    # 64-bit chunk offset atoms
                    (n_entries,) = struct.unpack_from('>I', buf, 4)
                    co_table.extend(
                        offset for (offset,) in
                        _unpack_records(buf, 8, '>Q', n_entries))

            if sc_table and sz_table and co_table:
                print('########################            ########################')
                _dump_samples(f_in, sc_table, sz_table, co_table)
                print('')

                sc_table = []
                sz_table = []
                co_table = []

            cur += box_size


if __name__ == '__main__':
    # Exactly one argument is expected: the MP4 file to inspect.
    if len(sys.argv) == 2:
        filename_in = sys.argv[1]
    else:
        print(f'Usage: python {sys.argv[0]} in.mp4')
        sys.exit(1)

    main(filename_in)