
src/utils/mp4-tools.ts

  1. import { ElementaryStreamTypes } from '../loader/fragment';
  2. import { sliceUint8 } from './typed-array';
  3. import { utf8ArrayToStr } from '../demux/id3';
  4. import { logger } from '../utils/logger';
  5. import Hex from './hex';
  6. import type { PassthroughTrack, UserdataSample } from '../types/demuxer';
  7. import type { DecryptData } from '../loader/level-key';
  8.  
  9. const UINT32_MAX = Math.pow(2, 32) - 1;
  10. const push = [].push;
  11.  
  12. // We are using fixed track IDs for driving the MP4 remuxer
  13. // instead of following the TS PIDs.
  14. // There is no reason not to do this, and some browsers/SourceBuffer-demuxers
  15. // may not like it if there are TrackID "switches"
  16. // See https://github.com/video-dev/hls.js/issues/1331
  17. // Here we are mapping our internal track types to constant MP4 track IDs
  18. // With MSE one can currently have only one track of each type, and we mux
  19. // whatever video/audio rendition into them.
  20. export const RemuxerTrackIdConfig = {
  21. video: 1,
  22. audio: 2,
  23. id3: 3,
  24. text: 4,
  25. };
  26.  
  27. export function bin2str(data: Uint8Array): string {
  28. return String.fromCharCode.apply(null, data);
  29. }
  30.  
  31. export function readUint16(buffer: Uint8Array, offset: number): number {
  32. const val = (buffer[offset] << 8) | buffer[offset + 1];
  33. return val < 0 ? 65536 + val : val;
  34. }
  35.  
  36. export function readUint32(buffer: Uint8Array, offset: number): number {
  37. const val = readSint32(buffer, offset);
  38. return val < 0 ? 4294967296 + val : val;
  39. }
  40.  
  41. export function readSint32(buffer: Uint8Array, offset: number): number {
  42. return (
  43. (buffer[offset] << 24) |
  44. (buffer[offset + 1] << 16) |
  45. (buffer[offset + 2] << 8) |
  46. buffer[offset + 3]
  47. );
  48. }
  49.  
  50. export function writeUint32(buffer: Uint8Array, offset: number, value: number) {
  51. buffer[offset] = value >> 24;
  52. buffer[offset + 1] = (value >> 16) & 0xff;
  53. buffer[offset + 2] = (value >> 8) & 0xff;
  54. buffer[offset + 3] = value & 0xff;
  55. }
  56.  
  57. // Find the data for a box specified by its path
  58. export function findBox(data: Uint8Array, path: string[]): Uint8Array[] {
  59. const results = [] as Uint8Array[];
  60. if (!path.length) {
  61. // short-circuit the search for empty paths
  62. return results;
  63. }
  64. const end = data.byteLength;
  65.  
  66. for (let i = 0; i < end; ) {
  67. const size = readUint32(data, i);
  68. const type = bin2str(data.subarray(i + 4, i + 8));
  69. const endbox = size > 1 ? i + size : end;
  70.  
  71. if (type === path[0]) {
  72. if (path.length === 1) {
  73. // this is the end of the path and we've found the box we were
  74. // looking for
  75. results.push(data.subarray(i + 8, endbox));
  76. } else {
  77. // recursively search for the next box along the path
  78. const subresults = findBox(data.subarray(i + 8, endbox), path.slice(1));
  79. if (subresults.length) {
  80. push.apply(results, subresults);
  81. }
  82. }
  83. }
  84. i = endbox;
  85. }
  86.  
  87. // we've finished searching all of data
  88. return results;
  89. }
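// Illustrative usage (editor's sketch, not part of the original module): walk
// nested boxes with findBox(). Each result is a subarray of the input that
// starts just after the matched box's 8-byte header. The version-0 tfdt read
// below is an assumption made to keep the example short.
function exampleCollectBaseDecodeTimes(fragment: Uint8Array): number[] {
  // 'moof' > 'traf' > 'tfdt' carries the baseMediaDecodeTime of each track run
  return findBox(fragment, ['moof', 'traf', 'tfdt']).map((tfdt) =>
    readUint32(tfdt, 4)
  );
}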
  90.  
  91. type SidxInfo = {
  92. earliestPresentationTime: number;
  93. timescale: number;
  94. version: number;
  95. referencesCount: number;
  96. references: any[];
  97. };
  98.  
  99. export function parseSegmentIndex(sidx: Uint8Array): SidxInfo | null {
  100. const references: any[] = [];
  101.  
  102. const version = sidx[0];
  103.  
  104. // set the initial offset; we skip the reference ID (not needed)
  105. let index = 8;
  106.  
  107. const timescale = readUint32(sidx, index);
  108. index += 4;
  109.  
  110. // TODO: parse earliestPresentationTime and firstOffset
  111. // usually zero in our case
  112. const earliestPresentationTime = 0;
  113. const firstOffset = 0;
  114.  
  115. if (version === 0) {
  116. index += 8;
  117. } else {
  118. index += 16;
  119. }
  120.  
  121. // skip reserved
  122. index += 2;
  123.  
  124. let startByte = sidx.length + firstOffset;
  125.  
  126. const referencesCount = readUint16(sidx, index);
  127. index += 2;
  128.  
  129. for (let i = 0; i < referencesCount; i++) {
  130. let referenceIndex = index;
  131.  
  132. const referenceInfo = readUint32(sidx, referenceIndex);
  133. referenceIndex += 4;
  134.  
  135. const referenceSize = referenceInfo & 0x7fffffff;
  136. const referenceType = (referenceInfo & 0x80000000) >>> 31;
  137.  
  138. if (referenceType === 1) {
  139. // eslint-disable-next-line no-console
  140. console.warn('SIDX has hierarchical references (not supported)');
  141. return null;
  142. }
  143.  
  144. const subsegmentDuration = readUint32(sidx, referenceIndex);
  145. referenceIndex += 4;
  146.  
  147. references.push({
  148. referenceSize,
  149. subsegmentDuration, // unscaled
  150. info: {
  151. duration: subsegmentDuration / timescale,
  152. start: startByte,
  153. end: startByte + referenceSize - 1,
  154. },
  155. });
  156.  
  157. startByte += referenceSize;
  158.  
  159. // Skipping 1 bit for |startsWithSap|, 3 bits for |sapType|, and 28 bits
  160. // for |sapDelta|.
  161. referenceIndex += 4;
  162.  
  163. // skip to next ref
  164. index = referenceIndex;
  165. }
  166.  
  167. return {
  168. earliestPresentationTime,
  169. timescale,
  170. version,
  171. referencesCount,
  172. references,
  173. };
  174. }
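// Illustrative usage (editor's sketch, not part of the original module): sum
// the parsed 'sidx' subsegment durations to estimate the indexed duration in
// seconds. `sidxPayload` is assumed to be one entry returned by
// findBox(data, ['sidx']).
function exampleSidxTotalDuration(sidxPayload: Uint8Array): number {
  const sidx = parseSegmentIndex(sidxPayload);
  if (!sidx) {
    return 0;
  }
  return sidx.references.reduce((total, ref) => total + ref.info.duration, 0);
}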
  175.  
  176. /**
  177. * Parses an MP4 initialization segment and extracts stream type and
  178. * timescale values for any declared tracks. Timescale values indicate the
  179. * number of clock ticks per second to assume for time-based values
  180. * elsewhere in the MP4.
  181. *
  182. * To determine the start time of an MP4, you need two pieces of
  183. * information: the timescale unit and the earliest base media decode
  184. * time. Multiple timescales can be specified within an MP4 but the
  185. * base media decode time is always expressed in the timescale from
  186. * the media header box for the track:
  187. * ```
  188. * moov > trak > mdia > mdhd.timescale
  189. * moov > trak > mdia > hdlr
  190. * ```
  191. * @param initSegment {Uint8Array} the bytes of the init segment
  192. * @return {InitData} a hash keyed by track id and track type holding the
  193. * timescale, type and codec info found for each declared track.
  194. */
  195.  
  196. export interface InitDataTrack {
  197. timescale: number;
  198. id: number;
  199. codec: string;
  200. }
  201.  
  202. type HdlrType = ElementaryStreamTypes.AUDIO | ElementaryStreamTypes.VIDEO;
  203.  
  204. export interface InitData extends Array<any> {
  205. [index: number]:
  206. | {
  207. timescale: number;
  208. type: HdlrType;
  209. default?: {
  210. duration: number;
  211. flags: number;
  212. };
  213. }
  214. | undefined;
  215. audio?: InitDataTrack;
  216. video?: InitDataTrack;
  217. caption?: InitDataTrack;
  218. }
  219.  
  220. export function parseInitSegment(initSegment: Uint8Array): InitData {
  221. const result: InitData = [];
  222. const traks = findBox(initSegment, ['moov', 'trak']);
  223. for (let i = 0; i < traks.length; i++) {
  224. const trak = traks[i];
  225. const tkhd = findBox(trak, ['tkhd'])[0];
  226. if (tkhd) {
  227. let version = tkhd[0];
  228. let index = version === 0 ? 12 : 20;
  229. const trackId = readUint32(tkhd, index);
  230. const mdhd = findBox(trak, ['mdia', 'mdhd'])[0];
  231. if (mdhd) {
  232. version = mdhd[0];
  233. index = version === 0 ? 12 : 20;
  234. const timescale = readUint32(mdhd, index);
  235. const hdlr = findBox(trak, ['mdia', 'hdlr'])[0];
  236. if (hdlr) {
  237. const hdlrType = bin2str(hdlr.subarray(8, 12));
  238. const type: HdlrType | undefined = {
  239. soun: ElementaryStreamTypes.AUDIO as const,
  240. vide: ElementaryStreamTypes.VIDEO as const,
  241. }[hdlrType];
  242. if (type) {
  243. // Parse codec details
  244. const stsd = findBox(trak, ['mdia', 'minf', 'stbl', 'stsd'])[0];
  245. let codec;
  246. if (stsd) {
  247. codec = bin2str(stsd.subarray(12, 16));
  248. // TODO: Parse codec details to be able to build MIME type.
  249. // stsd.start += 8;
  250. // const codecBox = findBox(stsd, [codec])[0];
  251. // if (codecBox) {
  252. // TODO: Codec parsing support for avc1, mp4a, hevc, av01...
  253. // }
  254. }
  255. result[trackId] = { timescale, type };
  256. result[type] = { timescale, id: trackId, codec };
  257. }
  258. }
  259. }
  260. }
  261. }
  262.  
  263. const trex = findBox(initSegment, ['moov', 'mvex', 'trex']);
  264. trex.forEach((trex) => {
  265. const trackId = readUint32(trex, 4);
  266. const track = result[trackId];
  267. if (track) {
  268. track.default = {
  269. duration: readUint32(trex, 12),
  270. flags: readUint32(trex, 20),
  271. };
  272. }
  273. });
  274.  
  275. return result;
  276. }
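// Illustrative usage (editor's sketch, not part of the original module): the
// parsed init data exposes tracks both by numeric track id and by 'audio' /
// 'video' keys; the latter carry the track id, timescale and the
// four-character sample entry type read from the stsd box.
function exampleLogInitSegmentTracks(initSegment: Uint8Array): void {
  const initData = parseInitSegment(initSegment);
  const { audio, video } = initData;
  if (video) {
    logger.log(`video track ${video.id}: ${video.codec} @ ${video.timescale} Hz`);
  }
  if (audio) {
    logger.log(`audio track ${audio.id}: ${audio.codec} @ ${audio.timescale} Hz`);
  }
}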
  277.  
  278. export function patchEncyptionData(
  279. initSegment: Uint8Array | undefined,
  280. decryptdata: DecryptData | null
  281. ): Uint8Array | undefined {
  282. if (!initSegment || !decryptdata) {
  283. return initSegment;
  284. }
  285. const keyId = decryptdata.keyId;
  286. if (keyId && decryptdata.isCommonEncryption) {
  287. const traks = findBox(initSegment, ['moov', 'trak']);
  288. traks.forEach((trak) => {
  289. const stsd = findBox(trak, ['mdia', 'minf', 'stbl', 'stsd'])[0];
  290.  
  291. // skip the sample entry count
  292. const sampleEntries = stsd.subarray(8);
  293. let encBoxes = findBox(sampleEntries, ['enca']);
  294. const isAudio = encBoxes.length > 0;
  295. if (!isAudio) {
  296. encBoxes = findBox(sampleEntries, ['encv']);
  297. }
  298. encBoxes.forEach((enc) => {
  299. const encBoxChildren = isAudio ? enc.subarray(28) : enc.subarray(78);
  300. const sinfBoxes = findBox(encBoxChildren, ['sinf']);
  301. sinfBoxes.forEach((sinf) => {
  302. const tenc = parseSinf(sinf);
  303. if (tenc) {
  304. // Look for default key id (keyID offset is always 8 within the tenc box):
  305. const tencKeyId = tenc.subarray(8, 24);
  306. if (!tencKeyId.some((b) => b !== 0)) {
  307. logger.log(
  308. `[eme] Patching keyId in 'enc${
  309. isAudio ? 'a' : 'v'
  310. }>sinf>>tenc' box: ${Hex.hexDump(tencKeyId)} -> ${Hex.hexDump(
  311. keyId
  312. )}`
  313. );
  314. tenc.set(keyId, 8);
  315. }
  316. }
  317. });
  318. });
  319. });
  320. }
  321.  
  322. return initSegment;
  323. }
  324.  
  325. export function parseSinf(sinf: Uint8Array): Uint8Array | null {
  326. const schm = findBox(sinf, ['schm'])[0];
  327. if (schm) {
  328. const scheme = bin2str(schm.subarray(4, 8));
  329. if (scheme === 'cbcs' || scheme === 'cenc') {
  330. return findBox(sinf, ['schi', 'tenc'])[0];
  331. }
  332. }
  333. logger.error(`[eme] missing 'schm' box`);
  334. return null;
  335. }
  336.  
  337. /**
  338. * Determine the base media decode start time, in seconds, for an MP4
  339. * fragment. If multiple fragments are specified, the earliest time is
  340. * returned.
  341. *
  342. * The base media decode time can be parsed from track fragment
  343. * metadata:
  344. * ```
  345. * moof > traf > tfdt.baseMediaDecodeTime
  346. * ```
  347. * It requires the timescale value from the mdhd to interpret.
  348. *
  349. * @param initData {InitData} a hash of track type to timescale values
  350. * @param fmp4 {Uint8Array} the bytes of the mp4 fragment
  351. * @return {number} the earliest base media decode start time for the
  352. * fragment, in seconds
  353. */
  354. export function getStartDTS(initData: InitData, fmp4: Uint8Array): number {
  355. // we need info from two children of each track fragment box
  356. return (
  357. findBox(fmp4, ['moof', 'traf']).reduce((result: number | null, traf) => {
  358. const tfdt = findBox(traf, ['tfdt'])[0];
  359. const version = tfdt[0];
  360. const start = findBox(traf, ['tfhd']).reduce(
  361. (result: number | null, tfhd) => {
  362. // get the track id from the tfhd
  363. const id = readUint32(tfhd, 4);
  364. const track = initData[id];
  365. if (track) {
  366. let baseTime = readUint32(tfdt, 4);
  367. if (version === 1) {
  368. baseTime *= Math.pow(2, 32);
  369. baseTime += readUint32(tfdt, 8);
  370. }
  371. // assume a 90kHz clock if no timescale was specified
  372. const scale = track.timescale || 90e3;
  373. // convert base time to seconds
  374. const startTime = baseTime / scale;
  375. if (
  376. isFinite(startTime) &&
  377. (result === null || startTime < result)
  378. ) {
  379. return startTime;
  380. }
  381. }
  382. return result;
  383. },
  384. null
  385. );
  386. if (
  387. start !== null &&
  388. isFinite(start) &&
  389. (result === null || start < result)
  390. ) {
  391. return start;
  392. }
  393. return result;
  394. }, null) || 0
  395. );
  396. }
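// Illustrative usage (editor's sketch, not part of the original module):
// combining parseInitSegment() and getStartDTS() to locate a fragment on the
// media timeline. `initSegment` and `fragment` are assumed inputs holding the
// bytes of the initialization segment and of one fmp4 segment respectively.
function exampleFragmentStartTime(
  initSegment: Uint8Array,
  fragment: Uint8Array
): number {
  const initData = parseInitSegment(initSegment);
  // Earliest baseMediaDecodeTime across all track fragments, in seconds
  return getStartDTS(initData, fragment);
}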
  397.  
  398. /*
  399. For Reference:
  400. aligned(8) class TrackFragmentHeaderBox
  401. extends FullBox(‘tfhd’, 0, tf_flags){
  402. unsigned int(32) track_ID;
  403. // all the following are optional fields
  404. unsigned int(64) base_data_offset;
  405. unsigned int(32) sample_description_index;
  406. unsigned int(32) default_sample_duration;
  407. unsigned int(32) default_sample_size;
  408. unsigned int(32) default_sample_flags
  409. }
  410. */
  411. export function getDuration(data: Uint8Array, initData: InitData) {
  412. let rawDuration = 0;
  413. let videoDuration = 0;
  414. let audioDuration = 0;
  415. const trafs = findBox(data, ['moof', 'traf']);
  416. for (let i = 0; i < trafs.length; i++) {
  417. const traf = trafs[i];
  418. // There is only one tfhd & trun per traf
  419. // This is true for CMAF style content, and we should perhaps check the ftyp
  420. // and only look for a single trun then, but for ISOBMFF we should check
  421. // for multiple track runs.
  422. const tfhd = findBox(traf, ['tfhd'])[0];
  423. // get the track id from the tfhd
  424. const id = readUint32(tfhd, 4);
  425. const track = initData[id];
  426. if (!track) {
  427. continue;
  428. }
  429. const trackDefault = track.default;
  430. const tfhdFlags = readUint32(tfhd, 0) | trackDefault?.flags!;
  431. let sampleDuration: number | undefined = trackDefault?.duration;
  432. if (tfhdFlags & 0x000008) {
  433. // 0x000008 indicates the presence of the default_sample_duration field
  434. if (tfhdFlags & 0x000002) {
  435. // 0x000002 indicates the presence of the sample_description_index field, which precedes default_sample_duration
  436. // If present, the default_sample_duration exists at byte offset 12
  437. sampleDuration = readUint32(tfhd, 12);
  438. } else {
  439. // Otherwise, the duration is at byte offset 8
  440. sampleDuration = readUint32(tfhd, 8);
  441. }
  442. }
  443. // assume a 90kHz clock if no timescale was specified
  444. const timescale = track.timescale || 90e3;
  445. const truns = findBox(traf, ['trun']);
  446. for (let j = 0; j < truns.length; j++) {
  447. rawDuration = computeRawDurationFromSamples(truns[j]);
  448. if (!rawDuration && sampleDuration) {
  449. const sampleCount = readUint32(truns[j], 4);
  450. rawDuration = sampleDuration * sampleCount;
  451. }
  452. if (track.type === ElementaryStreamTypes.VIDEO) {
  453. videoDuration += rawDuration / timescale;
  454. } else if (track.type === ElementaryStreamTypes.AUDIO) {
  455. audioDuration += rawDuration / timescale;
  456. }
  457. }
  458. }
  459. if (videoDuration === 0 && audioDuration === 0) {
  460. // If sample durations are not available in the traf, use the sidx subsegment_duration
  461. let sidxDuration = 0;
  462. const sidxs = findBox(data, ['sidx']);
  463. for (let i = 0; i < sidxs.length; i++) {
  464. const sidx = parseSegmentIndex(sidxs[i]);
  465. if (sidx?.references) {
  466. sidxDuration += sidx.references.reduce(
  467. (dur, ref) => dur + ref.info.duration || 0,
  468. 0
  469. );
  470. }
  471. }
  472.  
  473. return sidxDuration;
  474. }
  475. if (videoDuration) {
  476. return videoDuration;
  477. }
  478. return audioDuration;
  479. }
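// Illustrative usage (editor's sketch, not part of the original module):
// getDuration() prefers per-sample durations from trun/tfhd and falls back to
// sidx subsegment durations, so a caller only needs the fragment bytes plus
// the parsed init data.
function exampleFragmentDuration(
  initSegment: Uint8Array,
  fragment: Uint8Array
): number {
  return getDuration(fragment, parseInitSegment(initSegment));
}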
  480.  
  481. /*
  482. For Reference:
  483. aligned(8) class TrackRunBox
  484. extends FullBox(‘trun’, version, tr_flags) {
  485. unsigned int(32) sample_count;
  486. // the following are optional fields
  487. signed int(32) data_offset;
  488. unsigned int(32) first_sample_flags;
  489. // all fields in the following array are optional
  490. {
  491. unsigned int(32) sample_duration;
  492. unsigned int(32) sample_size;
  493. unsigned int(32) sample_flags
  494. if (version == 0)
  495. { unsigned int(32) sample_composition_time_offset; }
  496. else
  497. { signed int(32) sample_composition_time_offset; }
  498. }[ sample_count ]
  499. }
  500. */
  501. export function computeRawDurationFromSamples(trun): number {
  502. const flags = readUint32(trun, 0);
  503. // Flags are at offset 0, non-optional sample_count is at offset 4. Therefore we start 8 bytes in.
  504. // Each field is an int32, which is 4 bytes
  505. let offset = 8;
  506. // data-offset-present flag
  507. if (flags & 0x000001) {
  508. offset += 4;
  509. }
  510. // first-sample-flags-present flag
  511. if (flags & 0x000004) {
  512. offset += 4;
  513. }
  514.  
  515. let duration = 0;
  516. const sampleCount = readUint32(trun, 4);
  517. for (let i = 0; i < sampleCount; i++) {
  518. // sample-duration-present flag
  519. if (flags & 0x000100) {
  520. const sampleDuration = readUint32(trun, offset);
  521. duration += sampleDuration;
  522. offset += 4;
  523. }
  524. // sample-size-present flag
  525. if (flags & 0x000200) {
  526. offset += 4;
  527. }
  528. // sample-flags-present flag
  529. if (flags & 0x000400) {
  530. offset += 4;
  531. }
  532. // sample-composition-time-offsets-present flag
  533. if (flags & 0x000800) {
  534. offset += 4;
  535. }
  536. }
  537. return duration;
  538. }
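// Worked example (editor's note, not part of the original module): with
// tr_flags = 0x000305 (data-offset, first-sample-flags, sample-duration and
// sample-size present), the loop above starts reading samples at byte 16
// (4 bytes each for version/flags, sample_count, data_offset and
// first_sample_flags) and each sample record occupies 8 bytes, of which only
// the first 4 (the duration) are accumulated.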
  539.  
  540. export function offsetStartDTS(
  541. initData: InitData,
  542. fmp4: Uint8Array,
  543. timeOffset: number
  544. ) {
  545. findBox(fmp4, ['moof', 'traf']).forEach((traf) => {
  546. findBox(traf, ['tfhd']).forEach((tfhd) => {
  547. // get the track id from the tfhd
  548. const id = readUint32(tfhd, 4);
  549. const track = initData[id];
  550. if (!track) {
  551. return;
  552. }
  553. // assume a 90kHz clock if no timescale was specified
  554. const timescale = track.timescale || 90e3;
  555. // get the base media decode time from the tfdt
  556. findBox(traf, ['tfdt']).forEach((tfdt) => {
  557. const version = tfdt[0];
  558. let baseMediaDecodeTime = readUint32(tfdt, 4);
  559.  
  560. if (version === 0) {
  561. baseMediaDecodeTime -= timeOffset * timescale;
  562. baseMediaDecodeTime = Math.max(baseMediaDecodeTime, 0);
  563. writeUint32(tfdt, 4, baseMediaDecodeTime);
  564. } else {
  565. baseMediaDecodeTime *= Math.pow(2, 32);
  566. baseMediaDecodeTime += readUint32(tfdt, 8);
  567. baseMediaDecodeTime -= timeOffset * timescale;
  568. baseMediaDecodeTime = Math.max(baseMediaDecodeTime, 0);
  569. const upper = Math.floor(baseMediaDecodeTime / (UINT32_MAX + 1));
  570. const lower = Math.floor(baseMediaDecodeTime % (UINT32_MAX + 1));
  571. writeUint32(tfdt, 4, upper);
  572. writeUint32(tfdt, 8, lower);
  573. }
  574. });
  575. });
  576. });
  577. }
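// Illustrative usage (editor's sketch, not part of the original module):
// offsetStartDTS() rewrites the tfdt baseMediaDecodeTime fields in place, so
// pairing it with getStartDTS() rebases a fragment to start at roughly zero.
function exampleRebaseFragmentToZero(
  initData: InitData,
  fragment: Uint8Array
): Uint8Array {
  const start = getStartDTS(initData, fragment);
  offsetStartDTS(initData, fragment, start); // mutates `fragment` in place
  return fragment;
}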
  578.  
  579. // TODO: Check if the last moof+mdat pair is part of the valid range
  580. export function segmentValidRange(data: Uint8Array): SegmentedRange {
  581. const segmentedRange: SegmentedRange = {
  582. valid: null,
  583. remainder: null,
  584. };
  585.  
  586. const moofs = findBox(data, ['moof']);
  587. if (!moofs) {
  588. return segmentedRange;
  589. } else if (moofs.length < 2) {
  590. segmentedRange.remainder = data;
  591. return segmentedRange;
  592. }
  593. const last = moofs[moofs.length - 1];
  594. // Offset by 8 bytes; findBox offsets the start by as much
  595. segmentedRange.valid = sliceUint8(data, 0, last.byteOffset - 8);
  596. segmentedRange.remainder = sliceUint8(data, last.byteOffset - 8);
  597. return segmentedRange;
  598. }
  599.  
  600. export interface SegmentedRange {
  601. valid: Uint8Array | null;
  602. remainder: Uint8Array | null;
  603. }
  604.  
  605. export function appendUint8Array(
  606. data1: Uint8Array,
  607. data2: Uint8Array
  608. ): Uint8Array {
  609. const temp = new Uint8Array(data1.length + data2.length);
  610. temp.set(data1);
  611. temp.set(data2, data1.length);
  612.  
  613. return temp;
  614. }
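// Illustrative usage (editor's sketch, not part of the original module): how
// progressive parsing might combine appendUint8Array() and segmentValidRange().
// Bytes from the start of the last 'moof' onward are carried over and prepended
// to the next chunk; `pendingChunk` and `newChunk` are assumed names.
function exampleSplitProgressiveChunk(
  pendingChunk: Uint8Array | null,
  newChunk: Uint8Array
): SegmentedRange {
  const combined = pendingChunk
    ? appendUint8Array(pendingChunk, newChunk)
    : newChunk;
  return segmentValidRange(combined);
}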
  615.  
  616. export interface IEmsgParsingData {
  617. schemeIdUri: string;
  618. value: string;
  619. timeScale: number;
  620. presentationTimeDelta?: number;
  621. presentationTime?: number;
  622. eventDuration: number;
  623. id: number;
  624. payload: Uint8Array;
  625. }
  626.  
  627. export function parseSamples(
  628. timeOffset: number,
  629. track: PassthroughTrack
  630. ): UserdataSample[] {
  631. const seiSamples = [] as UserdataSample[];
  632. const videoData = track.samples;
  633. const timescale = track.timescale;
  634. const trackId = track.id;
  635. let isHEVCFlavor = false;
  636.  
  637. const moofs = findBox(videoData, ['moof']);
  638. moofs.map((moof) => {
  639. const moofOffset = moof.byteOffset - 8;
  640. const trafs = findBox(moof, ['traf']);
  641. trafs.map((traf) => {
  642. // get the base media decode time from the tfdt
  643. const baseTime = findBox(traf, ['tfdt']).map((tfdt) => {
  644. const version = tfdt[0];
  645. let result = readUint32(tfdt, 4);
  646. if (version === 1) {
  647. result *= Math.pow(2, 32);
  648. result += readUint32(tfdt, 8);
  649. }
  650. return result / timescale;
  651. })[0];
  652.  
  653. if (baseTime !== undefined) {
  654. timeOffset = baseTime;
  655. }
  656.  
  657. return findBox(traf, ['tfhd']).map((tfhd) => {
  658. const id = readUint32(tfhd, 4);
  659. const tfhdFlags = readUint32(tfhd, 0) & 0xffffff;
  660. const baseDataOffsetPresent = (tfhdFlags & 0x000001) !== 0;
  661. const sampleDescriptionIndexPresent = (tfhdFlags & 0x000002) !== 0;
  662. const defaultSampleDurationPresent = (tfhdFlags & 0x000008) !== 0;
  663. let defaultSampleDuration = 0;
  664. const defaultSampleSizePresent = (tfhdFlags & 0x000010) !== 0;
  665. let defaultSampleSize = 0;
  666. const defaultSampleFlagsPresent = (tfhdFlags & 0x000020) !== 0;
  667. let tfhdOffset = 8;
  668.  
  669. if (id === trackId) {
  670. if (baseDataOffsetPresent) {
  671. tfhdOffset += 8;
  672. }
  673. if (sampleDescriptionIndexPresent) {
  674. tfhdOffset += 4;
  675. }
  676. if (defaultSampleDurationPresent) {
  677. defaultSampleDuration = readUint32(tfhd, tfhdOffset);
  678. tfhdOffset += 4;
  679. }
  680. if (defaultSampleSizePresent) {
  681. defaultSampleSize = readUint32(tfhd, tfhdOffset);
  682. tfhdOffset += 4;
  683. }
  684. if (defaultSampleFlagsPresent) {
  685. tfhdOffset += 4;
  686. }
  687. if (track.type === 'video') {
  688. isHEVCFlavor = isHEVC(track.codec);
  689. }
  690.  
  691. findBox(traf, ['trun']).map((trun) => {
  692. const version = trun[0];
  693. const flags = readUint32(trun, 0) & 0xffffff;
  694. const dataOffsetPresent = (flags & 0x000001) !== 0;
  695. let dataOffset = 0;
  696. const firstSampleFlagsPresent = (flags & 0x000004) !== 0;
  697. const sampleDurationPresent = (flags & 0x000100) !== 0;
  698. let sampleDuration = 0;
  699. const sampleSizePresent = (flags & 0x000200) !== 0;
  700. let sampleSize = 0;
  701. const sampleFlagsPresent = (flags & 0x000400) !== 0;
  702. const sampleCompositionOffsetsPresent = (flags & 0x000800) !== 0;
  703. let compositionOffset = 0;
  704. const sampleCount = readUint32(trun, 4);
  705. let trunOffset = 8; // past version, flags, and sample count
  706.  
  707. if (dataOffsetPresent) {
  708. dataOffset = readUint32(trun, trunOffset);
  709. trunOffset += 4;
  710. }
  711. if (firstSampleFlagsPresent) {
  712. trunOffset += 4;
  713. }
  714.  
  715. let sampleOffset = dataOffset + moofOffset;
  716.  
  717. for (let ix = 0; ix < sampleCount; ix++) {
  718. if (sampleDurationPresent) {
  719. sampleDuration = readUint32(trun, trunOffset);
  720. trunOffset += 4;
  721. } else {
  722. sampleDuration = defaultSampleDuration;
  723. }
  724. if (sampleSizePresent) {
  725. sampleSize = readUint32(trun, trunOffset);
  726. trunOffset += 4;
  727. } else {
  728. sampleSize = defaultSampleSize;
  729. }
  730. if (sampleFlagsPresent) {
  731. trunOffset += 4;
  732. }
  733. if (sampleCompositionOffsetsPresent) {
  734. if (version === 0) {
  735. compositionOffset = readUint32(trun, trunOffset);
  736. } else {
  737. compositionOffset = readSint32(trun, trunOffset);
  738. }
  739. trunOffset += 4;
  740. }
  741. if (track.type === ElementaryStreamTypes.VIDEO) {
  742. let naluTotalSize = 0;
  743. while (naluTotalSize < sampleSize) {
  744. const naluSize = readUint32(videoData, sampleOffset);
  745. sampleOffset += 4;
  746. if (isSEIMessage(isHEVCFlavor, videoData[sampleOffset])) {
  747. const data = videoData.subarray(
  748. sampleOffset,
  749. sampleOffset + naluSize
  750. );
  751. parseSEIMessageFromNALu(
  752. data,
  753. isHEVCFlavor ? 2 : 1,
  754. timeOffset + compositionOffset / timescale,
  755. seiSamples
  756. );
  757. }
  758. sampleOffset += naluSize;
  759. naluTotalSize += naluSize + 4;
  760. }
  761. }
  762.  
  763. timeOffset += sampleDuration / timescale;
  764. }
  765. });
  766. }
  767. });
  768. });
  769. });
  770. return seiSamples;
  771. }
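// Illustrative usage (editor's sketch, not part of the original module):
// parseSamples() scans the video samples of a passthrough (fmp4) track for
// SEI NAL units, returning CEA-608/708 and unregistered user-data messages
// with timestamps relative to the supplied fragment start time.
function exampleExtractSeiSamples(
  track: PassthroughTrack,
  fragmentStart: number
): UserdataSample[] {
  return parseSamples(fragmentStart, track);
}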
  772.  
  773. function isHEVC(codec: string) {
  774. if (!codec) {
  775. return false;
  776. }
  777. const delimit = codec.indexOf('.');
  778. const baseCodec = delimit < 0 ? codec : codec.substring(0, delimit);
  779. return (
  780. baseCodec === 'hvc1' ||
  781. baseCodec === 'hev1' ||
  782. // Dolby Vision
  783. baseCodec === 'dvh1' ||
  784. baseCodec === 'dvhe'
  785. );
  786. }
  787.  
  788. function isSEIMessage(isHEVCFlavor: boolean, naluHeader: number) {
  789. if (isHEVCFlavor) {
  790. const naluType = (naluHeader >> 1) & 0x3f;
  791. return naluType === 39 || naluType === 40;
  792. } else {
  793. const naluType = naluHeader & 0x1f;
  794. return naluType === 6;
  795. }
  796. }
  797.  
  798. export function parseSEIMessageFromNALu(
  799. unescapedData: Uint8Array,
  800. headerSize: number,
  801. pts: number,
  802. samples: UserdataSample[]
  803. ) {
  804. const data = discardEPB(unescapedData);
  805. let seiPtr = 0;
  806. // skip nal header
  807. seiPtr += headerSize;
  808. let payloadType = 0;
  809. let payloadSize = 0;
  810. let endOfCaptions = false;
  811. let b = 0;
  812.  
  813. while (seiPtr < data.length) {
  814. payloadType = 0;
  815. do {
  816. if (seiPtr >= data.length) {
  817. break;
  818. }
  819. b = data[seiPtr++];
  820. payloadType += b;
  821. } while (b === 0xff);
  822.  
  823. // Parse payload size.
  824. payloadSize = 0;
  825. do {
  826. if (seiPtr >= data.length) {
  827. break;
  828. }
  829. b = data[seiPtr++];
  830. payloadSize += b;
  831. } while (b === 0xff);
  832.  
  833. const leftOver = data.length - seiPtr;
  834.  
  835. if (!endOfCaptions && payloadType === 4 && seiPtr < data.length) {
  836. endOfCaptions = true;
  837.  
  838. const countryCode = data[seiPtr++];
  839. if (countryCode === 181) {
  840. const providerCode = readUint16(data, seiPtr);
  841. seiPtr += 2;
  842.  
  843. if (providerCode === 49) {
  844. const userStructure = readUint32(data, seiPtr);
  845. seiPtr += 4;
  846.  
  847. if (userStructure === 0x47413934) {
  848. const userDataType = data[seiPtr++];
  849.  
  850. // Raw CEA-608 bytes wrapped in CEA-708 packet
  851. if (userDataType === 3) {
  852. const firstByte = data[seiPtr++];
  853. const totalCCs = 0x1f & firstByte;
  854. const enabled = 0x40 & firstByte;
  855. const totalBytes = enabled ? 2 + totalCCs * 3 : 0;
  856. const byteArray = new Uint8Array(totalBytes);
  857. if (enabled) {
  858. byteArray[0] = firstByte;
  859. for (let i = 1; i < totalBytes; i++) {
  860. byteArray[i] = data[seiPtr++];
  861. }
  862. }
  863.  
  864. samples.push({
  865. type: userDataType,
  866. payloadType,
  867. pts,
  868. bytes: byteArray,
  869. });
  870. }
  871. }
  872. }
  873. }
  874. } else if (payloadType === 5 && payloadSize < leftOver) {
  875. endOfCaptions = true;
  876.  
  877. if (payloadSize > 16) {
  878. const uuidStrArray: Array<string> = [];
  879. for (let i = 0; i < 16; i++) {
  880. const b = data[seiPtr++].toString(16);
  881. uuidStrArray.push(b.length == 1 ? '0' + b : b);
  882.  
  883. if (i === 3 || i === 5 || i === 7 || i === 9) {
  884. uuidStrArray.push('-');
  885. }
  886. }
  887. const length = payloadSize - 16;
  888. const userDataBytes = new Uint8Array(length);
  889. for (let i = 0; i < length; i++) {
  890. userDataBytes[i] = data[seiPtr++];
  891. }
  892.  
  893. samples.push({
  894. payloadType,
  895. pts,
  896. uuid: uuidStrArray.join(''),
  897. userData: utf8ArrayToStr(userDataBytes),
  898. userDataBytes,
  899. });
  900. }
  901. } else if (payloadSize < leftOver) {
  902. seiPtr += payloadSize;
  903. } else if (payloadSize > leftOver) {
  904. break;
  905. }
  906. }
  907. }
  908.  
  909. /**
  910. * Remove Emulation Prevention Bytes from an RBSP
  911. */
  912. export function discardEPB(data: Uint8Array): Uint8Array {
  913. const length = data.byteLength;
  914. const EPBPositions = [] as Array<number>;
  915. let i = 1;
  916.  
  917. // Find all `Emulation Prevention Bytes`
  918. while (i < length - 2) {
  919. if (data[i] === 0 && data[i + 1] === 0 && data[i + 2] === 0x03) {
  920. EPBPositions.push(i + 2);
  921. i += 2;
  922. } else {
  923. i++;
  924. }
  925. }
  926.  
  927. // If no Emulation Prevention Bytes were found just return the original
  928. // array
  929. if (EPBPositions.length === 0) {
  930. return data;
  931. }
  932.  
  933. // Create a new array to hold the NAL unit data
  934. const newLength = length - EPBPositions.length;
  935. const newData = new Uint8Array(newLength);
  936. let sourceIndex = 0;
  937.  
  938. for (i = 0; i < newLength; sourceIndex++, i++) {
  939. if (sourceIndex === EPBPositions[0]) {
  940. // Skip this byte
  941. sourceIndex++;
  942. // Remove this position index
  943. EPBPositions.shift();
  944. }
  945. newData[i] = data[sourceIndex];
  946. }
  947. return newData;
  948. }
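// Worked example (editor's note, not part of the original module): for the
// input bytes [0x67, 0x00, 0x00, 0x03, 0x01] the 0x03 at index 3 is detected
// as an emulation prevention byte, so discardEPB() returns a new array
// [0x67, 0x00, 0x00, 0x01]; input containing no 00 00 03 pattern is returned
// as-is without copying.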
  949.  
  950. export function parseEmsg(data: Uint8Array): IEmsgParsingData {
  951. const version = data[0];
  952. let schemeIdUri: string = '';
  953. let value: string = '';
  954. let timeScale: number = 0;
  955. let presentationTimeDelta: number = 0;
  956. let presentationTime: number = 0;
  957. let eventDuration: number = 0;
  958. let id: number = 0;
  959. let offset: number = 0;
  960.  
  961. if (version === 0) {
  962. while (bin2str(data.subarray(offset, offset + 1)) !== '\0') {
  963. schemeIdUri += bin2str(data.subarray(offset, offset + 1));
  964. offset += 1;
  965. }
  966.  
  967. schemeIdUri += bin2str(data.subarray(offset, offset + 1));
  968. offset += 1;
  969.  
  970. while (bin2str(data.subarray(offset, offset + 1)) !== '\0') {
  971. value += bin2str(data.subarray(offset, offset + 1));
  972. offset += 1;
  973. }
  974.  
  975. value += bin2str(data.subarray(offset, offset + 1));
  976. offset += 1;
  977.  
  978. timeScale = readUint32(data, 12);
  979. presentationTimeDelta = readUint32(data, 16);
  980. eventDuration = readUint32(data, 20);
  981. id = readUint32(data, 24);
  982. offset = 28;
  983. } else if (version === 1) {
  984. offset += 4;
  985. timeScale = readUint32(data, offset);
  986. offset += 4;
  987. const leftPresentationTime = readUint32(data, offset);
  988. offset += 4;
  989. const rightPresentationTime = readUint32(data, offset);
  990. offset += 4;
  991. presentationTime = 2 ** 32 * leftPresentationTime + rightPresentationTime;
  992. if (!Number.isSafeInteger(presentationTime)) {
  993. presentationTime = Number.MAX_SAFE_INTEGER;
  994. // eslint-disable-next-line no-console
  995. console.warn(
  996. 'Presentation time exceeds safe integer limit and wrapped to max safe integer in parsing emsg box'
  997. );
  998. }
  999.  
  1000. eventDuration = readUint32(data, offset);
  1001. offset += 4;
  1002. id = readUint32(data, offset);
  1003. offset += 4;
  1004.  
  1005. while (bin2str(data.subarray(offset, offset + 1)) !== '\0') {
  1006. schemeIdUri += bin2str(data.subarray(offset, offset + 1));
  1007. offset += 1;
  1008. }
  1009.  
  1010. schemeIdUri += bin2str(data.subarray(offset, offset + 1));
  1011. offset += 1;
  1012.  
  1013. while (bin2str(data.subarray(offset, offset + 1)) !== '\0') {
  1014. value += bin2str(data.subarray(offset, offset + 1));
  1015. offset += 1;
  1016. }
  1017.  
  1018. value += bin2str(data.subarray(offset, offset + 1));
  1019. offset += 1;
  1020. }
  1021. const payload = data.subarray(offset, data.byteLength);
  1022.  
  1023. return {
  1024. schemeIdUri,
  1025. value,
  1026. timeScale,
  1027. presentationTime,
  1028. presentationTimeDelta,
  1029. eventDuration,
  1030. id,
  1031. payload,
  1032. };
  1033. }
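// Illustrative usage (editor's sketch, not part of the original module):
// collecting timed-metadata events by feeding each 'emsg' payload found in a
// fragment into parseEmsg(). Version 0 boxes carry presentationTimeDelta
// (relative to the segment start); version 1 boxes carry an absolute
// presentationTime.
function exampleCollectEmsgEvents(fragment: Uint8Array): IEmsgParsingData[] {
  return findBox(fragment, ['emsg']).map((emsg) => parseEmsg(emsg));
}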
  1034.  
  1035. export function mp4Box(type: ArrayLike<number>, ...payload: Uint8Array[]) {
  1036. const len = payload.length;
  1037. let size = 8;
  1038. let i = len;
  1039. while (i--) {
  1040. size += payload[i].byteLength;
  1041. }
  1042. const result = new Uint8Array(size);
  1043. result[0] = (size >> 24) & 0xff;
  1044. result[1] = (size >> 16) & 0xff;
  1045. result[2] = (size >> 8) & 0xff;
  1046. result[3] = size & 0xff;
  1047. result.set(type, 4);
  1048. for (i = 0, size = 8; i < len; i++) {
  1049. result.set(payload[i], size);
  1050. size += payload[i].byteLength;
  1051. }
  1052. return result;
  1053. }
  1054.  
  1055. export function mp4pssh(
  1056. systemId: Uint8Array,
  1057. keyids: Array<Uint8Array> | null,
  1058. data: Uint8Array
  1059. ) {
  1060. if (systemId.byteLength !== 16) {
  1061. throw new RangeError('Invalid system id');
  1062. }
  1063. let version;
  1064. let kids;
  1065. if (keyids) {
  1066. version = 1;
  1067. kids = new Uint8Array(keyids.length * 16);
  1068. for (let ix = 0; ix < keyids.length; ix++) {
  1069. const k = keyids[ix]; // uint8array
  1070. if (k.byteLength !== 16) {
  1071. throw new RangeError('Invalid key');
  1072. }
  1073. kids.set(k, ix * 16);
  1074. }
  1075. } else {
  1076. version = 0;
  1077. kids = new Uint8Array();
  1078. }
  1079. let kidCount;
  1080. if (version > 0) {
  1081. kidCount = new Uint8Array(4);
  1082. if (keyids!.length > 0) {
  1083. new DataView(kidCount.buffer).setUint32(0, keyids!.length, false);
  1084. }
  1085. } else {
  1086. kidCount = new Uint8Array();
  1087. }
  1088. const dataSize = new Uint8Array(4);
  1089. if (data && data.byteLength > 0) {
  1090. new DataView(dataSize.buffer).setUint32(0, data.byteLength, false);
  1091. }
  1092. return mp4Box(
  1093. [112, 115, 115, 104],
  1094. new Uint8Array([
  1095. version,
  1096. 0x00,
  1097. 0x00,
  1098. 0x00, // Flags
  1099. ]),
  1100. systemId, // 16 bytes
  1101. kidCount,
  1102. kids,
  1103. dataSize,
  1104. data || new Uint8Array()
  1105. );
  1106. }
  1107.  
  1108. export function parsePssh(initData: ArrayBuffer) {
  1109. if (!(initData instanceof ArrayBuffer) || initData.byteLength < 32) {
  1110. return null;
  1111. }
  1112. const result = {
  1113. version: 0,
  1114. systemId: '',
  1115. kids: null as null | Uint8Array[],
  1116. data: null as null | Uint8Array,
  1117. };
  1118. const view = new DataView(initData);
  1119. const boxSize = view.getUint32(0);
  1120. if (initData.byteLength !== boxSize && boxSize > 44) {
  1121. return null;
  1122. }
  1123. const type = view.getUint32(4);
  1124. if (type !== 0x70737368) {
  1125. return null;
  1126. }
  1127. result.version = view.getUint32(8) >>> 24;
  1128. if (result.version > 1) {
  1129. return null;
  1130. }
  1131. result.systemId = Hex.hexDump(new Uint8Array(initData, 12, 16));
  1132. const dataSizeOrKidCount = view.getUint32(28);
  1133. if (result.version === 0) {
  1134. if (boxSize - 32 < dataSizeOrKidCount) {
  1135. return null;
  1136. }
  1137. result.data = new Uint8Array(initData, 32, dataSizeOrKidCount);
  1138. } else if (result.version === 1) {
  1139. result.kids = [];
  1140. for (let i = 0; i < dataSizeOrKidCount; i++) {
  1141. result.kids.push(new Uint8Array(initData, 32 + i * 16, 16));
  1142. }
  1143. }
  1144. return result;
  1145. }
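// Illustrative usage (editor's sketch, not part of the original module):
// building a version 1 'pssh' box and parsing it back. The Widevine system id
// below is a well-known constant used only as sample input; `keyId` is
// assumed to be a 16-byte key identifier (mp4pssh throws otherwise).
function examplePsshRoundTrip(keyId: Uint8Array): void {
  const widevineSystemId = new Uint8Array([
    0xed, 0xef, 0x8b, 0xa9, 0x79, 0xd6, 0x4a, 0xce, 0xa3, 0xc8, 0x27, 0xdc,
    0xd5, 0x1d, 0x21, 0xed,
  ]);
  const pssh = mp4pssh(widevineSystemId, [keyId], new Uint8Array());
  const parsed = parsePssh(pssh.buffer);
  if (parsed) {
    logger.log(`pssh v${parsed.version}, systemId ${parsed.systemId}`);
  }
}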