diff options
Diffstat (limited to 'Libraries/libtheora-1.1.1/doc/spec/spec.tex')
-rw-r--r-- | Libraries/libtheora-1.1.1/doc/spec/spec.tex | 8171 |
1 files changed, 0 insertions, 8171 deletions
diff --git a/Libraries/libtheora-1.1.1/doc/spec/spec.tex b/Libraries/libtheora-1.1.1/doc/spec/spec.tex deleted file mode 100644 index b29d4ef5..00000000 --- a/Libraries/libtheora-1.1.1/doc/spec/spec.tex +++ /dev/null @@ -1,8171 +0,0 @@ -\documentclass[9pt,letterpaper]{book} - -\usepackage{latexsym} -\usepackage{amssymb} -\usepackage{amsmath} -\usepackage{bm} -\usepackage{textcomp} -\usepackage{graphicx} -\usepackage{booktabs} -\usepackage{tabularx} -\usepackage{longtable} -\usepackage{ltablex} -\usepackage{wrapfig} -\usepackage[pdfpagemode=None,pdfstartview=FitH,pdfview=FitH,colorlinks=true]% - {hyperref} - -\newtheorem{theorem}{Theorem}[section] -\newcommand{\idx}[1]{{\ensuremath{\mathit{#1}}}} -\newcommand{\qti}{\idx{qti}} -\newcommand{\qtj}{\idx{qtj}} -\newcommand{\pli}{\idx{pli}} -\newcommand{\plj}{\idx{plj}} -\newcommand{\qi}{\idx{qi}} -\newcommand{\ci}{\idx{ci}} -\newcommand{\bmi}{\idx{bmi}} -\newcommand{\bmj}{\idx{bmj}} -\newcommand{\qri}{\idx{qri}} -\newcommand{\qrj}{\idx{qrj}} -\newcommand{\hti}{\idx{hti}} -\newcommand{\sbi}{\idx{sbi}} -\newcommand{\bi}{\idx{bi}} -\newcommand{\bj}{\idx{bj}} -\newcommand{\mbi}{\idx{mbi}} -\newcommand{\mbj}{\idx{mbj}} -\newcommand{\mi}{\idx{mi}} -\newcommand{\cbi}{\idx{cbi}} -\newcommand{\qii}{\idx{qii}} -\newcommand{\ti}{\idx{ti}} -\newcommand{\tj}{\idx{tj}} -\newcommand{\rfi}{\idx{rfi}} -\newcommand{\zzi}{\idx{zzi}} -\newcommand{\ri}{\idx{ri}} -%This somewhat odd construct ensures that \bitvar{\qi}, etc., will set the -% qi in bold face, even though it is in a \mathit font, yet \bitvar{VAR} will -% set VAR in a bold, roman font. -\newcommand{\bitvar}[1]{\ensuremath{\mathbf{\bm{#1}}}} -\newcommand{\locvar}[1]{\ensuremath{\mathrm{#1}}} -\newcommand{\term}[1]{{\em #1}} -\newcommand{\bin}[1]{\ensuremath{\mathtt{b#1}}} -\newcommand{\hex}[1]{\ensuremath{\mathtt{0x#1}}} -\newcommand{\ilog}{\ensuremath{\mathop{\mathrm{ilog}}\nolimits}} -\newcommand{\round}{\ensuremath{\mathop{\mathrm{round}}\nolimits}} -\newcommand{\sign}{\ensuremath{\mathop{\mathrm{sign}}\nolimits}} -\newcommand{\lflim}{\ensuremath{\mathop{\mathrm{lflim}}\nolimits}} - -%Section-based table, figure, and equation numbering. -\numberwithin{equation}{chapter} -\numberwithin{figure}{chapter} -\numberwithin{table}{chapter} - -\keepXColumns - -\pagestyle{headings} -\bibliographystyle{alpha} - -\title{Theora Specification} -\author{Xiph.org Foundation} -\date{\today} - - -\begin{document} - -\frontmatter - -\begin{titlepage} -\maketitle -\end{titlepage} -\thispagestyle{empty} -\cleardoublepage - -\pagenumbering{roman} - -\thispagestyle{plain} -\tableofcontents -\cleardoublepage - -\thispagestyle{plain} -\listoffigures -\cleardoublepage - -\thispagestyle{plain} -\listoftables -\cleardoublepage - -\thispagestyle{plain} -\markboth{{\sc Notation and Conventions}}{{\sc Notation and Conventions}} -\chapter*{Notation and Conventions} - -All parameters either passed in or out of a decoding procedure are given in - \bitvar{bold\ face}. - -The prefix \bin{} indicates that the following value is to be interpreted as a - binary number (base 2). -\begin{verse} -{\bf Example:} The value \bin{1110100} is equal to the decimal value 116. -\end{verse} - -The prefix \hex{} indicates the the following value is to be interpreted as a - hexadecimal number (base 16). -\begin{verse} -{\bf Example:} The value \hex{74} is equal to the decimal value 116. -\end{verse} - -All arithmetic defined by this specification is exact. -However, any real numbers that do arise will always be converted back to - integers again in short order. -The entire specification can be implemented using only normal integer - operations. -All operations are to be implemented with sufficiently large integers so that - overflow cannot occur. -Where the result of a computation is to be truncated to a fixed-sized binary - representation, this will be explicitly noted. -The size given for all variables is the maximum number of bits needed to store - any value in that variable. -Intermediate computations involving that variable may require more bits. - -The following operators are defined: - -\begin{description} -\item[$|a|$] -The absolute value of a number $a$. -\begin{align*} -|a| & = \left\{\begin{array}{ll} --a, & a < 0 \\ -a, & a \ge 0 -\end{array}\right. -\end{align*} - -\item[$a*b$] -Multiplication of a number $a$ by a number $b$. -\item[$\frac{a}{b}$] -Exact division of a number $a$ by a number $b$, producing a potentially - non-integer result. - -\item[$\left\lfloor a\right\rfloor$] -The largest integer less than or equal to a real number $a$. - -\item[$\left\lceil a\right\rceil$] -The smallest integer greater than or equal to a real number $a$. - -\item[$a//b$] -Integer division of $a$ by $b$. -\begin{align*} -a//b & = \left\{\begin{array}{ll} -\left\lceil\frac{a}{b}\right\rceil, & a < 0 \\ -\left\lfloor\frac{a}{b}\right\rfloor, & a \ge 0 -\end{array}\right. -\end{align*} - -\item[$a\%b$] -The remainder from the integer division of $a$ by $b$. -\begin{align*} -a\%b & = a-|b|*\left\lfloor\frac{a}{|b|}\right\rfloor -\end{align*} -Note that with this definition, the result is always non-negative and less than - $|b|$. - -\item[$a<<b$] -The value obtained by left-shifting the two's complement integer $a$ by $b$ - bits. -For purposes of this specification, overflow is ignored, and so this is - equivalent to integer multiplication of $a$ by $2^b$. - -\item[$a>>b$] -The value obtained by right-shifting the two's complement integer $a$ by $b$ - bits, filling in the leftmost bits of the new value with $0$ if $a$ is - non-negative and $1$ if $a$ is negative. -This is {\em not} equivalent to integer division of $a$ by $2^b$. -Instead, -\begin{align*} -a>>b & = \left\lfloor\frac{a}{2^b}\right\rfloor. -\end{align*} - -\item[$\round(a)$] -Rounds a number $a$ to the nearest integer, with ties rounded away from $0$. -\begin{align*} -\round(a) = \left\{\begin{array}{ll} -\lceil a-\frac{1}{2}\rceil & a \le 0 \\ -\lfloor a+\frac{1}{2}\rfloor & a > 0 -\end{array}\right. -\end{align*} - -\item[$\sign(a)$] -Returns the sign of a given number. -\begin{align*} -\sign(a) = \left\{\begin{array}{ll} --1 & a < 0 \\ -0 & a = 0 \\ -1 & a > 0 -\end{array}\right. -\end{align*} - -\item[$\ilog(a)$] -The minimum number of bits required to store a positive integer $a$ in - two's complement notation, or $0$ for a non-positive integer $a$. -\begin{align*} -\ilog(a) = \left\{\begin{array}{ll} -0, & a \le 0 \\ -\left\lfloor\log_2{a}\right\rfloor+1, & a > 0 -\end{array}\right. -\end{align*} - -\begin{verse} -{\bf Examples:} -\begin{itemize} -\item $\ilog(-1)=0$ -\item $\ilog(0)=0$ -\item $\ilog(1)=1$ -\item $\ilog(2)=2$ -\item $\ilog(3)=2$ -\item $\ilog(4)=3$ -\item $\ilog(7)=3$ -\end{itemize} -\end{verse} - -\item[$\min(a,b)$] -The minimum of two numbers $a$ and $b$. - -\item[$\max(a,b)$] -The maximum of two numbers $a$ and $b$. - -\end{description} -\cleardoublepage - - -\thispagestyle{plain} -\markboth{{\sc Key words}}{{\sc Key words}} -\chapter*{Key words} - -%We can't rewrite this, because this is text required by RFC 2119, so we use -% some emergency stretching to get it typeset properly. -\setlength{\emergencystretch}{2em} -The key words ``MUST'', ``MUST NOT'', ``REQUIRED'', ``SHALL'', ``SHALL NOT'', - ``SHOULD'', ``SHOULD NOT'', ``RECOMMENDED'', ``MAY'', and ``OPTIONAL'' in this - document are to be intrepreted as described in RFC 2119 \cite{rfc2119}.\par -\setlength{\emergencystretch}{0em} - -Where such assertions are placed on the contents of a Theora bitstream itself, - implementations should be prepared to encounter bitstreams that do not follow - these requirements. -An application's behavior in the presecence of such non-conforming bitstreams - is not defined by this specification, but any reasonable method of handling - them MAY be used. -By way of example, applications MAY discard the current frame, retain the - current output thus far, or attempt to continue on by assuming some default - values for the erroneous bits. -When such an error occurs in the bitstream headers, an application MAY refuse - to decode the entire stream. -An application SHOULD NOT allow such non-conformant bitstreams to overflow - buffers and potentially execute arbitrary code, as this represents a serious - security risk. - -An application MUST, however, ensure any bits marked as reserved have the value - zero, and refuse to decode the stream if they do not. -These are used as place holders for future bitstream features with which the - current bitstream is forward-compatible. -Such features may not increment the bitstream version number, and can only be - recognized by checking the value of these reserved bits. - -\cleardoublepage - - - -\mainmatter - -\pagenumbering{arabic} -\setcounter{page}{1} - -\chapter{Introduction} - -Theora is a general purpose, lossy video codec. -It is based on the VP3 video codec produced by On2 Technologies - (\url{http://www.on2.com/}). -On2 donated the VP3.1 source code to the Xiph.org Foundation and released it - under a BSD-like license. -On2 also made an irrevocable, royalty-free license grant for any patent claims - it might have over the software and any derivatives. -No formal specification exists for the VP3 format beyond this source code, - however Mike Melanson maintains a detailed description \cite{Mel04}. -Portions of this specification were adopted from that text with permission. - -\section{VP3 and Theora} - -Theora contains a superset of the features that were available in the original - VP3 codec. -Content encoded with VP3.1 can be losslessly transcoded into the Theora format. -Theora content cannot, in general, be losslessly transcoded into the VP3 - format. -If a feature is not available in the original VP3 format, this is mentioned - when that feature is defined. -A complete list of these features appears in Appendix~\ref{app:vp3-compat}. -%TODO: VP3 - theora comparison in appendix - -\section{Video Formats} - -Theora currently supports progressive video data of arbitrary dimensions at a - constant frame rate in one of several $Y'C_bC_r$ color spaces. -The precise definition the supported color spaces appears in - Section~\ref{sec:colorspaces}. -Three different chroma subsampling formats are supported: 4:2:0, 4:2:2, - and 4:4:4. -The precise details of each of these formats and their sampling locations are - described in Section~\ref{sec:pixfmts}. - -The Theora format does not support interlaced material, variable frame rates, - bit-depths larger than 8 bits per component, nor alternate color spaces such - as RGB or arbitrary multi-channel spaces. -Black and white content can be efficiently encoded, however, because the - uniform chroma planes compress well. -Support for interlaced material is planned for a future version. -\begin{verse} -{\bf Note:} Infrequently changing frame rates---as when film and video - sequences are cut together---can be supported in the Ogg container format by - chaining several Theora streams together. -\end{verse} -Support for increased bit depths or additional color spaces is not planned. - -\section{Classification} - -Theora is a block-based lossy transform codec that utilizes an - $8\times 8$ Type-II Discrete Cosine Transform and block-based motion - compensation. -This places it in the same class of codecs as MPEG-1, -2, -4, and H.263. -The details of how individual blocks are organized and how DCT coefficients are - stored in the bitstream differ substantially from these codecs, however. -Theora supports only intra frames (I frames in MPEG) and inter frames (P frames - in MPEG). -There is no equivalent to the bi-predictive frames (B frames) found in MPEG - codecs. - -\section{Assumptions} - -The Theora codec design assumes a complex, psychovisually-aware encoder and a - simple, low-complexity decoder. -%TODO: Talk more about implementation complexity. - -Theora provides none of its own framing, synchronization, or protection against - transmission errors. -An encoder is solely a method of accepting input video frames and - compressing these frames into raw, unformatted `packets'. -The decoder then accepts these raw packets in sequence, decodes them, and - synthesizes a fascimile of the original video frames. -Theora is a free-form variable bit rate (VBR) codec, and packets have no - minimum size, maximum size, or fixed/expected size. - -Theora packets are thus intended to be used with a transport mechanism that - provides free-form framing, synchronization, positioning, and error correction - in accordance with these design assumptions, such as Ogg (for file transport) - or RTP (for network multicast). -For the purposes of a few examples in this document, we will assume that Theora - is embedded in an Ogg stream specifically, although this is by no means a - requirement or fundamental assumption in the Theora design. - -The specification for embedding Theora into an Ogg transport stream is given in - Appendix~\ref{app:oggencapsulation}. - -\section{Codec Setup and Probability Model} - -Theora's heritage is the proprietary commerical codec VP3, and it retains a - fair amount of inflexibility when compared to Vorbis \cite{vorbis}, the first - Xiph.org codec, which began as a research codec. -However, to provide additional scope for encoder improvement, Theora adopts - some of the configurable aspects of decoder setup that are present in Vorbis. -This configuration data is not available in VP3, which uses hardcoded values - instead. - -Theora makes the same controversial design decision that Vorbis made to include - the entire probability model for the DCT coefficients and all the quantization - parameters in the bitstream headers. -This is often several hundred fields. -It is therefore impossible to decode any frame in the stream without - having previously fetched the codec info and codec setup headers. - -\begin{verse} -{\bf Note:} Theora {\em can} initiate decode at an arbitrary intra-frame packet - within a bitstream so long as the codec has been initialized with the setup - headers. -\end{verse} - -Thus, Theora headers are both required for decode to begin and relatively large - as bitstream headers go. -The header size is unbounded, although as a rule-of-thumb less than 16kB is - recommended, and Xiph.org's reference encoder follows this suggestion. -%TODO: Is 8kB enough? My setup header is 7.4kB, that doesn't leave much room -% for comments. -%RG: the lesson from vorbis is that as small as possible is really -% important in some applications. Practically, what's acceptable -% depends a great deal on the target bitrate. I'd leave 16 kB in the -% spec for now. fwiw more than 1k of comments is quite unusual. - -Our own design work indicates that the primary liability of the required header - is in mindshare; it is an unusual design and thus causes some amount of - complaint among engineers as this runs against current design trends and - points out limitations in some existing software/interface designs. -However, we find that it does not fundamentally limit Theora's suitable - application space. - -%silvia: renamed -%\subsection{Format Specification} -\section{Format Conformance} - -The Theora format is well-defined by its decode specification; any encoder that - produces packets that are correctly decoded by an implementation following - this specification may be considered a proper Theora encoder. -A decoder must faithfully and completely implement the specification defined - herein %, except where noted, - to be considered a conformant Theora decoder. -A decoder need not be implemented strictly as described, but the - actual decoder process MUST be {\em entirely mathematically equivalent} - to the described process. -Where appropriate, a non-normative description of encoder processes is - included. -These sections will be marked as such, and a proper Theora encoder is not - bound to follow them. - -%TODO: \subsection{Hardware Profile} - - -\chapter{Coded Video Structure} - -Theora's encoding and decoding process is based on $8\times 8$ blocks of - pixels. -This sections describes how a video frame is laid out, divided into - blocks, and how those blocks are organized. - -\section{Frame Layout} - -A video frame in Theora is a two-dimensional array of pixels. -Theora, like VP3, uses a right-handed coordinate system, with the origin in the - lower-left corner of the frame. -This is contrary to many video formats which use a left-handed coordinate - system with the origin in the upper-left corner of the frame. -%INT: This means that for interlaced material, the definition of `even fields' -%INT: and `odd fields' may be reversed between Theora and other video codecs. -%INT: This document will always refer to them as `top fields' and `bottom -%INT: fields'. - -Theora divides the pixel array up into three separate \term{color planes}, one - for each of the $Y'$, $C_b$, and $C_r$ components of the pixel. -The $Y'$ plane is also called the \term{luma plane}, and the $C_b$ and $C_r$ - planes are also called the \term{chroma planes}. -Each plane is assigned a numerical value, as shown in - Table~\ref{tab:color-planes}. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{cl}\toprule -Index & Color Plane \\\midrule -$0$ & $Y'$ \\ -$1$ & $C_b$ \\ -$2$ & $C_r$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Color Plane Indices} -\label{tab:color-planes} -\end{table} - -In some pixel formats, the chroma planes are subsampled by a factor of two - in one or both directions. -This means that the width or height of the chroma planes may be half that of - the total frame width and height. -The luma plane is never subsampled. - -\section{Picture Region} - -An encoded video frame in Theora is required to have a width and height that - are multiples of sixteen, making an integral number of blocks even when the - chroma planes are subsampled. -However, inside a frame a smaller \term{picture region} may be defined - to present material whose dimensions are not a multiple of sixteen pixels, as - shown in Figure~\ref{fig:pic-frame}. -The picture region can be offset from the lower-left corner of the frame by up - to 255 pixels in each direction, and may have an arbitrary width and height, - provided that it is contained entirely within the coded frame. -It is this picture region that contains the actual video data. -The portions of the frame which lie outside the picture region may contain - arbitrary image data, so the frame must be cropped to the picture region - before display. -The picture region plays no other role in the decode process, which operates on - the entire video frame. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{pic-frame} -\end{center} -\caption{Location of frame and picture regions} -\label{fig:pic-frame} -\end{figure} - -\section{Blocks and Super Blocks} -\label{sec:blocks-and-sbs} - -Each color plane is subdivided into \term{blocks} of $8\times 8$ pixels. -Blocks are grouped into $4\times 4$ arrays called \term{super blocks} as - shown in Figure~\ref{fig:superblock}. -Each color plane has its own set of blocks and super blocks. -If the chroma planes are subsampled, they are still divided into $8\times 8$ - blocks of pixels; there are just fewer blocks than in the luma plane. -The boundaries of blocks and super blocks in the luma plane do not necessarily - coincide with those of the chroma planes, if the chroma planes have been - subsampled. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{superblock} -\end{center} -\caption{Subdivision of a frame into blocks and super blocks} -\label{fig:superblock} -\end{figure} - -Blocks are accessed in two different orders in the various decoder processes. -The first is \term{raster order}, illustrated in Figure~\ref{fig:raster-block}. -This accesses each block in row-major order, starting in the lower left of the - frame and continuing along the bottom row of the entire frame, followed by the - next row up, starting on the left edge of the frame, etc. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{raster-block} -\end{center} -\caption{Raster ordering of $n\times m$ blocks} -\label{fig:raster-block} -\end{figure} - -The second is \term{coded order}. -In coded order, blocks are accessed by super block. -Within each frame, super blocks are traversed in raster order, - similar to raster order for blocks. -Within each super block, however, blocks are accessed in a Hilbert curve - pattern, illustrated in Figure~\ref{fig:hilbert-block}. -If a color plane does not contain a complete super block on the top or right - sides, the same ordering is still used, simply with any blocks outside the - frame boundary ommitted. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{hilbert-block} -\end{center} -\caption{Hilbert curve ordering of blocks within a super block} -\label{fig:hilbert-block} -\end{figure} - -To illustrate this ordering, consider a frame that is 240 pixels wide and - 48 pixels high. -Each row of the luma plane has 30 blocks and 8 super blocks, and there are 6 - rows of blocks and two rows of super blocks. - -%When accessed in raster order, each block in the luma plane is assigned the -% following indices: - -%\vspace{\baselineskip} -%\begin{center} -%\begin{tabular}{|ccccccc|}\hline -%150 & 151 & 152 & 153 & $\ldots$ & 178 & 179 \\ -%120 & 121 & 122 & 123 & $\ldots$ & 148 & 149 \\\hline -% 90 & 91 & 92 & 93 & $\ldots$ & 118 & 119 \\ -% 60 & 61 & 62 & 63 & $\ldots$ & 88 & 89 \\ -% 30 & 31 & 32 & 33 & $\ldots$ & 58 & 59 \\ -% 0 & 1 & 2 & 3 & $\ldots$ & 28 & 29 \\\hline -%\end{tabular} -%\end{center} -%\vspace{\baselineskip} - -When accessed in coded order, each block in the luma plane is assigned the - following indices: - -\vspace{\baselineskip} -\begin{center} -\begin{tabular}{|cccc|c|cc|}\hline -123 & 122 & 125 & 124 & $\ldots$ & 179 & 178 \\ -120 & 121 & 126 & 127 & $\ldots$ & 176 & 177 \\\hline - 5 & 6 & 9 & 10 & $\ldots$ & 117 & 118 \\ - 4 & 7 & 8 & 11 & $\ldots$ & 116 & 119 \\ - 3 & 2 & 13 & 12 & $\ldots$ & 115 & 114 \\ - 0 & 1 & 14 & 15 & $\ldots$ & 112 & 113 \\\hline -\end{tabular} -\end{center} -\vspace{\baselineskip} - -Here the index values specify the order in which the blocks would be accessed. -The indices of the blocks are numbered continuously from one color plane to the - next. -They do not reset to zero at the start of each plane. -Instead, the numbering increases continuously from the $Y'$ plane to the $C_b$ - plane to the $C_r$ plane. -The implication is that the blocks from all planes are treated as a unit during - the various processing steps. - -Although blocks are sometimes accessed in raster order, in this document the - index associated with a block is {\em always} its index in coded order. - -\section{Macro Blocks} -\label{sec:mbs} - -A macro block contains a $2\times 2$ array of blocks in the luma plane - {\em and} the co-located blocks in the chroma planes, as shown in - Figure~\ref{fig:macroblock}. -Thus macro blocks can represent anywhere from six to twelve blocks, depending - on how the chroma planes are subsampled. -This is in contrast to super blocks, which only contain blocks from a single - color plane. -% the whole super vs. macro blocks thing is a little confusing, and it can be -% hard to remember which is what initially. A figure would/will help here, -% but I tried to add some text emphasizing the difference in terms of -% functionality. -%TBT: At this point we haven't described any functionality yet. -%TBT: As far as the reader knows, the only purpose of the blocks, macro blocks -%TBT: and super blocks is for data organization---and for blocks and super -%TBT: blocks, this is essentially true. -%TBT: So lets restrict the differences we emphasize to those of data -%TBT: organization, which the sentence I just added above does. -Macro blocks contain information about coding mode and motion vectors for the - corresponding blocks in all color planes. - -\begin{figure}[htbp] - \begin{center} - \includegraphics{macroblock} - \end{center} - \caption{Subdivision of a frame into macro blocks} - \label{fig:macroblock} -\end{figure} - -Macro blocks are also accessed in a \term{coded order}. -This coded order proceeds by examining each super block in the luma plane in - raster order, and traversing the four macro blocks inside using a smaller - Hilbert curve, as shown in Figure~\ref{fig:hilbert-mb}. -%r: I rearranged the wording to make a more formal idiom here -If the luma plane does not contain a complete super block on the top or right - sides, the same ordering is still used, with any macro blocks outside - the frame boundary simply omitted. -Because the frame size is constrained to be a multiple of 16, there are never - any partial macro blocks. -Unlike blocks, macro blocks need never be accessed in a pure raster order. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{hilbert-mb} -\end{center} -\caption{Hilbert curve ordering of macro blocks within a super block} -\label{fig:hilbert-mb} -\end{figure} - -Using the same frame size as the example above, there are 15 macro blocks in - each row and 3 rows of macro blocks. -The macro blocks are assigned the following indices: - -\vspace{\baselineskip} -\begin{center} -\begin{tabular}{|cc|cc|c|cc|c|}\hline -30 & 31 & 32 & 33 & $\cdots$ & 42 & 43 & 44 \\\hline - 1 & 2 & 5 & 6 & $\cdots$ & 25 & 26 & 29 \\ - 0 & 3 & 4 & 7 & $\cdots$ & 24 & 27 & 28 \\\hline -\end{tabular} -\end{center} -\vspace{\baselineskip} - -\section{Coding Modes and Prediction} - -Each block is coded using one of a small, fixed set of \term{coding modes} that - define how the block is predicted from previous frames. -A block is predicted using one of two \term{reference frames}, selected - according to the coding mode. -A reference frame is the fully decoded version of a previous frame in the - stream. -The first available reference frame is the previous intra frame, called the - \term{golden frame}. -The second available reference frame is the previous frame, whether it was an - intra frame or an inter frame. -If the previous frame was an intra frame, then both reference frames are the - same. -See Figure~\ref{fig:reference-frames} for an illustration of the reference - frames used for an intra frame that does not follow an intra frame. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{reference-frames} -\end{center} -\caption{Example of reference frames for an inter frame} -\label{fig:reference-frames} -\end{figure} - -Two coding modes in particular are worth mentioning here. -The INTRA mode is used for blocks that are not predicted from either reference - frame. -This is the only coding mode allowed in intra frames. -The INTER\_NOMV coding mode uses the co-located contents of the block in the - previous frame as the predictor. -This is the default coding mode. - -\section{DCT Coefficients} -\label{sec:dct-coeffs} - -A \term{residual} is added to the predicted contents of a block to form the - final reconstruction. -The residual is stored as a set of quantized coefficients from an integer - approximation of a two-dimensional Type II Discrete Cosine Transform. -The DCT takes an $8\times 8$ array of pixel values as input and returns an - $8\times 8$ array of coefficient values. -The \term{natural ordering} of these coefficients is defined to be row-major - order, from lowest to highest frequency. -They are also often indexed in \term{zig-zag order}, as shown in - Figure~\ref{tab:zig-zag}. - -\begin{figure}[htbp] -\begin{center} -\begin{tabular}[c]{rr|c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c} - &\multicolumn{1}{r}{} & && &&&&&$c$&&& && && \\ - &\multicolumn{1}{r}{} &0&&1&&2&&3&&4&&5&&6&&7 \\\cline{3-17} - &0 & 0 &$\rightarrow$& 1 && 5 &$\rightarrow$& 6 && 14 &$\rightarrow$& 15 && 27 &$\rightarrow$& 28 \\[-0.5\defaultaddspace] - & & &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$& \\ - &1 & 2 & & 4 && 7 & & 13 && 16 & & 26 && 29 & & 42 \\[-0.5\defaultaddspace] - & &$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\ - &2 & 3 & & 8 && 12 & & 17 && 25 & & 30 && 41 & & 43 \\[-0.5\defaultaddspace] - & & &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$& \\ - &3 & 9 & & 11 && 18 & & 24 && 31 & & 40 && 44 & & 53 \\[-0.5\defaultaddspace] -$r$&&$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\ - &4 & 10 & & 19 && 23 & & 32 && 39 & & 45 && 52 & & 54 \\[-0.5\defaultaddspace] - & & &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$& \\ - &5 & 20 & & 22 && 33 & & 38 && 46 & & 51 && 55 & & 60 \\[-0.5\defaultaddspace] - & &$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\ - &6 & 21 & & 34 && 37 & & 47 && 50 & & 56 && 59 & & 61 \\[-0.5\defaultaddspace] - & & &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$& \\ - &7 & 35 &$\rightarrow$& 36 && 48 &$\rightarrow$& 49 && 57 &$\rightarrow$& 58 && 62 &$\rightarrow$& 63 -\end{tabular} -\end{center} -\caption{Zig-zag order} -\label{tab:zig-zag} -\end{figure} - -\begin{verse} -{\bf Note:} the row and column indices refer to {\em frequency number} and not - pixel locations. -The frequency numbers are defined independently of the memory organization of - the pixels. -They have been written from top to bottom here to follow conventional notation, - despite the right-handed coordinate system Theora uses for pixel locations. -%RG: I'd rather we were internally consistent and put dc at the lower left. -Many implementations of the DCT operate `in-place'. -That is, they return DCT coefficients in the same memory buffer that the - initial pixel values were stored in. -Due to the right-handed coordinate system used for pixel locations in Theora, - one must note carefully how both pixel values and DCT coefficients are - organized in memory in such a system. -\end{verse} - -DCT coefficient $(0,0)$ is called the \term{DC coefficient}. -All the other coefficients are called \term{AC coefficients}. - - -\chapter{Decoding Overview} - -This section provides a high level description of the Theora codec's - construction. -A bit-by-bit specification appears beginning in Section~\ref{sec:bitpacking}. -The later sections assume a high-level understanding of the Theora decode - process, which is provided below. - -\section{Decoder Configuration} - -Decoder setup consists of configuration of the quantization matrices and the - Huffman codebooks for the DCT coefficients, and a table of limit values for - the deblocking filter. -The remainder of the decoding pipeline is not configurable. - -\subsection{Global Configuration} - -The global codec configuration consists of a few video related fields, such as - frame rate, frame size, picture size and offset, aspect ratio, color space, - pixel format, and a version number. -The version number is divided into a major version, a minor version, amd a - minor revision number. -%r: afaik the released vp3 codec called itself 3.1 and is compatible w/ theora -%r: even though we received the in-progress 3.2 codebase -For the format defined in this specification, these are `3', `2', and - `1', respectively, in reference to Theora's origin as a successor to - the VP3.1 format. - -\subsection{Quantization Matrices} - -Theora allows up to 384 different quantization matrices to be defined, one for - each \term{quantization type}, \term{color plane} ($Y'$, $C_b$, or $C_r$), and - \term{quantization index}, \qi, which ranges from zero to 63, inclusive. -There are currently two quantization types defined, which depend on the coding - mode of the block being dequantized, as shown in Table~\ref{tab:quant-types}. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{cl}\toprule -Quantization Type & Usage \\\midrule -$0$ & INTRA-mode blocks \\ -$1$ & Blocks in any other mode. \\ -\bottomrule\end{tabular} -\end{center} -\caption{Quantization Type Indices} -\label{tab:quant-types} -\end{table} - -%r: I think 'nominally' is more specific than 'generally' here -The quantization index, on the other hand, nominally represents a progressive - range of quality levels, from low quality near zero to high quality near 63. -However, the interpretation is arbitrary, and it is possible, for example, to - partition the scale into two completely separate ranges with 32 levels each - that are meant to represent different classes of source material, or any - other arrangement that suits the encoder's requirements. - -Each quantization matrix is an $8\times 8$ matrix of 16-bit values, which is - used to quantize the output of the $8\times 8$ DCT\@. -Quantization matrices are specified using three components: a - \term{base matrix} and two \term{scale values}. -The first scale value is the \term{DC scale}, which is applied to the DC - component of the base matrix. -The second scale value is the \term{AC scale}, which is applied to all the - other components of the base matrix. -There are 64 DC scale values and 64 AC scale values, one for each \qi\ value. - -There are 64 elements in each base matrix, one for each DCT coefficient. -They are stored in natural order (cf. Section~\ref{sec:dct-coeffs}). -There is a separate set of base matrices for each quantization type and each - color plane, with up to 64 possible base matrices in each set, one for each - \qi\ value. -%r: we will mention that the given matricies must bound the \qi range -%r: in the detailed section. it's not important at this level. -Typically the bitstream contains matrices for only a sparse subset of the - possible \qi\ values. -The base matrices for the remainder of the \qi\ values are computed using - linear interpolation. -This configuration allows the encoder to adjust the quantization matrices to - approximate the complex, non-linear response of the human visual system to - different quantization errors. - -Finally, because the in-loop deblocking filter strength depends on the strength - of the quantization matrices defined in this header, a table of 64 \term{loop - filter limit values} is defined, one for each \qi\ value. - -The precise specification of how all of this information is decoded appears in - Section~\ref{sub:loop-filter-limits} and Section~\ref{sub:quant-params}. - -\subsection{Huffman Codebooks} - -Theora uses 80 configurable binary Huffman codes to represent the 32 tokens - used to encode DCT coefficients. -Each of the 32 token values has a different semantic meaning and is used to - represent single coefficient values, zero runs, combinations of the two, and - \term{End-Of-Block markers}. - -The 80 codes are divided up into five groups of 16, with each group - corresponding to a set of DCT coefficient indices. -The first group corresponds to the DC coefficient, while the remaining four - groups correspond to different subsets of the AC coefficients. -Within each frame, two pairs of 4-bit codebook indices are stored. -The first pair selects which codebooks to use from the DC coefficient group for - the $Y'$ coefficients and the $C_b$ and $C_r$ coefficients. -The second pair selects which codebooks to use from {\em all four} of the AC - coefficient groups for the $Y'$ coefficients and the $C_b$ and $C_r$ - coefficients. - -The precise specification of how the codebooks are decoded appears in - Section~\ref{sub:huffman-tables}. - -\section{High-Level Decode Process} - -\subsection{Decoder Setup} - -Before decoding can begin, a decoder MUST be initialized using the bitstream - headers corresponding to the stream to be decoded. -Theora uses three header packets; all are required, in order, by this - specification. -Once set up, decode may begin at any intra-frame packet---or even inter-frame - packets, provided the appropriate decoded reference frames have already been - decoded and cached---belonging to the Theora stream. -In Theora I, all packets after the three initial headers are intra-frame or - inter-frame packets. - -The header packets are, in order, the identification header, the comment - header, and the setup header. - -\paragraph{Identification Header} - -The identification header identifies the stream as Theora, provides a version - number, and defines the characteristics of the video stream such as frame - size. -A complete description of the identification header appears in - Section~\ref{sec:idheader}. - -\paragraph{Comment Header} - -The comment header includes user text comments (`tags') and a vendor string - for the application/library that produced the stream. -The format of the comment header is the same as that used in the Vorbis I and - Speex codecs, with slight modifications due to the use of a different bit - packing mechanism. -A complete description of how the comment header is coded appears in - Section~\ref{sec:commentheader}, along with a suggested set of tags. - -\paragraph{Setup Header} - -The setup header includes extensive codec setup information, including the - complete set of quantization matrices and Huffman codebooks needed to decode - the DCT coefficients. -A complete description of the setup header appears in - Section~\ref{sec:setupheader}. - -\subsection{Decode Procedure} - -The decoding and synthesis procedure for all video packets is fundamentally the - same, with some steps omitted for intra frames. -\begin{itemize} -\item -Decode packet type flag. -\item -Decode frame header. -\item -Decode coded block information (inter frames only). -\item -Decode macro block mode information (inter frames only). -\item -Decode motion vectors (inter frames only). -\item -Decode block-level \qi\ information. -\item -Decode DC coefficient for each coded block. -\item -Decode 1st AC coefficient for each coded block. -\item -Decode 2nd AC coefficient for each coded block. -\item -$\ldots$ -\item -Decode 63rd AC coefficient for each coded block. -\item Perform DC coefficient prediction. -\item Reconstruct coded blocks. -\item Copy uncoded bocks. -\item Perform loop filtering. -\end{itemize} - -\begin{verse} -{\bf Note:} clever rearrangement of the steps in this process is possible. -As an example, in a memory-constrained environment, one can make multiple - passes through the DCT coefficients to avoid buffering them all in memory. -On the first pass, the starting location of each coefficient is identified, and - then 64 separate get pointers are used to read in the 64 DCT coefficients - required to reconstruct each coded block in sequence. -This operation produces entirely equivalent output and is naturally perfectly - legal. -It may even be a benefit in non-memory-constrained environments due to a - reduced cache footprint. -\end{verse} - -Theora makes equivalence easy to check by defining all decoding operations in - terms of exact integer operations. -No floating-point math is required, and in particular, the implementation of - the iDCT transform MUST be followed precisely. -This prevents the decoder mismatch problem commonly associated with codecs that - provide a less rigorous transform specification. -Such a mismatch problem would be devastating to Theora, since a single rounding - error in one frame could propagate throughout the entire succeeding frame due - to DC prediction. - -\paragraph{Packet Type Decode} - -Theora uses four packet types. -The first three packet types mark each of the three Theora headers described - above. -The fourth packet type marks a video packet. -All other packet types are reserved; packets marked with a reserved type should - be ignored. - -Additionally, zero-length packets are treated as if they were an inter -frame with no blocks coded. That is, as a duplicate frame. - -\paragraph{Frame Header Decode} - -The frame header contains some global information about the current frame. -The first is the frame type field, which specifies if this is an intra frame or - an inter frame. -Inter frames predict their contents from previously decoded reference frames. -Intra frames can be independently decoded with no established reference frames. - -The next piece of information in the frame header is the list of \qi\ values - allowed in the frame. -Theora allows from one to three different \qi\ values to be used in a single - frame, each of which selects a set of six quantization matrices, one for each - quantization type (inter or intra), and one for each color plane. -The first \qi\ value is {\em always} used when dequantizing DC coefficients. -The \qi\ value used when dequantizing AC coefficients, however, can vary from - block to block. -VP3, in contrast, only allows a single \qi\ value per frame for both the DC and - AC coefficients. - -\paragraph{Coded Block Information} - -This stage determines which blocks in the frame are coded and which are - uncoded. -A \term{coded block list} is constructed which lists all the coded blocks in - coded order. -For intra frames, every block is coded, and so no data needs to be read from - the packet. - -\paragraph{Macro Block Mode Information} - -For intra frames, every block is coded in INTRA mode, and this stage is - skipped. -In inter frames a \term{coded macro block list} is constructed from the coded - block list. -Any macro block which has at least one of its luma blocks coded is considered - coded; all other macro blocks are uncoded, even if they contain coded chroma - blocks. -A coding mode is decoded for each coded macro block, and assigned to all its - constituent coded blocks. -All coded chroma blocks in uncoded macro blocks are assigned the INTER\_NOMV - coding mode. - -\paragraph{Motion Vectors} - -Intra frames are coded entirely in INTRA mode, and so this stage is skipped. -Some inter coding modes, however, require one or more motion vectors to be - specified for each macro block. -These are decoded in this stage, and an appropriate motion vector is assigned - to each coded block in the macro block. - -\paragraph{Block-Level \qi\ Information} - -If a frame allows multiple \qi\ values, the \qi\ value assigned to each block - is decoded here. -Frames that use only a single \qi\ value have nothing to decode. - -\paragraph{DCT Coefficients} - -Finally, the quantized DCT coefficients are decoded. -A list of DCT coefficients in zig-zag order for a single block is represented - by a list of tokens. -A token can take on one of 32 different values, each with a different semantic - meaning. -A single token can represent a single DCT coefficient, a run of zero - coefficients within a single block, a combination of a run of zero - coefficients followed by a single non-zero coefficient, an - \term{End-Of-Block marker}, or a run of EOB markers. -EOB markers signify that the remainder of the block is one long zero run. -Unlike JPEG and MPEG, there is no requirement for each block to end with - a special marker. -If non-EOB tokens yield values for all 64 of the coefficients in a block, then - no EOB marker occurs. - -Each token is associated with a specific \term{token index} in a block. -For single-coefficient tokens, this index is the zig-zag index of the token in - the block. -For zero-run tokens, this index is the zig-zag index of the {\em first} - coefficient in the run. -For combination tokens, the index is again the zig-zag index of the first - coefficient in the zero run. -For EOB markers, which signify that the remainder of the block is one long zero - run, the index is the zig-zag index of the first zero coefficient in that run. -For EOB runs, the token index is that of the first EOB marker in the run. -Due to zero runs and EOB markers, a block does not have to have a token for - every zig-zag index. - -Tokens are grouped in the stream by token index, not by the block they - originate from. -This means that for each zig-zag index in turn, the tokens with that index from - {\em all} the coded blocks are coded in coded block order. -When decoding, a current token index is maintained for each coded block. -This index is advanced by the number of coefficients that are added to the - block as each token is decoded. -After fully decoding all the tokens with token index \ti, the current token - index of every coded block will be \ti\ or greater. - -If an EOB run of $n$ blocks is decoded at token index \ti, then it ends the - next $n$ blocks in coded block order whose current token index is equal to - \ti, but not greater. -If there are fewer than $n$ blocks with a current token index of \ti, then the - decoder goes through the coded block list again from the start, ending blocks - with a current token index of $\ti+1$, and so on, until $n$ blocks have been - ended. - -Tokens are read by parsing a Huffman code that depends on \ti\ and the color - plane of the next coded block whose current token index is equal to \ti, but - not greater. -The Huffman codebooks are selected on a per-frame basis from the 80 codebooks - defined in the setup header. -Many tokens have a fixed number of \term{extra bits} associated with them. -These bits are read from the packet immediately after the token is decoded. -These are used to define things such as coefficient magnitude, sign, and the - length of runs. - -\paragraph{DC Prediction} - -After the coefficients for each block are decoded, the quantized DC value of - each block is adjusted based on the DC values of its neighbors. -This adjustment is performed by scanning the blocks in raster order, not coded - block order. - -\paragraph{Reconstruction} - -Finally, using the coding mode, motion vector (if applicable), quantized - coefficient list, and \qi\ value defined for each block, all the coded blocks - are reconstructed. -The DCT coefficients are dequantized, an inverse DCT transform is applied, and - the predictor is formed from the coding mode and motion vector and added to - the result. - -\paragraph{Loop Filtering} - -To complete the reconstructed frame, an ``in-loop'' deblocking filter is - applied to the edges of all coded blocks. - - -\chapter{Video Formats} - -This section gives a precise description of the video formats that Theora is - capable of storing. -The Theora bitstream is capable of handling video at any arbitrary resolution - up to $1048560\times 1048560$. -Such video would require almost three terabytes of storage per frame for - uncompressed data, so compliant decoders MAY refuse to decode images with - sizes beyond their capabilities. -%TODO: What MUST a "compliant" decoder accept? -%TODO: What SHOULD a decoder use for an upper bound? (derive from total amount -%TODO: of memory and memory bandwidth) -%TODO: Any lower limits? -%TODO: We really need hardware device profiles, but such things should be -%TODO: developed with input from the hardware community. -%TODO: And even then sometimes they're useless - -The remainder of this section talks about two specific aspects of the video - format: the color space and the pixel format. -The first describes how color is represented and how to transform that color - representation into a device independent color space such as CIE $XYZ$ (1931). -The second describes the various schemes for sampling the color values in time - and space. - -\section{Color Space Conventions} - -There are a large number of different color standards used in digital video. -Since Theora is a lossy codec, it restricts itself to only a few of them to - simplify playback. -Unlike the alternate method of describing all the parameters of the color - model, this allows a few dedicated routines for color conversion to be written - and heavily optimized in a decoder. -More flexible conversion functions should instead be specified in an encoder, - where additional computational complexity is more easily tolerated. -The color spaces were selected to give a fair representation of color standards - in use around the world today. -Most of the standards that do not exactly match one of these can be converted - to one fairly easily. - -All Theora color spaces are $Y'C_bC_r$ color spaces with one luma channel and - two chroma channels. -Each channel contains 8-bit discrete values in the range $0\ldots255$, which - represent non-linear gamma pre-corrected signals. -The Theora identification header contains an 8-bit value that describes the - color space. -This merely selects one of the color spaces available from an enumerated list. -Currently, only two color spaces are defined, with a third possibility that - indicates the color space is ``unknown". - -\section{Color Space Conversions and Parameters} -\label{sec:color-xforms} - -The parameters which describe the conversions between each color space are - listed below. -These are the parameters needed to map colors from the encoded $Y'C_bC_r$ - representation to the device-independent color space CIE $XYZ$ (1931). -These parameters define abstract mathematical conversion functions which are - infinitely precise. -The accuracy and precision with which the conversions are performed in a real - system is determined by the quality of output desired and the available - processing power. -Exact decoder output is defined by this specification only in the original - $Y'C_bC_r$ space. - -\begin{description} -\item[$Y'C_bC_r$ to $Y'P_bP_r$:] -\vspace{\baselineskip}\hfill - -This conversion takes 8-bit discrete values in the range $[0\ldots255]$ and - maps them to real values in the range $[0\ldots1]$ for Y and - $[-\frac{1}{2}\ldots\frac{1}{2}]$ for $P_b$ and $P_r$. -Because some values may fall outside the offset and excursion defined for each - channel in the $Y'C_bC_r$ space, the results may fall outside these ranges in - $Y'P_bP_r$ space. -No clamping should be done at this stage. - -\begin{align} -Y'_\mathrm{out} & = - \frac{Y'_\mathrm{in}-\mathrm{Offset}_Y}{\mathrm{Excursion}_Y} \\ -P_b & = - \frac{C_b-\mathrm{Offset}_{C_b}}{\mathrm{Excursion}_{C_b}} \\ -P_r & = - \frac{C_r-\mathrm{Offset}_{C_r}}{\mathrm{Excursion}_{C_r}} -\end{align} - -Parameters: $\mathrm{Offset}_{Y,C_b,C_r}$, $\mathrm{Excursion}_{Y,C_b,C_r}$. - -\item[$Y'P_bP_r$ to $R'G'B'$:] -\vspace{\baselineskip}\hfill - -This conversion takes the one luma and two chroma channel representation and - maps it to the non-linear $R'G'B'$ space used to drive actual output devices. -Values should be clamped into the range $[0\ldots1]$ after this stage. - -\begin{align} -R' & = Y'+2(1-K_r)P_r \\ -G' & = Y'-2\frac{(1-K_b)K_b}{1-K_b-K_r}P_b-2\frac{(1-K_r)K_r}{1-K_b-K_r}P_r\\ -B' & = Y'+2(1-K_b)P_b -\end{align} - -Parameters: $K_b,K_r$. - -\item[$R'G'B'$ to $RGB$ (Output device gamma correction):] -\vspace{\baselineskip}\hfill - -This conversion takes the non-linear $R'G'B'$ voltage levels and maps them to - linear light levels produced by the actual output device. -Note that this conversion is only that of the output device, and its inverse is - {\em not} that used by the input device. -Because a dim viewing environment is assumed in most television standards, the - overall gamma between the input and output devices is usually around $1.1$ to - $1.2$, and not a strict $1.0$. - -For calibration with actual output devices, the model -\begin{align} -L & =(E'+\Delta)^\gamma -\end{align} - should be used, with $\Delta$ the free parameter and $\gamma$ held fixed to - the value specified in this document. -The conversion function presented here is an idealized version with $\Delta=0$. - -\begin{align} -R & = R'^\gamma \\ -G & = G'^\gamma \\ -B & = B'^\gamma -\end{align} - -Parameters: $\gamma$. - -\item[$RGB$ to $R'G'B'$ (Input device gamma correction):] -\vspace{\baselineskip}\hfill - -%TODO: Tag section as non-normative - -This conversion takes linear light levels and maps them to the non-linear - voltage levels produced in the actual input device. -This information is merely informative. -It is not required for building a decoder or for converting between the various - formats and the actual output capabilities of a particular device. - -A linear segment is introduced on the low end to reduce noise in dark areas of - the image. -The rest of the scale is adjusted so that the power segment of the curve - intersects the linear segment with the proper slope, and so that it still maps - 0 to 0 and 1 to 1. - -\begin{align} -R' & = \left\{ -\begin{array}{ll} -\alpha R, & 0\le R<\delta \\ -(1+\epsilon)R^\beta-\epsilon, & \delta\le R\le1 -\end{array}\right. \\ -G' & = \left\{ -\begin{array}{ll} -\alpha G, & 0\le G<\delta \\ -(1+\epsilon)G^\beta-\epsilon, & \delta\le G\le1 -\end{array}\right. \\ -B' & = \left\{ -\begin{array}{ll} -\alpha B, & 0\le B<\delta \\ -(1+\epsilon)B^\beta-\epsilon, & \delta\le B\le1 -\end{array}\right. -\end{align} - -Parameters: $\beta$, $\alpha$, $\delta$, $\epsilon$. - -\item[$RGB$ to CIE $XYZ$ (1931):] -\vspace{\baselineskip}\hfill - -This conversion maps a device-dependent linear RGB space to the - device-independent linear CIE $XYZ$ space. -The parameters are the CIE chromaticity coordinates of the three - primaries---red, green, and blue---as well as the chromaticity coordinates - of the white point of the device. -This is how hardware manufacturers and standards typically describe a - particular $RGB$ space. -The math required to convert these parameters into a useful transformation - matrix is reproduced below. - -\begin{align} -F & = -\left[\begin{array}{ccc} -\frac{x_r}{y_r} & \frac{x_g}{y_g} & \frac{x_b}{y_b} \\ -1 & 1 & 1 \\ -\frac{1-x_r-y_r}{y_r} & \frac{1-x_g-y_g}{y_g} & \frac{1-x_b-y_b}{y_b} -\end{array}\right] \\ -\left[\begin{array}{c} -s_r \\ -s_g \\ -s_b -\end{array}\right] & = -F^{-1}\left[\begin{array}{c} -\frac{x_w}{y_w} \\ -1 \\ -\frac{1-x_w-y_w}{y_w} -\end{array}\right] \\ -\left[\begin{array}{c} -X \\ -Y \\ -Z -\end{array}\right] & = -F\left[\begin{array}{c} -s_rR \\ -s_gG \\ -s_bB -\end{array}\right] -\end{align} -Parameters: $x_r,x_g,x_b,x_w, y_r,y_g,y_b,y_w$. - -\end{description} - -\section{Available Color Spaces} -\label{sec:colorspaces} - -These are the color spaces currently defined for use by Theora video. -Each one has a short name, with which it is referred to in this document, and - a more detailed specification of the standards from which its parameters are - derived. -Some standards do not specify all the parameters necessary. -For these unspecified parameters, this document serves as the definition of - what should be used when encoding or decoding Theora video. - -\subsection{Rec.~470M (Rec.~ITU-R~BT.470-6 System M/NTSC with - Rec.~ITU-R~BT.601-5)} -\label{sec:470m} - -This color space is used by broadcast television and DVDs in much of the - Americas, Japan, Korea, and the Union of Myanmar \cite{rec470}. -This color space may also be used for System M/PAL (Brazil), with an - appropriate conversion supplied by the encoder to compensate for the - different gamma value. -See Section~\ref{sec:470bg} for an appropriate gamma value to assume for M/PAL - input. - -In the US, studio monitors are adjusted to a D65 white point - ($x_w,y_w=0.313,0.329$). -In Japan, studio monitors are adjusted to a D white of 9300K - ($x_w,y_w=0.285,0.293$). - -Rec.~470 does not specify a digital encoding of the color signals. -For Theora, Rec.~ITU-R~BT.601-5 \cite{rec601} is used, starting from the - $R'G'B'$ signals specified by Rec.~470. - -Rec.~470 does not specify an input gamma function. -For Theora, the Rec.~709 \cite{rec709} input function is assumed. -This is the same as that specified by SMPTE 170M \cite{smpte170m}, which claims - to reflect modern practice in the creation of NTSC signals circa 1994. - -The parameters for all the color transformations defined in - Section~\ref{sec:color-xforms} are given in Table~\ref{tab:470m}. - -\begin{table}[htb] -\begin{align*} -\mathrm{Offset}_{Y,C_b,C_r} & = (16, 128, 128) \\ -\mathrm{Excursion}_{Y,C_b,C_r} & = (219, 224, 224) \\ -K_r & = 0.299 \\ -K_b & = 0.114 \\ -\gamma & = 2.2 \\ -\beta & = 0.45 \\ -\alpha & = 4.5 \\ -\delta & = 0.018 \\ -\epsilon & = 0.099 \\ -x_r,y_r & = 0.67, 0.33 \\ -x_g,y_g & = 0.21, 0.71 \\ -x_b,y_b & = 0.14, 0.08 \\ -\text{(Illuminant C) } x_w,y_w & = 0.310, 0.316 \\ -\end{align*} -\caption{Rec.~470M Parameters} -\label{tab:470m} -\end{table} - -\subsection{Rec.~470BG (Rec.~ITU-R~BT.470-6 Systems B and G with - Rec.~ITU-R~BT.601-5)} -\label{sec:470bg} - -This color space is used by the PAL and SECAM systems in much of the rest of - the world \cite{rec470} -This can be used directly by systems (B, B1, D, D1, G, H, I, K, N)/PAL and (B, - D, G, H, K, K1, L)/SECAM\@. - -\begin{verse} -{\bf Note:} the Rec.~470BG chromaticity values are different from those - specified in Rec.~470M\@. -When PAL and SECAM systems were first designed, they were based upon the same - primaries as NTSC\@. -However, as methods of making color picture tubes have changed, the primaries - used have changed as well. -The U.S. recommends using correction circuitry to approximate the existing, - standard NTSC primaries. -Current PAL and SECAM systems have standardized on primaries in accord with - more recent technology. -\end{verse} - -Rec.~470 provisionally permits the use of the NTSC chromaticity values (given - in Section~\ref{sec:470m}) with legacy PAL and SECAM equipment. -In Theora, material must be decoded assuming the new PAL and SECAM primaries. -Material intended for display on old legacy devices should be converted by the - decoder. - -The official Rec.~470BG specifies a gamma value of $\gamma=2.8$. -However, in practice this value is unrealistically high \cite{Poyn97}. -Rec.~470BG states that the overall system gamma should be approximately - $\gamma\beta=1.2$. -Since most cameras pre-correct with a gamma value of $\beta=0.45$, - this suggests an output device gamma of approximately $\gamma=2.67$. -This is the value recommended for use with PAL systems in Theora. - -Rec.~470 does not specify a digital encoding of the color signals. -For Theora, Rec.~ITU-R~BT.601-5 \cite{rec601} is used, starting from the - $R'G'B'$ signals specified by Rec.~470. - -Rec.~470 does not specify an input gamma function. -For Theora, the Rec 709 \cite{rec709} input function is assumed. - -The parameters for all the color transformations defined in - Section~\ref{sec:color-xforms} are given in Table~\ref{tab:470bg}. - -\begin{table}[htb] -\begin{align*} -\mathrm{Offset}_{Y,C_b,C_r} & = (16, 128, 128) \\ -\mathrm{Excursion}_{Y,C_b,C_r} & = (219, 224, 224) \\ -K_r & = 0.299 \\ -K_b & = 0.114 \\ -\gamma & = 2.67 \\ -\beta & = 0.45 \\ -\alpha & = 4.5 \\ -\delta & = 0.018 \\ -\epsilon & = 0.099 \\ -x_r,y_r & = 0.64, 0.33 \\ -x_g,y_g & = 0.29, 0.60 \\ -x_b,y_b & = 0.15, 0.06 \\ -\text{(D65) } x_w,y_w & = 0.313, 0.329 \\ -\end{align*} -\caption{Rec.~470BG Parameters} -\label{tab:470bg} -\end{table} - -\section{Pixel Formats} -\label{sec:pixfmts} - -Theora supports several different pixel formats, each of which uses different - subsampling for the chroma planes relative to the luma plane. -A decoder may need to recover a full resolution chroma plane with samples - co-sited with the luma plane in order to convert to RGB for display or perform - other processing. -Decoders can assume that the chroma signal satisfies the Nyquist-Shannon - sampling theorem. -The ideal low-pass reconstruction filter this implies is not practical, but any - suitable approximation can be used, depending on the available computing - power. -Decoders MAY simply use a box filter, assigning to each luma sample the chroma - sample closest to it. -Encoders would not go wrong in assuming that this will be the most common - approach. - -\subsection{4:4:4 Subsampling} -\label{sec:444} - -All three color planes are stored at full resolution---each pixel has a $Y'$, - a $C_b$ and a $C_r$ value (see Figure~\ref{fig:pixel444}). -The samples in the different planes are all at co-located sites. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{pixel444} -\end{center} -\caption{Pixels encoded 4:4:4} -\label{fig:pixel444} -\end{figure} - -% Figure. -%YRB YRB -% -% -% -%YRB YRB -% -% -% - - -\subsection{4:2:2 Subsampling} -\label{sec:422} - -The $C_b$ and $C_r$ planes are stored with half the horizontal resolution of - the $Y'$ plane. -Thus, each of these planes has half the number of horizontal blocks as the luma - plane (see Figure~\ref{fig:pixel422}). -Similarly, they have half the number of horizontal super blocks, rounded up. -Macro blocks are defined across color planes, and so their number does not - change, but each macro block contains half as many chroma blocks. - -The chroma samples are vertically aligned with the luma samples, but - horizontally centered between two luma samples. -Thus, each luma sample has a unique closest chroma sample. -A horizontal phase shift may be required to produce signals which use different - horizontal chroma sampling locations for compatibility with different systems. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{pixel422} -\end{center} -\caption{Pixels encoded 4:2:2} -\label{fig:pixel422} -\end{figure} - -% Figure. -%Y RB Y Y RB Y -% -% -% -%Y RB Y Y RB Y -% -% -% - -\subsection{4:2:0 Subsampling} -\label{sec:420} - -The $C_b$ and $C_r$ planes are stored with half the horizontal and half the - vertical resolution of the $Y'$ plane. -Thus, each of these planes has half the number of horizontal blocks and half - the number of vertical blocks as the luma plane, for a total of one quarter - the number of blocks (see Figure~\ref{fig:pixel420}). -Similarly, they have half the number of horizontal super blocks and half the - number of vertical super blocks, rounded up. -Macro blocks are defined across color planes, and so their number does not - change, but each macro block contains within it one quarter as many - chroma blocks. - -The chroma samples are vertically and horizontally centered between four luma - samples. -Thus, each luma sample has a unique closest chroma sample. -This is the same sub-sampling pattern used with JPEG, MJPEG, and MPEG-1, and - was inherited from VP3. -A horizontal or vertical phase shift may be required to produce signals which - use different chroma sampling locations for compatibility with different - systems. - -\begin{figure}[htbp] -\begin{center} -\includegraphics{pixel420} -\end{center} -\caption{Pixels encoded 4:2:0} -\label{fig:pixel420} -\end{figure} - -% Figure. -%Y Y Y Y -% -% RB RB -% -%Y Y Y Y -% -% -% -%Y Y Y Y -% -% RB RB -% -%Y Y Y Y -% -% -% - -\subsection{Subsampling and the Picture Region} - -Although the frame size must be an integral number of macro blocks, and thus - both the number of pixels and the number of blocks in each direction must be - even, no such requirement is made of the picture region. -Thus, when using subsampled pixel formats, careful attention must be paid to - which chroma samples correspond to which luma samples. - -As mentioned above, for each pixel format, there is a unique chroma sample that - is the closest to each luma sample. -When cropping the chroma planes to the picture region, all the chroma samples - corresponding to a luma sample in the cropped picture region must be included. -Thus, when dividing the width or height of the picture region by two to obtain - the size of the subsampled chroma planes, they must be rounded up. - -Furthermore, the sampling locations are defined relative to the frame, - {\em not} the picture region. -When using the 4:2:2 and 4:2:0 formats, the locations of chroma samples - relative to the luma samples depends on whether or not the X offset of the - picture region is odd. -If the offset is even, each column of chroma samples corresponds to two columns - of luma samples (see Figure~\ref{fig:pic_even} for an example). -The only exception is if the width is odd, in which case the last column - corresponds to only one column of luma samples (see Figure~\ref{fig:pic_even_odd}). -If the offset is odd, then the first column of chroma samples corresponds to - only one column of luma samples, while the remaining columns each correspond - to two (see Figure~\ref{fig:pic_odd}). -In this case, if the width is even, the last column again corresponds to only - one column of luma samples (see Figure~\ref{fig:pic_odd_even}). - -A similar process is followed with the rows of a picture region of odd height - encoded in the 4:2:0 format. -If the Y offset is even, each row of chroma samples corresponds to two rows of - luma samples (see Figure~\ref{fig:pic_even}), except with an odd height, where - the last row corresponds to one row of chroma luna samples only (see - Figure~\ref{fig:pic_even_odd}). -If the offset is odd, then it is the first row of chroma samples which - corresponds to only one row of luma samples, while the remaining rows each - correspond to two (Figure~\ref{fig:pic_odd}), except with an even height, - where the last row also corresponds to one (Figure~\ref{fig:pic_odd_even}). - -Encoders should be aware of these differences in the subsampling when using an - even or odd offset. -In the typical case, with an even width and height, where one expects two rows - or columns of luma samples for every row or column of chroma samples, the - encoder must take care to ensure that the offsets used are both even. - -\begin{figure}[htbp] -\begin{center} -\includegraphics[width=\textwidth]{pic_even} -\end{center} -\caption{Pixel correspondence between color planes with even picture - offset and even picture size} -\label{fig:pic_even} -\end{figure} - -\begin{figure}[htbp] -\begin{center} -\includegraphics[width=\textwidth]{pic_even_odd} -\end{center} -\caption{Pixel correspondence with even picture offset and - odd picture size} -\label{fig:pic_even_odd} -\end{figure} - -\begin{figure}[htbp] -\begin{center} -\includegraphics[width=\textwidth]{pic_odd} -\end{center} -\caption{Pixel correspondence with odd picture offset and - odd picture size} -\label{fig:pic_odd} -\end{figure} - -\begin{figure}[htbp] -\begin{center} -\includegraphics[width=\textwidth]{pic_odd_even} -\end{center} -\caption{Pixel correspondence with odd picture offset and - even picture size} -\label{fig:pic_odd_even} -\end{figure} - - -\chapter{Bitpacking Convention} -\label{sec:bitpacking} - -\section{Overview} - -The Theora codec uses relatively unstructured raw packets containing - binary integer fields of arbitrary width. -Logically, each packet is a bitstream in which bits are written one-by-one by - the encoder and then read one-by-one in the same order by the decoder. -Most current binary storage arrangements group bits into a native storage unit - of eight bits (octets), sixteen bits, thirty-two bits, or less commonly other - fixed sizes. -The Theora bitpacking convention specifies the correct mapping of the logical - packet bitstream into an actual representation in fixed-width units. - -\subsection{Octets and Bytes} - -In most contemporary architectures, a `byte' is synonymous with an `octect', - that is, eight bits. -For purposes of the bitpacking convention, a byte implies the smallest native - integer storage representation offered by a platform. -Modern file systems invariably offer bytes as the fundamental atom of storage. - -The most ubiquitous architectures today consider a `byte' to be an octet. -Note, however, that the Theora bitpacking convention is still well defined for - any native byte size; an implementation can use the native bit-width of a - given storage system. -This document assumes that a byte is one octet for purposes of example only. - -\subsection{Words and Byte Order} - -A `word' is an integer size that is a grouped multiple of the byte size. -Most architectures consider a word to be a group of two, four, or eight bytes. -Each byte in the word can be ranked by order of `significance', e.g.\ the - significance of the bits in each byte when storing a binary integer in the - word. -Several byte orderings are possible in a word. -The common ones are -\begin{itemize} -\item{Big-endian:} -in which the most significant byte comes first, e.g.\ 3-2-1-0, -\item{Little-endian:} -in which the least significant byte comes first, e.g.\ 0-1-2-3, and -\item{Mixed-endian:} -one of the less-common orderings that cannot be put into the above two - categories, e.g.\ 3-1-2-0 or 0-2-1-3. -\end{itemize} - -The Theora bitpacking convention specifies storage and bitstream manipulation - at the byte, not word, level. -Thus host word ordering is of a concern only during optimization, when writing - code that operates on a word of storage at a time rather than a byte. -Logically, bytes are always encoded and decoded in order from byte zero through - byte $n$. - -\subsection{Bit Order} - -A byte has a well-defined `least significant' bit (LSb), which is the only bit - set when the byte is storing the two's complement integer value $+1$. -A byte's `most significant' bit (MSb) is at the opposite end. -Bits in a byte are numbered from zero at the LSb to $n$ for the MSb, where - $n=7$ in an octet. - -\section{Coding Bits into Bytes} - -The Theora codec needs to encode arbitrary bit-width integers from zero to 32 - bits wide into packets. -These integer fields are not aligned to the boundaries of the byte - representation; the next field is read at the bit position immediately - after the end of the previous field. - -The decoder logically unpacks integers by first reading the MSb of a binary - integer from the logical bitstream, followed by the next most significant - bit, etc., until the required number of bits have been read. -When unpacking the bytes into bits, the decoder begins by reading the MSb of - the integer to be read from the most significant unread bit position of the - source byte, followed by the next-most significant bit position of the - destination integer, and so on up to the requested number of bits. -Note that this differs from the Vorbis I codec, which - begins decoding with the LSb of the source integer, reading it from the - LSb of the source byte. -When all the bits of the current source byte are read, decoding continues with - the MSb of the next byte. -Any unfilled bits in the last byte of the packet MUST be cleared to zero by the - encoder. - -\subsection{Signedness} - -The binary integers decoded by the above process may be either signed or - unsigned. -This varies from integer to integer, and this specification - indicates how each value should be interpreted as it is read. -That is, depending on context, the three bit binary pattern \bin{111} can be - taken to represent either `$7$' as an unsigned integer or `$-1$' as a signed, - two's complement integer. - -\subsection{Encoding Example} - -The following example shows the state of an (8-bit) byte stream after several - binary integers are encoded, including the location of the put pointer for the - next bit to write to and the total length of the stream in bytes. - -Encode the 4 bit unsigned integer value `12' (\bin{1100}) into an empty byte - stream. - -\begin{tabular}{r|ccccccccl} -\multicolumn{1}{r}{}& &&&&$\downarrow$&&&& \\ - & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9} -byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} & - 0 & 0 & 0 & 0 & $\leftarrow$ \\ -byte 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ -byte 2 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ -byte 3 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ -\multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\ -byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & -byte stream length: 1 byte -\end{tabular} -\vspace{\baselineskip} - -Continue by encoding the 3 bit signed integer value `-1' (\bin{111}). - -\begin{tabular}{r|ccccccccl} -\multicolumn{1}{r}{} &&&&&&&&$\downarrow$& \\ - & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9} -byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} & - \textbf{1} & \textbf{1} & \textbf{1} & 0 & $\leftarrow$ \\ -byte 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ -byte 2 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ -byte 3 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ -\multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\ -byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & -byte stream length: 1 byte -\end{tabular} -\vspace{\baselineskip} - -Continue by encoding the 7 bit integer value `17' (\bin{0010001}). - -\begin{tabular}{r|ccccccccl} -\multicolumn{1}{r}{} &&&&&&&$\downarrow$&& \\ - & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9} -byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} & - \textbf{1} & \textbf{1} & \textbf{1} & \textbf{0} & \\ -byte 1 & \textbf{0} & \textbf{1} & \textbf{0} & \textbf{0} & - \textbf{0} & \textbf{1} & 0 & 0 & $\leftarrow$ \\ -byte 2 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ -byte 3 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ -\multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\ -byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & -byte stream length: 2 bytes -\end{tabular} -\vspace{\baselineskip} - -Continue by encoding the 13 bit integer value `6969' (\bin{11011\ 00111001}). - -\begin{tabular}{r|ccccccccl} -\multicolumn{1}{r}{} &&&&$\downarrow$&&&&& \\ - & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9} -byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} & - \textbf{1} & \textbf{1} & \textbf{1} & \textbf{0} & \\ -byte 1 & \textbf{0} & \textbf{1} & \textbf{0} & \textbf{0} & - \textbf{0} & \textbf{1} & \textbf{1} & \textbf{1} & \\ -byte 2 & \textbf{0} & \textbf{1} & \textbf{1} & \textbf{0} & - \textbf{0} & \textbf{1} & \textbf{1} & \textbf{1} & \\ -byte 3 & \textbf{0} & \textbf{0} & \textbf{1} & - 0 & 0 & 0 & 0 & 0 & $\leftarrow$ \\ -\multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\ -byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & -byte stream length: 4 bytes -\end{tabular} -\vspace{\baselineskip} - -\subsection{Decoding Example} - -The following example shows the state of the (8-bit) byte stream encoded in the - previous example after several binary integers are decoded, including the - location of the get pointer for the next bit to read. - -Read a two bit unsigned integer from the example encoded above. - -\begin{tabular}{r|ccccccccl} -\multicolumn{1}{r}{} &&&$\downarrow$&&&&&& \\ - & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9} -byte 0 & \textbf{1} & \textbf{1} & 0 & 0 & 1 & 1 & 1 & 0 & $\leftarrow$ \\ -byte 1 & 0 & 1 & 0 & 0 & 0 & 1 & 1 & 1 & \\ -byte 2 & 0 & 1 & 1 & 0 & 0 & 1 & 1 & 1 & \\ -byte 3 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 & -byte stream length: 4 bytes -\end{tabular} -\vspace{\baselineskip} - -Value read: 3 (\bin{11}). - -Read another two bit unsigned integer from the example encoded above. - -\begin{tabular}{r|ccccccccl} -\multicolumn{1}{r}{} &&&&&$\downarrow$&&&& \\ - & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9} -byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} & - 1 & 1 & 1 & 0 & $\leftarrow$ \\ -byte 1 & 0 & 1 & 0 & 0 & 0 & 1 & 1 & 1 & \\ -byte 2 & 0 & 1 & 1 & 0 & 0 & 1 & 1 & 1 & \\ -byte 3 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 & -byte stream length: 4 bytes -\end{tabular} -\vspace{\baselineskip} - -Value read: 0 (\bin{00}). - -Two things are worth noting here. -\begin{itemize} -\item -Although these four bits were originally written as a single four-bit integer, - reading some other combination of bit-widths from the bitstream is well - defined. -No artificial alignment boundaries are maintained in the bitstream. -\item -The first value is the integer `$3$' only because the context stated we were - reading an unsigned integer. -Had the context stated we were reading a signed integer, the returned value - would have been the integer `$-1$'. -\end{itemize} - -\subsection{End-of-Packet Alignment} - -The typical use of bitpacking is to produce many independent byte-aligned - packets which are embedded into a larger byte-aligned container structure, - such as an Ogg transport bitstream. -Externally, each bitstream encoded as a byte stream MUST begin and end on a - byte boundary. -Often, the encoded packet bitstream is not an integer number of bytes, and so - there is unused space in the last byte of a packet. - -%r: I think the generality here is necessary to be consistent with our assertions -%r: elsewhere about being independent of transport and byte width -When a Theora encoder produces packets for embedding in a byte-aligned - container, unused space in the last byte of a packet is always zeroed during - the encoding process. -Thus, should this unused space be read, it will return binary zeroes. -There is no marker pattern or stuffing bits that will allow the decoder to - obtain the exact size, in bits, of the original bitstream. -This knowledge is not required for decoding. - -Attempting to read past the end of an encoded packet results in an - `end-of-packet' condition. -Any further read operations after an `end-of-packet' condition shall also - return `end-of-packet'. -Unlike Vorbis, Theora does not use truncated packets as a normal mode of - operation. -Therefore if a decoder encounters the `end-of-packet' condition during normal - decoding, it may attempt to use the bits that were read to recover as much of - encoded data as possible, signal a warning or error, or both. - -\subsection{Reading Zero Bit Integers} - -Reading a zero bit integer returns the value `$0$' and does not increment - the stream pointer. -Reading to the end of the packet, but not past the end, so that an - `end-of-packet' condition is not triggered, and then reading a zero bit - integer shall succeed, returning `$0$', and not trigger an `end-of-packet' - condition. -Reading a zero bit integer after a previous read sets the `end-of-packet' - condition shall fail, also returning `end-of-packet'. - -\chapter{Bitstream Headers} -\label{sec:headers} - -A Theora bitstream begins with three header packets. -The header packets are, in order, the identification header, the comment - header, and the setup header. -All are required for decode compliance. -An end-of-packet condition encountered while decoding the identification or - setup header packets renders the stream undecodable. -An end-of-packet condition encountered while decode the comment header is a - non-fatal error condition, and MAY be ignored by a decoder. - -\paragraph{VP3 Compatibility} - -VP3 relies on the headers provided by its container, usually either AVI or - Quicktime. -As such, several parameters available in these headers are not available to VP3 - streams. -These are indicated as they appear in the sections below. - -\section{Common Header Decode} -\label{sub:common-header} - -\begin{figure}[Htbp] -\begin{center} -\begin{verbatim} - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | header type | `t' | `h' | `e' | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | `o' | `r' | `a' | data... | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | ... header-specific data ... | - | ... | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -\end{verbatim} -\end{center} -\caption{Common Header Packet Layout} -\label{fig:commonheader} -\end{figure} - - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{HEADERTYPE} & Integer & 8 & No & The type of the header being - decoded. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:} None. -\medskip - -Each header packet begins with the same header fields, which are decoded as - follows: - -\begin{enumerate} -\item -Read an 8-bit unsigned integer as \bitvar{HEADERTYPE}. -If the most significant bit of this integer is not set, then stop. -This is not a header packet. -\item -Read 6 8-bit unsigned integers. -If these do not have the values \hex{74}, \hex{68}, \hex{65}, \hex{6F}, - \hex{72}, and \hex{61}, respectively, then stop. -This stream is not decodable by this specification. -These values correspond to the ASCII values of the characters `t', `h', `e', - `o', `r', and `a'. -\end{enumerate} - -Decode continues according to \bitvar{HEADERTYPE}. -The identification header is type \hex{80}, the comment header is type - \hex{81}, and the setup header is type \hex{82}. -These packets must occur in the order: identification, comment, setup. -%r: I clarified the initial-bit scheme here -%TBT: Dashes let the reader know they'll have to pick up the rest of the -%TBT: sentence after the explanatory phrase. -%TBT: Otherwise it just sounds like the bit must exist. -All header packets have the most significant bit of the type - field---which is the initial bit in the packet---set. -This distinguishes them from video data packets in which the first bit - is unset. -% extra header packets are a feature Dan argued for way back when for -% backward-compatible extensions (and icc colourspace for example) -% I think it's reasonable -%TBT: You can always just stick more stuff in the setup header. -Packets with other header types (\hex{83}--\hex{FF}) are reserved and MUST be - ignored. - -\section{Identification Header Decode} -\label{sec:idheader} - -\begin{figure}[Htbp] -\begin{center} -\begin{verbatim} - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 0x80 | `t' | `h' | `e' | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | `o' | `r' | `a' | VMAJ | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | VMIN | VREV | FMBW | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | FMBH | PICW... | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | ...PICW | PICH | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PICX | PICY | FRN... | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | ...FRN | FRD... | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | ...FRD | PARN... | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | ...PARN | PARD | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | CS | NOMBR | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | QUAL | KFGSHIFT| PF| Res | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -\end{verbatim} -\end{center} -\caption{Identification Header Packet} -\label{fig:idheader} -\end{figure} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{VMAJ} & Integer & 8 & No & The major version number. \\ -\bitvar{VMIN} & Integer & 8 & No & The minor version number. \\ -\bitvar{VREV} & Integer & 8 & No & The version revision number. \\ -\bitvar{FMBW} & Integer & 16 & No & The width of the frame in macro - blocks. \\ -\bitvar{FMBH} & Integer & 16 & No & The height of the frame in macro - blocks. \\ -\bitvar{NSBS} & Integer & 32 & No & The total number of super blocks in a - frame. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a - frame. \\ -\bitvar{PICW} & Integer & 20 & No & The width of the picture region in - pixels. \\ -\bitvar{PICH} & Integer & 20 & No & The height of the picture region in - pixels. \\ -\bitvar{PICX} & Integer & 8 & No & The X offset of the picture region in - pixels. \\ -\bitvar{PICY} & Integer & 8 & No & The Y offset of the picture region in - pixels. \\ -\bitvar{FRN} & Integer & 32 & No & The frame-rate numerator. \\ -\bitvar{FRD} & Integer & 32 & No & The frame-rate denominator. \\ -\bitvar{PARN} & Integer & 24 & No & The pixel aspect-ratio numerator. \\ -\bitvar{PARD} & Integer & 24 & No & The pixel aspect-ratio denominator. \\ -\bitvar{CS} & Integer & 8 & No & The color space. \\ -\bitvar{PF} & Integer & 2 & No & The pixel format. \\ -\bitvar{NOMBR} & Integer & 24 & No & The nominal bitrate of the stream, in - bits per second. \\ -\bitvar{QUAL} & Integer & 6 & No & The quality hint. \\ -\bitvar{KFGSHIFT} & Integer & 5 & No & The amount to shift the key frame - number by in the granule position. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:} None. -\medskip - -The identification header is a short header with only a few fields used to - declare the stream definitively as Theora and provide detailed information - about the format of the fully decoded video data. -The identification header is decoded as follows: - -\begin{enumerate} -\item -Decode the common header fields according to the procedure described in - Section~\ref{sub:common-header}. -If \bitvar{HEADERTYPE} returned by this procedure is not \hex{80}, then stop. -This packet is not the identification header. -\item -Read an 8-bit unsigned integer as \bitvar{VMAJ}. -If \bitvar{VMAJ} is not $3$, then stop. -This stream is not decodable according to this specification. -\item -Read an 8-bit unsigned integer as \bitvar{VMIN}. -If \bitvar{VMIN} is not $2$, then stop. -This stream is not decodable according to this specification. -\item -Read an 8-bit unsigned integer as \bitvar{VREV}. -If \bitvar{VREV} is greater than $1$, then this stream -may contain optional features or interpretational changes -documented in a future version of this specification. -Regardless of the value of \bitvar{VREV}, the stream is decodable -according to this specification. -\item -Read a 16-bit unsigned integer as \bitvar{FMBW}. -This MUST be greater than zero. -This specifies the width of the coded frame in macro blocks. -The actual width of the frame in pixels is $\bitvar{FMBW}*16$. -\item -Read a 16-bit unsigned integer as \bitvar{FMBH}. -This MUST be greater than zero. -This specifies the height of the coded frame in macro blocks. -The actual height of the frame in pixels is $\bitvar{FMBH}*16$. -\item -Read a 24-bit unsigned integer as \bitvar{PICW}. -This MUST be no greater than $(\bitvar{FMBW}*16)$. -Note that 24 bits are read, even though only 20 bits are sufficient to specify - any value of the picture width. -This is done to preserve octet alignment in this header, to allow for a - simplified parser implementation. -\item -Read a 24-bit unsigned integer as \bitvar{PICH}. -This MUST be no greater than $(\bitvar{FMBH}*16)$. -Together with \bitvar{PICW}, this specifies the size of the displayable picture - region within the coded frame. -See Figure~\ref{fig:pic-frame}. -Again, 24 bits are read instead of 20. -\item -Read an 8-bit unsigned integer as \bitvar{PICX}. -This MUST be no greater than $(\bitvar{FMBW}*16-\bitvar{PICX})$. -\item -Read an 8-bit unsigned integer as \bitvar{PICY}. -This MUST be no greater than $(\bitvar{FMBH}*16-\bitvar{PICY})$. -Together with \bitvar{PICX}, this specifies the location of the lower-left - corner of the displayable picture region. -See Figure~\ref{fig:pic-frame}. -\item -Read a 32-bit unsigned integer as \bitvar{FRN}. -This MUST be greater than zero. -\item -Read a 32-bit unsigned integer as \bitvar{FRD}. -This MUST be greater than zero. -Theora is a fixed-frame rate video codec. -Frames are sampled at the constant rate of $\frac{\bitvar{FRN}}{\bitvar{FRD}}$ - frames per second. -The presentation time of the first frame is at zero seconds. -No mechanism is provided to specify a non-zero offset for the initial - frame. -\item -Read a 24-bit unsigned integer as \bitvar{PARN}. -\item -Read a 24-bit unsigned integer as \bitvar{PARD}. -Together with \bitvar{PARN}, these specify the aspect ratio of the pixels - within a frame, defined as the ratio of the physical width of a pixel to its - physical height. -This is given by the ratio $\bitvar{PARN}:\bitvar{PARD}$. -If either of these fields are zero, this indicates that pixel aspect ratio - information was not available to the encoder. -In this case it MAY be specified by the application via an external means, or - a default value of $1:1$ MAY be used. -\item -Read an 8-bit unsigned integer as \bitvar{CS}. -This is a value from an enumerated list of the available color spaces, given in - Table~\ref{tab:colorspaces}. -The `Undefined' value indicates that color space information was not available - to the encoder. -It MAY be specified by the application via an external means. -If a reserved value is given, a decoder MAY refuse to decode the stream. -\begin{table}[htbp] -\begin{center} -\begin{tabular*}{215pt}{cl@{\extracolsep{\fill}}c}\toprule -Value & Color Space \\\midrule -$0$ & Undefined. \\ -$1$ & Rec.~470M (see Section~\ref{sec:470m}). \\ -$2$ & Rec.~470BG (see Section~\ref{sec:470bg}). \\ -$3$ & Reserved. \\ -$\vdots$ & \\ -$255$ & \\ -\bottomrule\end{tabular*} -\end{center} -\caption{Enumerated List of Color Spaces} -\label{tab:colorspaces} -\end{table} -\item -Read a 24-bit unsigned integer as \bitvar{NOMBR} signifying a rate in bits -per second. Rates equal to or greater than $2^{24}-1$ bits per second are -represented as $2^{24}-1$. -The \bitvar{NOMBR} field is used only as a hint. -For pure VBR streams, this value may be considerably off. -The field MAY be set to zero to indicate that the encoder did not care to -speculate. -\item -Read a 6-bit unsigned integer as \bitvar{QUAL}. -This value is used to provide a hint as to the relative quality of the stream - when compared to others produced by the same encoder. -Larger values indicate higher quality. -This can be used, for example, to select among several streams containing the - same material encoded with different settings. -\item -Read a 5-bit unsigned integer as \bitvar{KFGSHIFT}. -The \bitvar{KFGSHIFT} is used to partition the granule position associated with - each packet into two different parts. -The frame number of the last key frame, starting from zero, is stored in the - upper $64-\bitvar{KFGSHIFT}$ bits, while the lower \bitvar{KFGSHIFT} bits - contain the number of frames since the last keyframe. -Complete details on the granule position mapping are specified in Section~REF. -\item -Read a 2-bit unsigned integer as \bitvar{PF}. -The \bitvar{PF} field contains a value from an enumerated list of the available - pixel formats, given in Table~\ref{tab:pixel-formats}. -If the reserved value $1$ is given, stop. -This stream is not decodable according to this specification. - -\begin{table}[htbp] -\begin{center} -\begin{tabular*}{215pt}{cl@{\extracolsep{\fill}}c}\toprule -Value & Pixel Format \\\midrule -$0$ & 4:2:0 (see Section~\ref{sec:420}). \\ -$1$ & Reserved. \\ -$2$ & 4:2:2 (see Section~\ref{sec:422}). \\ -$3$ & 4:4:4 (see Section~\ref{sec:444}). \\ -\bottomrule\end{tabular*} -\end{center} -\caption{Enumerated List of Pixel Formats} -\label{tab:pixel-formats} -\end{table} - -\item -Read a 3-bit unsigned integer. -These bits are reserved. -If this value is not zero, then stop. -This stream is not decodable according to this specification. -\item -Assign \bitvar{NSBS} a value according to \bitvar{PF}, as given by - Table~\ref{tab:nsbs-for-pf}. - -\begin{table}[bt] -\begin{center} -\begin{tabular}{cc}\toprule -\bitvar{PF} & \bitvar{NSBS} \\\midrule -$0$ & $\begin{aligned} -&((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)\\ -& +2*((\bitvar{FMBW}+3)//4)*((\bitvar{FMBH}+3)//4) -\end{aligned}$ \\\midrule -$2$ & $\begin{aligned} -&((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)\\ -& +2*((\bitvar{FMBW}+3)//4)*((\bitvar{FMBH}+1)//2) -\end{aligned}$ \\\midrule -$3$ & $3*((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Number of Super Blocks for each Pixel Format} -\label{tab:nsbs-for-pf} -\end{table} - -\item -Assign \bitvar{NBS} a value according to \bitvar{PF}, as given by - Table~\ref{tab:nbs-for-pf}. - -\begin{table}[tb] -\begin{center} -\begin{tabular}{cc}\toprule -\bitvar{PF} & \bitvar{NBS} \\\midrule -$0$ & $6*\bitvar{FMBW}*\bitvar{FMBH}$ \\\midrule -$2$ & $8*\bitvar{FMBW}*\bitvar{FMBH}$ \\\midrule -$3$ & $12*\bitvar{FMBW}*\bitvar{FMBH}$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Number of Blocks for each Pixel Format} -\label{tab:nbs-for-pf} -\end{table} - -\item -Assign \bitvar{NMBS} the value $(\bitvar{FMBW}*\bitvar{FMBH})$. - -\end{enumerate} - -\paragraph{VP3 Compatibility} - -VP3 does not correctly handle frame sizes that are not a multiple of 16. -Thus, \bitvar{PICW} and \bitvar{PICH} should be set to the frame width and - height in pixels, respectively, and \bitvar{PICX} and \bitvar{PICY} should be - set to zero. -VP3 headers do not specify a color space. -VP3 only supports the 4:2:0 pixel format. - -\section{Comment Header} -\label{sec:commentheader} - -The Theora comment header is the second of three header packets that begin a - Theora stream. -It is meant for short text comments, not aribtrary metadata; arbitrary metadata - belongs in a separate logical stream that provides greater structure and - machine parseability. - -%r: I tried to morph this a little more in the direction of our -% application space -The comment field is meant to be used much like someone jotting a quick note on - the label of a video. -It should be a little information to remember the disc or tape by and explain it to - others; a short, to-the-point text note that can be more than a couple words, - but isn't going to be more than a short paragraph. -The essentials, in other words, whatever they turn out to be, e.g.: - -%TODO: Example - -The comment header is stored as a logical list of eight-bit clean vectors; the - number of vectors is bounded at $2^{32}-1$ and the length of each vector is - limited to $2^{32}-1$ bytes. -The vector length is encoded; the vector contents themselves are not null - terminated. -In addition to the vector list, there is a single vector for a vendor name, - also eight-bit clean with a length encoded in 32 bits. -%TODO: The 1.0 release of libtheora sets the vendor string to ... - -\subsection{Comment Length Decode} -\label{sub:comment-len} - -\begin{figure} -\begin{center} -\begin{tabular}{ | c | c | } - \hline - 4 byte length & - UTF-8 encoded string ...\\ - \hline -\end{tabular} -\end{center} -\caption{Length encoded string layout} -\label{fig:comment-len} -\end{figure} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{LEN} & Integer & 32 & No & A single 32-bit length value. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{LEN0} & Integer & 8 & No & The first octet of the string length. \\ -\locvar{LEN1} & Integer & 8 & No & The second octet of the string length. \\ -\locvar{LEN2} & Integer & 8 & No & The third octet of the string length. \\ -\locvar{LEN3} & Integer & 8 & No & The fourth octet of the string - length. \\ -\bottomrule\end{tabularx} -\medskip - -A single comment vector is decoded as follows: - -\begin{enumerate} -\item -Read an 8-bit unsigned integer as \locvar{LEN0}. -\item -Read an 8-bit unsigned integer as \locvar{LEN1}. -\item -Read an 8-bit unsigned integer as \locvar{LEN2}. -\item -Read an 8-bit unsigned integer as \locvar{LEN3}. -\item -Assign \bitvar{LEN} the value $(\locvar{LEN0}+(\locvar{LEN1}<<8)+ - (\locvar{LEN2}<<16)+(\locvar{LEN3}<<24))$. -This construction is used so that on platforms with 8-bit bytes, the memory - organization of the comment header is identical with that of Vorbis I, - allowing for common parsing code despite the different bit packing - conventions. -\end{enumerate} - -\subsection{Comment Header Decode} - -\begin{figure} -\begin{center} -\begin{tabular}{ | c | } - \hline - vendor string \\ \hline - number of comments \\ \hline - comment string \\ \hline - comment string \\ \hline - ... \\ - \hline -\end{tabular} -\end{center} -\caption{Comment Header Layout} -\label{fig:commentheader} -\end{figure} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{VENDOR} & \multicolumn{3}{l}{String} & The vendor string. \\ -\bitvar{NCOMMENTS} & Integer & 32 & No & The number of user - comments. \\ -\bitvar{COMMENTS} & \multicolumn{3}{l}{String Array} & A list of - \bitvar{NCOMMENTS} user comment values. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\ci} & Integer & 32 & No & The index of the current user - comment. \\ -\bottomrule\end{tabularx} -\medskip - -The complete comment header is decoded as follows: - -\begin{enumerate} -\item -Decode the common header fields according to the procedure described in - Section~\ref{sub:common-header}. -If \bitvar{HEADERTYPE} returned by this procedure is not \hex{81}, then stop. -This packet is not the comment header. -\item -Decode the length of the vendor string using the procedure given in - Section~\ref{sub:comment-len} into \bitvar{LEN}. -\item -Read \bitvar{LEN} 8-bit unsigned integers. -\item -Set the string \bitvar{VENDOR} to the contents of these octets. -\item -Decode the number of user comments using the procedure given in - Section~\ref{sub:comment-len} into \bitvar{LEN}. -\item -Assign \bitvar{NCOMMENTS} the value stored in \bitvar{LEN}. -\item -For each consecutive value of \locvar{\ci} from $0$ to - $(\bitvar{NCOMMENTS}-1)$, inclusive: -\begin{enumerate} -\item -Decode the length of the current user comment using the procedure given in - Section~\ref{sub:comment-len} into \bitvar{LEN}. -\item -Read \bitvar{LEN} 8-bit unsigned integers. -\item -Set the string $\bitvar{COMMENTS}[\locvar{\ci}]$ to the contents of these - octets. -\end{enumerate} -\end{enumerate} - -The comment header comprises the entirety of the second header packet. -Unlike the first header packet, it is not generally the only packet on the - second page and may span multiple pages. -The length of the comment header packet is (practically) unbounded. -The comment header packet is not optional; it must be present in the stream - even if it is logically empty. - -%TODO: \paragraph{VP3 Compatibility} - -\subsection{User Comment Format} - -The user comment vectors are structured similarly to a UNIX environment - variable. -That is, comment fields consist of a field name and a corresponding value and - look like: -\begin{center} -\begin{tabular}{rcl} -$\bitvar{COMMENTS}[0]$ & = & ``TITLE=the look of Theora" \\ -$\bitvar{COMMENTS}[1]$ & = & ``DIRECTOR=me" -\end{tabular} -\end{center} - -The field name is case-insensitive and MUST consist of ASCII characters - \hex{20} through \hex{7D}, \hex{3D} (`=') excluded. -ASCII \hex{41} through \hex{5A} inclusive (characters `A'--`Z') are to be - considered equivalent to ASCII \hex{61} through \hex{7A} inclusive - (characters `a'--`z'). -An entirely empty field name---one that is zero characters long---is not - disallowed. - -The field name is immediately followed by ASCII \hex{3D} (`='); this equals - sign is used to terminate the field name. - -The data immediately after \hex{3D} until the end of the vector is the eight-bit - clean value of the field contents encoded as a UTF-8 string~\cite{rfc2044}. - -Field names MUST NOT be `internationalized'; this is a concession to - simplicity, not an attempt to exclude the majority of the world that doesn't - speak English. -Applications MAY wish to present internationalized versions of the standard - field names listed below to the user, but they are not to be stored in the - bitstream. -Field {\em contents}, however, use the UTF-8 character encoding to allow easy - representation of any language. - -Individual `vendors' MAY use non-standard field names within reason. -The proper use of comment fields as human-readable notes has already been - explained. -Abuse will be discouraged. - -There is no vendor-specific prefix to `non-standard' field names. -Vendors SHOULD make some effort to avoid arbitrarily polluting the common - namespace. -%"and other bodies"? -%If you're going to be that vague, you might as well not say anything at all. -Xiph.org and other bodies will generally collect and rationalize the more - useful tags to help with standardization. - -Field names are not restricted to occur only once within a comment header. -%TODO: Example - -\paragraph{Field Names} - -%r should this be an appendix? - -Below is a proposed, minimal list of standard field names with a description of - their intended use. -No field names are mandatory; a comment header may contain one or more, all, or - none of the names in this list. - -\begin{description} -\item{TITLE:} Video name. -\item{ARTIST:} Filmmaker or other creator name. -\item{VERSION:} Subtitle, remix info, or other text distinguishing - versions of a video. -\item{DATE:} Date associated with the video. Implementations SHOULD attempt - to parse this field as an ISO 8601 date for machine interpretation and - conversion. -\item{LOCATION:} Location associated with the video. This is usually the - filming location for non-fiction works. -\item{COPYRIGHT:} Copyright statement. -\item{LICENSE:} Copyright and other licensing information. - Implementations wishing to do automatic parsing of e.g - of distribution terms SHOULD look here for a URL uniquely defining - the license. If no instance of this field is present, or if no - instance contains a parseable URL, and implementation MAY look - in the COPYRIGHT field for such a URL. -\item{ORGANIZATION:} Studio name, Publisher, or other organization - involved in the creation of the video. - -\item{DIRECTOR:} Director or Filmmaker credit, similar to ARTIST. -\item{PRODUCER:} Producer credit for the video. -\item{COMPOSER:} Music credit for the video. -\item{ACTOR:} Acting credit for the video. - -\item{TAG:} subject or category tag, keyword, or other content - classification labels. The value of each instance of this - field SHOULD be treated as a single label, with multiple - instances of the field for multiple tags. The value of - a single field SHOULD NOT be parsed into multiple tags - based on some internal delimeter. -\item{DESCRIPTION:} General description, summary, or blurb. -\end{description} - -\section{Setup Header} -\label{sec:setupheader} - -The Theora setup header contains the limit values used to drive the loop - filter, the base matrices and scale values used to build the dequantization - tables, and the Huffman tables used to unpack the DCT tokens. -Because the contents of this header are specific to Theora, no concessions have - been made to keep the fields octet-aligned for easy parsing. - -\begin{figure} -\begin{center} -\begin{tabular}{ | c | } - \hline - common header block \\ \hline - loop filter table resolution \\ \hline - loop filter table \\ \hline - scale table resolution \\ \hline - AC scale table \\ \hline - DC scale table \\ \hline - number of base matricies \\ \hline - base quatization matricies \\ \hline - ... \\ \hline - quant range interpolation table \\ \hline - DCT token Huffman tables \\ - \hline -\end{tabular} -\end{center} -\caption{Setup Header structure} -\label{fig:setupheader} -\end{figure} - -\subsection{Loop Filter Limit Table Decode} -\label{sub:loop-filter-limits} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{LFLIMS} & \multicolumn{1}{p{40pt}}{Integer array} & - 7 & No & A 64-element array of loop filter limit - values. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\qi} & Integer & 6 & No & The quantization index. \\ -\locvar{NBITS} & Integer & 3 & No & The size of values being read in the - current table. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure decodes the table of loop filter limit values used to drive the - loop filter, which is described in Section~\ref{sub:loop-filter-limits}. -It is decoded as follows: - -\begin{enumerate} -\item -Read a 3-bit unsigned integer as \locvar{NBITS}. -\item -For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive: -\begin{enumerate} -\item -Read an \locvar{NBITS}-bit unsigned integer as $\bitvar{LFLIMS}[\locvar{\qi}]$. -\end{enumerate} -\end{enumerate} - -\paragraph{VP3 Compatibility} - -The loop filter limit values are hardcoded in VP3. -The values used are given in Appendix~\ref{app:vp3-loop-filter-limits}. - -\subsection{Quantization Parameters Decode} -\label{sub:quant-params} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values for - AC coefficients for each \qi\ value. \\ -\bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values for - the DC coefficient for each \qi\ value. \\ -\bitvar{NBMS} & Integer & 10 & No & The number of base matrices. \\ -\bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 8 & No & A $\bitvar{NBMS}\times 64$ array - containing the base matrices. \\ -\bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 6 & No & A $2\times 3$ array containing the - number of quant ranges for a given \qti\ and \pli, respectively. -This is at most $63$. \\ -\bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 6 & No & A $2\times 3\times 63$ array of the - sizes of each quant range for a given \qti\ and \pli, respectively. -Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\ -\bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 9 & No & A $2\times 3\times 64$ array of the - \bmi's used for each quant range for a given \qti\ and \pli, respectively. -Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\qti} & Integer & 1 & No & A quantization type index. -See Table~\ref{tab:quant-types}.\\ -\locvar{\qtj} & Integer & 1 & No & A quantization type index. \\ -\locvar{\pli} & Integer & 2 & No & A color plane index. -See Table~\ref{tab:color-planes}.\\ -\locvar{\plj} & Integer & 2 & No & A color plane index. \\ -\locvar{\qi} & Integer & 6 & No & The quantization index. \\ -\locvar{\ci} & Integer & 6 & No & The DCT coefficient index. \\ -\locvar{\bmi} & Integer & 9 & No & The base matrix index. \\ -\locvar{\qri} & Integer & 6 & No & The quant range index. \\ -\locvar{NBITS} & Integer & 5 & No & The size of fields to read. \\ -\locvar{NEWQR} & Integer & 1 & No & Flag that indicates a new set of quant - ranges will be defined. \\ -\locvar{RPQR} & Integer & 1 & No & Flag that indicates the quant ranges to - copy will come from the same color plane. \\ -\bottomrule\end{tabularx} -\medskip - -The AC scale and DC scale values are defined in two simple tables with 64 - values each, one for each \qi\ value. -The same scale values are used for every quantization type and color plane. - -The base matrices for all quantization types and color planes are stored in a - single table. -These are then referenced by index in several sets of \term{quant ranges}. -The purpose of the quant ranges is to specify which base matrices are used for - which \qi\ values. - -A set of quant ranges is defined for each quantization type and color plane. -To save space in the header, bit flags allow a set of quant ranges to be copied - from a previously defined set instead of being specified explicitly. -Every set except the first one can be copied from the immediately preceding - set. -Similarly, if the quantization type is not $0$, the set can be copied from the - set defined for the same color plane for the preceding quantization type. -This formulation allows compact representation of, for example, the same - set of quant ranges in both chroma channels, as is done in the original VP3, - or the same set of quant ranges in INTRA and INTER modes. - -Each quant range is defined by a size and two base matrix indices, one for each - end of the range. -The base matrix for the end of one range is used as the start of the next - range, so that for $n$ ranges, $n+1$ base matrices are specified. -The base matrices for the \qi\ values between the two endpoints of the range - are generated by linear interpolation. - -%TODO: figure - -The location of the endpoints of each range is encoded by their size. -The \qi\ value for the left end-point is the sum of the sizes of all preceding - ranges, and the \qi\ value for the right end-point adds the size of the - current range. -Thus the sum of the sizes of all the ranges MUST be 63, so that the last range - falls on the last possible \qi\ value. - -The complete set of quantization parameters are decoded as follows: - -\begin{enumerate} -\item -Read a 4-bit unsigned integer. -Assign \locvar{NBITS} the value read, plus one. -\item -For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive: -\begin{enumerate} -\item -Read an \locvar{NBITS}-bit unsigned integer as - $\bitvar{ACSCALE}[\locvar{\qi}]$. -\end{enumerate} -\item -Read a 4-bit unsigned integer. -Assign \locvar{NBITS} the value read, plus one. -\item -For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive: -\begin{enumerate} -\item -Read an \locvar{NBITS}-bit unsigned integer as - $\bitvar{DCSCALE}[\locvar{\qi}]$. -\end{enumerate} -\item -Read a 9-bit unsigned integer. -Assign \bitvar{NBMS} the value decoded, plus one. -\bitvar{NBMS} MUST be no greater than 384. -\item -For each consecutive value of \locvar{\bmi} from $0$ to $(\bitvar{NBMS}-1)$, - inclusive: -\begin{enumerate} -\item -For each consecutive value of \locvar{\ci} from $0$ to $63$, inclusive: -\begin{enumerate} -\item -Read an 8-bit unsigned integer as $\bitvar{BMS}[\locvar{\bmi}][\locvar{\ci}]$. -\end{enumerate} -\end{enumerate} -\item -For each consecutive value of \locvar{\qti} from $0$ to $1$, inclusive: -\begin{enumerate} -\item -For each consecutive value of \locvar{\pli} from $0$ to $2$, inclusive: -\begin{enumerate} -\item -If $\locvar{\qti}>0$ or $\locvar{\pli}>0$, read a 1-bit unsigned integer as - \locvar{NEWQR}. -\item -Else, assign \locvar{NEWQR} the value one. -\item -If \locvar{NEWQR} is zero, then we are copying a previously defined set of - quant ranges. -In that case: -\begin{enumerate} -\item -If $\locvar{\qti}>0$, read a 1-bit unsigned integer as \locvar{RPQR}. -\item -Else, assign \locvar{RPQR} the value zero. -\item -If \locvar{RPQR} is one, assign \locvar{\qtj} the value $(\locvar{\qti}-1)$ - and assign \locvar{\plj} the value \locvar{\pli}. -This selects the set of quant ranges defined for the same color plane as this - one, but for the previous quantization type. -\item -Else assign \locvar{\qtj} the value $(3*\locvar{\qti}+\locvar{\pli}-1)//3$ and - assign \locvar{\plj} the value $(\locvar{\pli}+2)\%3$. -This selects the most recent set of quant ranges defined. -\item -Assign $\bitvar{NQRS}[\locvar{\qti}][\locvar{\pli}]$ the value - $\bitvar{NQRS}[\locvar{\qtj}][\locvar{\plj}]$. -\item -Assign $\bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}]$ the values in - $\bitvar{QRSIZES}[\locvar{\qtj}][\locvar{\plj}]$. -\item -Assign $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}]$ the values in - $\bitvar{QRBMIS}[\locvar{\qtj}][\locvar{\plj}]$. -\end{enumerate} -\item -Else, \locvar{NEWQR} is one, which indicates that we are defining a new set of - quant ranges. -In that case: -\begin{enumerate} -\item -Assign $\locvar{\qri}$ the value zero. -\item -Assign $\locvar{\qi}$ the value zero. -\item -Read an $\ilog(\bitvar{NBMS}-1)$-bit unsigned integer as\\ - $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$. -If this is greater than or equal to \bitvar{NBMS}, stop. -The stream is undecodable. -\item -\label{step:qr-loop} -Read an $\ilog(62-\locvar{\qi})$-bit unsigned integer. -Assign\\ $\bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$ the value - read, plus one. -\item -Assign \locvar{\qi} the value $\locvar{\qi}+ - \bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$. -\item -Assign \locvar{\qri} the value $\locvar{\qri}+1$. -\item -Read an $\ilog(\bitvar{NBMS}-1)$-bit unsigned integer as\\ - $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$. -\item -If \locvar{\qi} is less than 63, go back to step~\ref{step:qr-loop}. -\item -If \locvar{\qi} is greater than 63, stop. -The stream is undecodable. -\item -Assign $\bitvar{NQRS}[\locvar{\qti}][\locvar{\pli}]$ the value \locvar{\qri}. -\end{enumerate} -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\paragraph{VP3 Compatibility} - -The quantization parameters are hardcoded in VP3. -The values used are given in Appendix~\ref{app:vp3-quant-params}. - -\subsection{Computing a Quantization Matrix} -\label{sub:quant-mat} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values for - AC coefficients for each \qi\ value. \\ -\bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values for - the DC coefficient for each \qi\ value. \\ -\bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 8 & No & A $\bitvar{NBMS}\times 64$ array - containing the base matrices. \\ -\bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 6 & No & A $2\times 3$ array containing the - number of quant ranges for a given \qti\ and \pli, respectively. -This is at most $63$. \\ -\bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 6 & No & A $2\times 3\times 63$ array of the - sizes of each quant range for a given \qti\ and \pli, respectively. -Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\ -\bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 9 & No & A $2\times 3\times 64$ array of the - \bmi's used for each quant range for a given \qti\ and \pli, respectively. -Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\ -\bitvar{\qti} & Integer & 1 & No & A quantization type index. -See Table~\ref{tab:quant-types}.\\ -\bitvar{\pli} & Integer & 2 & No & A color plane index. -See Table~\ref{tab:color-planes}.\\ -\bitvar{\qi} & Integer & 6 & No & The quantization index. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{QMAT} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of quantization - values for each DCT coefficient in natural order. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\ci} & Integer & 6 & No & The DCT coefficient index. \\ -\locvar{\bmi} & Integer & 9 & No & The base matrix index. \\ -\locvar{\bmj} & Integer & 9 & No & The base matrix index. \\ -\locvar{\qri} & Integer & 6 & No & The quant range index. \\ -\locvar{QISTART} & Integer & 6 & No & The left end-point of the \qi\ range. \\ -\locvar{QIEND } & Integer & 6 & No & The right end-point of the \qi\ range. \\ -\locvar{BM} & \multicolumn{1}{p{40pt}}{Integer array} & - 8 & No & A 64-element array containing the - interpolated base matrix. \\ -\locvar{QMIN} & Integer & 16 & No & The minimum quantization value allowed - for the current coefficient. \\ -\locvar{QSCALE} & Integer & 16 & No & The current scale value. \\ -\bottomrule\end{tabularx} -\medskip - -The following procedure can be used to generate a single quantization matrix - for a given quantization type, color plane, and \qi\ value, given the - quantization parameters decoded in Section~\ref{sub:quant-params}. - -Note that the product of the scale value and the base matrix value is in units - of $100$ths of a pixel value, and thus is divided by $100$ to return it to - units of a single pixel value. -This value is then scaled by four, to match the scaling of the DCT output, - which is also a factor of four larger than the orthonormal version of the - transform. - -\begin{enumerate} -\item -Assign \locvar{\qri} the index of a quant range such that -\begin{displaymath} -\bitvar{\qi} \ge \sum_{\qrj=0}^{\locvar{\qri}-1} - \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj], -\end{displaymath} - and -\begin{displaymath} -\bitvar{\qi} \le \sum_{\qrj=0}^{\locvar{\qri}} - \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj], -\end{displaymath} - where summation from $0$ to $-1$ is defined to be zero. -If there is more than one such value of $\locvar{\qri}$, i.e., if \bitvar{\qi} - lies on the boundary between two quant ranges, then the output will be the - same regardless of which one is chosen. -\item -Assign \locvar{QISTART} the value -\begin{displaymath} -\sum_{\qrj=0}^{\qri-1} \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj]. -\end{displaymath} -\item -Assign \locvar{QIEND} the value -\begin{displaymath} -\sum_{\qrj=0}^{\qri} \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj]. -\end{displaymath} -\item -Assign \locvar{\bmi} the value - $\bitvar{QRBMIS}[\bitvar{\qti}][\bitvar{\pli}][\qri]$. -\item -Assign \locvar{\bmj} the value - $\bitvar{QRBMIS}[\bitvar{\qti}][\bitvar{\pli}][\qri+1]$. -\item -For each consecutive value of \locvar{\ci} from $0$ to $63$, inclusive: -\begin{enumerate} -\item -Assign $\locvar{BM}[\locvar{\ci}]$ the value -\begin{displaymath} -\begin{split} -(&2*(\locvar{QIEND}-\bitvar{\qi})*\bitvar{BMS}[\locvar{\bmi}][\locvar{\ci}]\\ - &+2*(\bitvar{\qi}- - \locvar{QISTART})*\bitvar{BMS}[\locvar{\bmj}][\locvar{\ci}]\\ - &+\bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\locvar{\qri}])// - (2*\bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\locvar{\qri}]) -\end{split} -\end{displaymath} -\item -Assign \locvar{QMIN} the value given by Table~\ref{tab:qmin} according to - \bitvar{\qti} and \locvar{\ci}. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{clr}\toprule -Coefficient & \multicolumn{1}{c}{\bitvar{\qti}} - & \locvar{QMIN} \\\midrule -$\locvar{\ci}=0$ & $0$ (Intra) & $16$ \\ -$\locvar{\ci}>0$ & $0$ (Intra) & $8$ \\ -$\locvar{\ci}=0$ & $1$ (Inter) & $32$ \\ -$\locvar{\ci}>0$ & $1$ (Inter) & $16$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Minimum Quantization Values} -\label{tab:qmin} -\end{table} - -\item -If \locvar{\ci} equals zero, assign $\locvar{QSCALE}$ the value - $\bitvar{DCSCALE}[\bitvar{\qi}]$. -\item -Else, assign $\locvar{QSCALE}$ the value - $\bitvar{ACSCALE}[\bitvar{\qi}]$. -\item -Assign $\bitvar{QMAT}[\locvar{\ci}]$ the value -\begin{displaymath} -\max(\locvar{QMIN}, - \min((\locvar{QSCALE}*\locvar{BM}[\locvar{\ci}]//100)*4,4096)). -\end{displaymath} -\end{enumerate} -\end{enumerate} - -\subsection{DCT Token Huffman Tables} -\label{sub:huffman-tables} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{HTS} & \multicolumn{3}{l}{Huffman table array} - & An 80-element array of Huffman tables - with up to 32 entries each. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{HBITS} & Bit string & 32 & No & A string of up to 32 bits. \\ -\locvar{TOKEN} & Integer & 5 & No & A single DCT token value. \\ -\locvar{ISLEAF} & Integer & 1 & No & Flag that indicates if the current - node of the tree being decoded is a leaf node. \\ -\bottomrule\end{tabularx} -\medskip - -The Huffman tables used to decode DCT tokens are stored in the setup header in - the form of a binary tree. -This enforces the requirements that the code be full---so that any sequence of - bits will produce a valid sequence of tokens---and that the code be - prefix-free so that there is no ambiguity when decoding. - -One more restriction is placed on the tables that is not explicitly enforced by - the bitstream syntax, but nevertheless must be obeyed by compliant encoders. -There must be no more than 32 entries in a single table. -Note that this restriction along with the fullness requirement limit the - maximum size of a single Huffman code to 32 bits. -It is probably a good idea to enforce this latter consequence explicitly when - implementing the decoding procedure as a recursive algorithm, so as to prevent - a possible stack overflow given an invalid bitstream. - -Although there are 32 different DCT tokens, and thus a normal table will have - exactly 32 entries, this is not explicitly required. -It is allowable to use a Huffman code that omits some---but not all---of the - possible token values. -It is also allowable, if not particularly useful, to specify multiple codes for - the same token value in a single table. -Note also that token values may appear in the tree in any order. -In particular, it is not safe to assume that token value zero (which ends a - single block), has a Huffman code of all zeros. - -The tree is decoded as follows: - -\begin{enumerate} -\item -For each consecutive value of \locvar{\hti} from $0$ to $79$, inclusive: -\begin{enumerate} -\item -Set \locvar{HBITS} to the empty string. -\item -\label{step:huff-tree-loop} -If \locvar{HBITS} is longer than 32 bits in length, stop. -The stream is undecodable. -\item -Read a 1-bit unsigned integer as \locvar{ISLEAF}. -\item -If \locvar{ISLEAF} is one: -\begin{enumerate} -\item -If the number of entries in table $\bitvar{HTS}[\locvar{\hti}]$ is already 32, - stop. -The stream is undecodable. -\item -Read a 5-bit unsigned integer as \locvar{TOKEN}. -\item -Add the pair $(\locvar{HBITS},\locvar{TOKEN})$ to Huffman table - $\bitvar{HTS}[\locvar{\hti}]$. -\end{enumerate} -\item -Otherwise: -\begin{enumerate} -\item -Add a `0' to the end of \locvar{HBITS}. -\item -Decode the `0' sub-tree using this procedure, starting from - step~\ref{step:huff-tree-loop}. -\item -Remove the `0' from the end of \locvar{HBITS} and add a `1' to the end of - \locvar{HBITS}. -\item -Decode the `1' sub-tree using this procedure, starting from - step~\ref{step:huff-tree-loop}. -\item -Remove the `1' from the end of \locvar{HBITS}. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\paragraph{VP3 Compatibility} - -The DCT token Huffman tables are hardcoded in VP3. -The values used are given in Appendix~\ref{app:vp3-huffman-tables}. - -\subsection{Setup Header Decode} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{LFLIMS} & \multicolumn{1}{p{40pt}}{Integer array} & - 7 & No & A 64-element array of loop filter limit - values. \\ -\bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values for - AC coefficients for each \qi\ value. \\ -\bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values for - the DC coefficient for each \qi\ value. \\ -\bitvar{NBMS} & Integer & 10 & No & The number of base matrices. \\ -\bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 8 & No & A $\bitvar{NBMS}\times 64$ array - containing the base matrices. \\ -\bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 6 & No & A $2\times 3$ array containing the - number of quant ranges for a given \qti\ and \pli, respectively. -This is at most $63$. \\ -\bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 6 & No & A $2\times 3\times 63$ array of the - sizes of each quant range for a given \qti\ and \pli, respectively. -Only the first $\bitvar{NQRS}[\qti][\pli]$ values will be used. \\ -\bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 9 & No & A $2\times 3\times 64$ array of the - \bmi's used for each quant range for a given \qti\ and \pli, respectively. -Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values will be used. \\ -\bitvar{HTS} & \multicolumn{3}{l}{Huffman table array} - & An 80-element array of Huffman tables - with up to 32 entries each. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:} None. -\medskip - -The complete setup header is decoded as follows: - -\begin{enumerate} -\item -Decode the common header fields according to the procedure described in - Section~\ref{sub:common-header}. -If \bitvar{HEADERTYPE} returned by this procedure is not \hex{82}, then stop. -This packet is not the setup header. -\item -Decode the loop filter limit value table using the procedure given in - Section~\ref{sub:loop-filter-limits} into \bitvar{LFLIMS}. -\item -Decode the quantization parameters using the procedure given in - Section~\ref{sub:quant-params}. -The results are stored in \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{NBMS}, - \bitvar{BMS}, \bitvar{NQRS}, \bitvar{QRSIZES}, and \bitvar{QRBMIS}. -\item -Decode the DCT token Huffman tables using the procedure given in - Section~\ref{sub:huffman-tables} into \bitvar{HTS}. -\end{enumerate} - -\chapter{Frame Decode} - -This section describes the complete procedure necessary to decode a single - frame. -This begins with the frame header, followed by coded block flags, macro block - modes, motion vectors, block-level \qi\ values, and finally the DCT residual - tokens, which are used to reconstruct the frame. - -\section{Frame Header Decode} -\label{sub:frame-header} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{FTYPE} & Integer & 1 & No & The frame type. \\ -\bitvar{NQIS} & Integer & 2 & No & The number of \qi\ values. \\ -\bitvar{QIS} & \multicolumn{1}{p{40pt}}{Integer array} & - 6 & No & An \bitvar{NQIS}-element array of - \qi\ values. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{MOREQIS} & Integer & 1 & No & A flag indicating there are more - \qi\ values to be decoded. \\ -\bottomrule\end{tabularx} -\medskip - -The frame header selects which type of frame is being decoded, intra or inter, - and contains the list of \qi\ values that will be used in this frame. -The first \qi\ value will be used for {\em all} DC coefficients in all blocks. -This is done to ensure that DC prediction, which is done in the quantized - domain, works as expected. -The AC coefficients, however, can be dequantized using any \qi\ value on the - list, selected on a block-by-block basis. - -\begin{enumerate} -\item -Read a 1-bit unsigned integer. -If the value read is not zero, stop. -This is not a data packet. -\item -Read a 1-bit unsigned integer as \bitvar{FTYPE}. -This is the type of frame being decoded, as given in - Table~\ref{tab:frame-type}. -If this is the first frame being decoded, this MUST be zero. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{cl}\toprule -\bitvar{FTYPE} & Frame Type \\\midrule -$0$ & Intra frame \\ -$1$ & Inter frame \\ -\bottomrule\end{tabular} -\end{center} -\caption{Frame Type Values} -\label{tab:frame-type} -\end{table} - -\item -Read in a 6-bit unsigned integer as $\bitvar{QIS}[0]$. -\item -Read a 1-bit unsigned integer as \locvar{MOREQIS}. -\item -If \locvar{MOREQIS} is zero, set \bitvar{NQIS} to 1. -\item -Otherwise: -\begin{enumerate} -\item -Read in a 6-bit unsigned integer as $\bitvar{QIS}[1]$. -\item -Read a 1-bit unsigned integer as \locvar{MOREQIS}. -\item -If \locvar{MOREQIS} is zero, set \bitvar{NQIS} to 2. -\item -Otherwise: -\begin{enumerate} -\item -Read in a 6-bit unsigned integer as $\bitvar{QIS}[2]$. -\item -Set \bitvar{NQIS} to 3. -\end{enumerate} -\end{enumerate} -\item -If \bitvar{FTYPE} is 0, read a 3-bit unsigned integer. -These bits are reserved. -If this value is not zero, stop. -This frame is not decodable according to this specification. -\end{enumerate} - -\paragraph{VP3 Compatibility} - -The precise format of the frame header is substantially different in Theora - than in VP3. -The original VP3 format includes a larger number of unused, reserved bits that - are required to be zero. -The original VP3 frame header also can contain only a single \qi\ value, - because VP3 does not support block-level \qi\ values and uses the same - \qi\ value for all the coefficients in a frame. - -\section{Run-Length Encoded Bit Strings} - -Two variations of run-length encoding are used to store sequences of bits for - the block coded flags and the block-level \qi\ values. -The procedures to decode these bit sequences are specified in the following two - sections. - -\subsection{Long-Run Bit String Decode} -\label{sub:long-run} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{NBITS} & Integer & 36 & No & The number of bits to decode. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{BITS} & Bit string & & & The decoded bits. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{LEN} & Integer & 36 & No & The number of bits decoded so far. \\ -\locvar{BIT} & Integer & 1 & No & The value associated with the current - run. \\ -\locvar{RLEN} & Integer & 13 & No & The length of the current run. \\ -\locvar{RBITS} & Integer & 4 & No & The number of extra bits needed to - decode the run length. \\ -\locvar{RSTART} & Integer & 6 & No & The start of the possible run-length - values for a given Huffman code. \\ -\locvar{ROFFS} & Integer & 12 & No & The offset from \locvar{RSTART} of the - run-length. \\ -\bottomrule\end{tabularx} -\medskip - -There is no practical limit to the number of consecutive 0's and 1's that can - be decoded with this procedure. -In reality, the run length is limited by the number of blocks in a single - frame, because more will never be requested. -A separate procedure described in Section~\ref{sub:short-run} is used when - there is a known limit on the maximum size of the runs. - -For the first run, a single bit value is read, and then a Huffman-coded - representation of a run length is decoded, and that many copies of the bit - value are appended to the bit string. -For each consecutive run, the value of the bit is toggled instead of being read - from the bitstream. - -The only exception is if the length of the previous run was 4129, the maximum - possible length encodable by the Huffman-coded representation. -In this case another bit value is read from the stream, to allow for - consecutive runs of 0's or 1's longer than this maximum. - -Note that in both cases---for the first run and after a run of length 4129---if - no more bits are needed, then no bit value is read. - -The complete decoding procedure is as follows: - -\begin{enumerate} -\item -Assign \locvar{LEN} the value 0. -\item -Assign \bitvar{BITS} the empty string. -\item -If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string - \bitvar{BITS}. -\item -Read a 1-bit unsigned integer as \locvar{BIT}. -\item -\label{step:long-run-loop} -Read a bit at a time until one of the Huffman codes given in - Table~\ref{tab:long-run} is recognized. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{lrrl}\toprule -Huffman Code & \locvar{RSTART} & \locvar{RBITS} & Run Lengths \\\midrule -\bin{0} & $1$ & $0$ & $1$ \\ -\bin{10} & $2$ & $1$ & $2\ldots 3$ \\ -\bin{110} & $4$ & $1$ & $4\ldots 5$ \\ -\bin{1110} & $6$ & $2$ & $6\ldots 9$ \\ -\bin{11110} & $10$ & $3$ & $10\ldots 17$ \\ -\bin{111110} & $18$ & $4$ & $18\ldots 33$ \\ -\bin{111111} & $34$ & $12$ & $34\ldots 4129$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Huffman Codes for Long Run Lengths} -\label{tab:long-run} -\end{table} - -\item -Assign \locvar{RSTART} and \locvar{RBITS} the values given in - Table~\ref{tab:long-run} according to the Huffman code read. -\item -Read an \locvar{RBITS}-bit unsigned integer as \locvar{ROFFS}. -\item -Assign \locvar{RLEN} the value $(\locvar{RSTART}+\locvar{ROFFS})$. -\item -Append \locvar{RLEN} copies of \locvar{BIT} to \bitvar{BITS}. -\item -Add \locvar{RLEN} to the value \locvar{LEN}. -\locvar{LEN} MUST be less than or equal to \bitvar{NBITS}. -\item -If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string - \bitvar{BITS}. -\item -If \locvar{RLEN} equals 4129, read a 1-bit unsigned integer as \locvar{BIT}. -\item -Otherwise, assign \locvar{BIT} the value $(1-\locvar{BIT})$. -\item -Continue decoding runs from step~\ref{step:long-run-loop}. -\end{enumerate} - -\paragraph{VP3 Compatibility} - -VP3 does not read a new bit value after decoding a run length of 4129. -This limits the maximum number of consecutive 0's or 1's to 4129 in - VP3-compatible streams. -For reasonable video sizes of $1920\times 1080$ or less in 4:2:0 format---the - only pixel format VP3 supports---this does not pose any problems because runs - longer than 4129 are not needed. - -\subsection{Short-Run Bit String Decode} -\label{sub:short-run} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{NBITS} & Integer & 36 & No & The number of bits to decode. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{BITS} & Bit string & & & The decoded bits. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{LEN} & Integer & 36 & No & The number of bits decoded so far. \\ -\locvar{BIT} & Integer & 1 & No & The value associated with the current - run. \\ -\locvar{RLEN} & Integer & 13 & No & The length of the current run. \\ -\locvar{RBITS} & Integer & 4 & No & The number of extra bits needed to - decode the run length. \\ -\locvar{RSTART} & Integer & 6 & No & The start of the possible run-length - values for a given Huffman code. \\ -\locvar{ROFFS} & Integer & 12 & No & The offset from \locvar{RSTART} of the - run-length. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure is similar to the procedure outlined in - Section~\ref{sub:long-run}, except that the maximum number of consecutive 0's - or 1's is limited to 30. -This is the maximum run length needed when encoding a bit for each of the 16 - blocks in a super block when it is known that not all the bits in a super - block are the same. - -The complete decoding procedure is as follows: - -\begin{enumerate} -\item -Assign \locvar{LEN} the value 0. -\item -Assign \bitvar{BITS} the empty string. -\item -If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string - \bitvar{BITS}. -\item -Read a 1-bit unsigned integer as \locvar{BIT}. -\item -\label{step:short-run-loop} -Read a bit at a time until one of the Huffman codes given in - Table~\ref{tab:short-run} is recognized. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{lrrl}\toprule -Huffman Code & \locvar{RSTART} & \locvar{RBITS} & Run Lengths \\\midrule -\bin{0} & $1$ & $1$ & $1\ldots 2$ \\ -\bin{10} & $3$ & $1$ & $3\ldots 4$ \\ -\bin{110} & $5$ & $1$ & $5\ldots 6$ \\ -\bin{1110} & $7$ & $2$ & $7\ldots 10$ \\ -\bin{11110} & $11$ & $2$ & $11\ldots 14$ \\ -\bin{11111} & $15$ & $4$ & $15\ldots 30$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Huffman Codes for Short Run Lengths} -\label{tab:short-run} -\end{table} - -\item -Assign \locvar{RSTART} and \locvar{RBITS} the values given in - Table~\ref{tab:short-run} according to the Huffman code read. -\item -Read an \locvar{RBITS}-bit unsigned integer as \locvar{ROFFS}. -\item -Assign \locvar{RLEN} the value $(\locvar{RSTART}+\locvar{ROFFS})$. -\item -Append \locvar{RLEN} copies of \locvar{BIT} to \bitvar{BITS}. -\item -Add \locvar{RLEN} to the value \locvar{LEN}. -\locvar{LEN} MUST be less than or equal to \bitvar{NBITS}. -\item -If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string - \bitvar{BITS}. -\item -Assign \locvar{BIT} the value $(1-\locvar{BIT})$. -\item -Continue decoding runs from step~\ref{step:short-run-loop}. -\end{enumerate} - -\section{Coded Block Flags Decode} -\label{sub:coded-blocks} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{FTYPE} & Integer & 1 & No & The frame type. \\ -\bitvar{NSBS} & Integer & 32 & No & The total number of super blocks in a - frame. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of flags - indicating which blocks are coded. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{NBITS} & Integer & 36 & No & The length of a bit string to decode. \\ -\locvar{BITS} & Bit string & & & A decoded set of flags. \\ -\locvar{SBPCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NSBS}-element array of flags - indicating whether or not each super block is partially coded. \\ -\locvar{SBFCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NSBS}-element array of flags - indicating whether or not each non-partially coded super block is fully - coded. \\ -\locvar{\sbi} & Integer & 32 & No & The index of the current super - block. \\ -\locvar{\bi} & Integer & 36 & No & The index of the current block in coded - order. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure determines which blocks are coded in a given frame. -In an intra frame, it marks all blocks coded. -In an inter frame, however, any or all of the blocks may remain uncoded. -The output is a list of bit flags, one for each block, marking it coded or not - coded. - -It is important to note that flags are still decoded for any blocks which lie - entirely outside the picture region, even though they are not displayed. -Encoders MAY choose to code such blocks. -Decoders MUST faithfully reconstruct such blocks, because their contents can be - used for predictors in future frames. -Flags are \textit{not} decoded for portions of a super block which lie outside - the full frame, as there are no blocks in those regions. - -The complete procedure is as follows: - -\begin{enumerate} -\item -If \bitvar{FTYPE} is zero (intra frame): -\begin{enumerate} -\item -For each consecutive value of \locvar{\bi} from 0 to $(\locvar{NBS}-1)$, assign - $\bitvar{BCODED}[\locvar{\bi}]$ the value one. -\end{enumerate} -\item -Otherwise (inter frame): -\begin{enumerate} -\item -Assign \locvar{NBITS} the value \bitvar{NSBS}. -\item -Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure - described in Section~\ref{sub:long-run}. -This represents the list of partially coded super blocks. -\item -For each consecutive value of \locvar{\sbi} from 0 to $(\locvar{NSBS}-1)$, - remove the bit at the head of the string \locvar{BITS} and assign it to - $\locvar{SBPCODED}[\locvar{\sbi}]$. -\item -Assign \locvar{NBITS} the total number of super blocks such that \\ - $\locvar{SBPCODED}[\locvar{\sbi}]$ equals zero. -\item -Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure - described in Section~\ref{sub:long-run}. -This represents the list of fully coded super blocks. -\item -For each consecutive value of \locvar{\sbi} from 0 to $(\locvar{NSBS}-1)$ such - that $\locvar{SBPCODED}[\locvar{\sbi}]$ equals zero, remove the bit at the - head of the string \locvar{BITS} and assign it to - $\locvar{SBFCODED}[\locvar{\sbi}]$. -\item -Assign \locvar{NBITS} the number of blocks contained in super blocks where - $\locvar{SBPCODED}[\locvar{\sbi}]$ equals one. -Note that this might {\em not} be equal to 16 times the number of partially - coded super blocks, since super blocks which overlap the edge of the frame - will have fewer than 16 blocks in them. -\item -Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure - described in Section~\ref{sub:short-run}. -\item -For each block in coded order---indexed by \locvar{\bi}: -\begin{enumerate} -\item -Assign \locvar{\sbi} the index of the super block containing block - \locvar{\bi}. -\item -If $\locvar{SBPCODED}[\locvar{\sbi}]$ is zero, assign - $\bitvar{BCODED}[\locvar{\bi}]$ the value $\locvar{SBFCODED}[\locvar{\sbi}]$. -\item -Otherwise, remove the bit at the head of the string \locvar{BITS} and assign it - to $\bitvar{BCODED}[\locvar{\bi}]$. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\section{Macro Block Coding Modes} -\label{sub:mb-modes} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{FTYPE} & Integer & 1 & No & The frame type. \\ -\bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a - frame. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of flags - indicating which blocks are coded. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} & - 3 & No & An \bitvar{NMBS}-element array of coding - modes for each macro block. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{MSCHEME} & Integer & 3 & No & The mode coding scheme. \\ -\locvar{MALPHABET} & \multicolumn{1}{p{40pt}}{Integer array} - & 3 & No & The list of modes corresponding to each - Huffman code. \\ -\locvar{\mbi} & Integer & 32 & No & The index of the current macro - block. \\ -\locvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\locvar{\mi} & Integer & 3 & No & The index of a Huffman code from - Table~\ref{tab:mode-codes}, starting from $0$. \\ -\bottomrule\end{tabularx} -\medskip - -In an intra frame, every macro block marked as coded in INTRA mode. -In an inter frame, however, a macro block can be coded in one of eight coding - modes, given in Table~\ref{tab:coding-modes}. -All of the blocks in all color planes contained in a macro block will be - assigned the coding mode of that macro block. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{cl}\toprule -Index & Coding Mode \\\midrule -$0$ & INTER\_NOMV \\ -$1$ & INTRA \\ -$2$ & INTER\_MV \\ -$3$ & INTER\_MV\_LAST \\ -$4$ & INTER\_MV\_LAST2 \\ -$5$ & INTER\_GOLDEN\_NOMV \\ -$6$ & INTER\_GOLDEN\_MV \\ -$7$ & INTER\_MV\_FOUR \\ -\bottomrule\end{tabular} -\end{center} -\caption{Macro Block Coding Modes} -\label{tab:coding-modes} -\end{table} - -An important thing to note is that a coding mode is only stored in the - bitstream for a macro block if it has at least one {\em luma} block coded. -A macro block that contains coded blocks in the chroma planes, but not in the - luma plane, MUST be coded in INTER\_NOMV mode. -Thus, no coding mode needs to be decoded for such a macro block. - -Coding modes are encoded using one of eight different schemes. -Schemes 0 through 6 use the same simple Huffman code to represent the mode - numbers, as given in Table~\ref{tab:mode-codes}. -The difference in the schemes is the mode number assigned to each code. -Scheme 0 uses an assignment specified in the bitstream, while schemes 1--6 use - a fixed assignment, also given in Table~\ref{tab:mode-codes}. -Scheme 7 simply codes each mode directly in the bitstream using three bits. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{lccccccc}\toprule -Scheme & $1$ & $2$ & $3$ & $4$ & $5$ & $6$ & $7$ \\\cmidrule{2-7} -Huffman Code & \multicolumn{6}{c}{Coding Mode} & \locvar{\mi} \\\midrule -\bin{0} & $3$ & $3$ & $3$ & $3$ & $0$ & $0$ & $0$ \\ -\bin{10} & $4$ & $4$ & $2$ & $2$ & $3$ & $5$ & $1$ \\ -\bin{110} & $2$ & $0$ & $4$ & $0$ & $4$ & $3$ & $2$ \\ -\bin{1110} & $0$ & $2$ & $0$ & $4$ & $2$ & $4$ & $3$ \\ -\bin{11110} & $1$ & $1$ & $1$ & $1$ & $1$ & $2$ & $4$ \\ -\bin{111110} & $5$ & $5$ & $5$ & $5$ & $5$ & $1$ & $5$ \\ -\bin{1111110} & $6$ & $6$ & $6$ & $6$ & $6$ & $6$ & $6$ \\ -\bin{1111111} & $7$ & $7$ & $7$ & $7$ & $7$ & $7$ & $7$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Macro Block Mode Schemes} -\label{tab:mode-codes} -\end{table} - -\begin{enumerate} -\item -If \bitvar{FTYPE} is 0 (intra frame): -\begin{enumerate} -\item -For each consecutive value of \locvar{\mbi} from 0 to $(\bitvar{NMBS}-1)$, - inclusive, assign $\bitvar{MBMODES}[\mbi]$ the value 1 (INTRA). -\end{enumerate} -\item -Otherwise (inter frame): -\begin{enumerate} -\item -Read a 3-bit unsigned integer as \locvar{MSCHEME}. -\item -If \locvar{MSCHEME} is 0: -\begin{enumerate} -\item -For each consecutive value of \locvar{MODE} from 0 to 7, inclusive: -\begin{enumerate} -\item -Read a 3-bit unsigned integer as \locvar{\mi}. -\item -Assign $\locvar{MALPHABET}[\mi]$ the value \locvar{MODE}. -\end{enumerate} -\end{enumerate} -\item -Otherwise, if \locvar{MSCHEME} is not 7, assign the entries of - \locvar{MALPHABET} the values in the corresponding column of - Table~\ref{tab:mode-codes}. -\item -For each consecutive macro block in coded order (cf. - Section~\ref{sec:mbs})---indexed by \locvar{\mbi}: -\begin{enumerate} -\item -If a block \locvar{\bi} in the luma plane of macro block \locvar{\mbi} exists - such that $\bitvar{BCODED}[\locvar{\bi}]$ is 1: -\begin{enumerate} -\item -If \locvar{MSCHEME} is not 7, read one bit at a time until one of the Huffman - codes in Table~\ref{tab:mode-codes} is recognized, and assign - $\bitvar{MBMODES}[\locvar{\mbi}]$ the value - $\locvar{MALPHABET}[\locvar{\mi}]$, where \locvar{\mi} is the index of the - Huffman code decoded. -\item -Otherwise, read a 3-bit unsigned integer as $\bitvar{MBMODES}[\locvar{\mbi}]$. -\end{enumerate} -\item -Otherwise, if no luma-plane blocks in the macro block are coded, assign - $\bitvar{MBMODES}[\locvar{\mbi}]$ the value 0 (INTER\_NOMV). -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\section{Motion Vectors} - -In an intra frame, no motion vectors are used, and so motion vector decoding is - skipped. -In an inter frame, however, many of the inter coding modes require a motion - vector in order to specify an offset into the reference frame from which to - predict a block. -These procedures assigns such a motion vector to every block. - -\subsection{Motion Vector Decode} -\label{sub:mv-decode} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{MVMODE} & Integer & 1 & No & The motion vector decoding method. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{MVX} & Integer & 6 & Yes & The X component of the motion - vector. \\ -\bitvar{MVY} & Integer & 6 & Yes & The Y component of the motion - vector. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{MVSIGN} & Integer & 1 & No & The sign of the motion vector component - just decoded. \\ -\bottomrule\end{tabularx} -\medskip - -The individual components of a motion vector can be coded using one of two - methods. -The first uses a variable length Huffman code, given in - Table~\ref{tab:mv-huff-codes}. -The second encodes the magnitude of the component directly in 5 bits, and the - sign in one bit. -Note that in this case there are two representations for the value zero. -For compatibility with VP3, a sign bit is read even if the magnitude read is - zero. -One scheme is chosen and used for the entire frame. - -Each component can take on integer values from $-31\ldots 31$, inclusive, at - half-pixel resolution, i.e. $-15.5\ldots 15.5$ pixels in the luma plane. -For each subsampled axis in the chroma planes, the corresponding motion vector - component is interpreted as being at quarter-pixel resolution, i.e. - $-7.75\ldots 7.75$ pixels. -The precise details of how these vectors are used to compute predictors for - each block are described in Section~\ref{sec:predictors}. - -\begin{table}[ht] -\begin{center} -\begin{tabular}{lrlr}\toprule -Huffman Code & Value & Huffman Code & Value \\\midrule -\bin{000} & $0$ \\ -\bin{001} & $1$ & \bin{010} & $-1$ \\ -\bin{0110} & $2$ & \bin{0111} & $-2$ \\ -\bin{1000} & $3$ & \bin{1001} & $-3$ \\ -\bin{101000} & $4$ & \bin{101001} & $-4$ \\ -\bin{101010} & $5$ & \bin{101011} & $-5$ \\ -\bin{101100} & $6$ & \bin{101101} & $-6$ \\ -\bin{101110} & $7$ & \bin{101111} & $-7$ \\ -\bin{1100000} & $8$ & \bin{1100001} & $-8$ \\ -\bin{1100010} & $9$ & \bin{1100011} & $-9$ \\ -\bin{1100100} & $10$ & \bin{1100101} & $-10$ \\ -\bin{1100110} & $11$ & \bin{1100111} & $-11$ \\ -\bin{1101000} & $12$ & \bin{1101001} & $-12$ \\ -\bin{1101010} & $13$ & \bin{1101011} & $-13$ \\ -\bin{1101100} & $14$ & \bin{1101101} & $-14$ \\ -\bin{1101110} & $15$ & \bin{1101111} & $-15$ \\ -\bin{11100000} & $16$ & \bin{11100001} & $-16$ \\ -\bin{11100010} & $17$ & \bin{11100011} & $-17$ \\ -\bin{11100100} & $18$ & \bin{11100101} & $-18$ \\ -\bin{11100110} & $19$ & \bin{11100111} & $-19$ \\ -\bin{11101000} & $20$ & \bin{11101001} & $-20$ \\ -\bin{11101010} & $21$ & \bin{11101011} & $-21$ \\ -\bin{11101100} & $22$ & \bin{11101101} & $-22$ \\ -\bin{11101110} & $23$ & \bin{11101111} & $-23$ \\ -\bin{11110000} & $24$ & \bin{11110001} & $-24$ \\ -\bin{11110010} & $25$ & \bin{11110011} & $-25$ \\ -\bin{11110100} & $26$ & \bin{11110101} & $-26$ \\ -\bin{11110110} & $27$ & \bin{11110111} & $-27$ \\ -\bin{11111000} & $28$ & \bin{11111001} & $-28$ \\ -\bin{11111010} & $29$ & \bin{11111011} & $-29$ \\ -\bin{11111100} & $30$ & \bin{11111101} & $-30$ \\ -\bin{11111110} & $31$ & \bin{11111111} & $-31$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Huffman Codes for Motion Vector Components} -\label{tab:mv-huff-codes} -\end{table} - -A single motion vector is decoded is follows: - -\begin{enumerate} -\item -If \bitvar{MVMODE} is 0: -\begin{enumerate} -\item -Read 1 bit at a time until one of the Huffman codes in - Table~\ref{tab:mv-huff-codes} is recognized, and assign the value to - \locvar{MVX}. -\item -Read 1 bit at a time until one of the Huffman codes in - Table~\ref{tab:mv-huff-codes} is recognized, and assign the value to - \locvar{MVY}. -\end{enumerate} -\item -Otherwise: -\begin{enumerate} -\item -Read a 5-bit unsigned integer as \bitvar{MVX}. -\item -Read a 1-bit unsigned integer as \locvar{MVSIGN}. -\item -If \locvar{MVSIGN} is 1, assign \bitvar{MVX} the value $-\bitvar{MVX}$. -\item -Read a 5-bit unsigned integer as \bitvar{MVY}. -\item -Read a 1-bit unsigned integer as \locvar{MVSIGN}. -\item -If \locvar{MVSIGN} is 1, assign \bitvar{MVY} the value $-\bitvar{MVY}$. -\end{enumerate} -\end{enumerate} - -\subsection{Macro Block Motion Vector Decode} -\label{sub:mb-mv-decode} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{PF} & Integer & 2 & No & The pixel format. \\ -\bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a - frame. \\ -\bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} & - 3 & No & An \bitvar{NMBS}-element array of coding - modes for each macro block. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of flags - indicating which blocks are coded. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{MVECTS} & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} & - 6 & Yes & An \bitvar{NBS}-element array of - motion vectors for each block. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{LAST1} & \multicolumn{1}{p{50pt}}{2D Integer Vector} & - 6 & Yes & The last motion vector. \\ -\locvar{LAST2} & \multicolumn{1}{p{50pt}}{2D Integer Vector} & - 6 & Yes & The second to last motion vector. \\ -\locvar{MVX} & Integer & 6 & Yes & The X component of a motion vector. \\ -\locvar{MVY} & Integer & 6 & Yes & The Y component of a motion vector. \\ -\locvar{\mbi} & Integer & 32 & No & The index of the current macro - block. \\ -\locvar{A} & Integer & 36 & No & The index of the lower-left luma block - in the macro block. \\ -\locvar{B} & Integer & 36 & No & The index of the lower-right luma - block in the macro block. \\ -\locvar{C} & Integer & 36 & No & The index of the upper-left luma block - in the macro block. \\ -\locvar{D} & Integer & 36 & No & The index of the upper-right luma - block in the macro block. \\ -\locvar{E} & Integer & 36 & No & The index of a chroma block in the - macro block, depending on the pixel format. \\ -\locvar{F} & Integer & 36 & No & The index of a chroma block in the - macro block, depending on the pixel format. \\ -\locvar{G} & Integer & 36 & No & The index of a chroma block in the - macro block, depending on the pixel format. \\ -\locvar{H} & Integer & 36 & No & The index of a chroma block in the - macro block, depending on the pixel format. \\ -\locvar{I} & Integer & 36 & No & The index of a chroma block in the - macro block, depending on the pixel format. \\ -\locvar{J} & Integer & 36 & No & The index of a chroma block in the - macro block, depending on the pixel format. \\ -\locvar{K} & Integer & 36 & No & The index of a chroma block in the - macro block, depending on the pixel format. \\ -\locvar{L} & Integer & 36 & No & The index of a chroma block in the - macro block, depending on the pixel format. \\ -\bottomrule\end{tabularx} -\medskip - -Motion vectors are stored for each macro block. -In every mode except for INTER\_MV\_FOUR, every block in all the color planes - are assigned the same motion vector. -In INTER\_MV\_FOUR mode, all four blocks in the luma plane are assigned their - own motion vector, and motion vectors for blocks in the chroma planes are - computed from these, using averaging appropriate to the pixel format. - -For INTER\_MV and INTER\_GOLDEN\_MV modes, a single motion vector is decoded - and applied to each block. -For INTER\_MV\_FOUR macro blocks, a motion vector is decoded for each coded - luma block. -Uncoded luma blocks receive the default $(0,0)$ vector for the purposes of - computing the chroma motion vectors. - -None of the remaining macro block coding modes require decoding motion vectors - from the stream. -INTRA mode does not use a motion-compensated predictor, and so requires no - motion vector, and INTER\_NOMV and INTER\_GOLDEN\_NOMV modes use the default - vector $(0,0)$ for each block. -This also includes all macro blocks with no coded luma blocks, as they are - coded in INTER\_NOMV mode by definition. - -The modes INTER\_MV\_LAST and INTER\_MV\_LAST2 use the motion vector from the - last macro block (in coded order) and the second to last macro block, - respectively, that contained a motion vector pointing to the previous frame. -Thus no explicit motion vector needs to be decoded for these modes. -Macro blocks coded in INTRA mode or one of the GOLDEN modes are not considered - in this process. -If an insufficient number of macro blocks have been coded in one of the INTER - modes, then the $(0,0)$ vector is used instead. -For macro blocks coded in INTER\_MV\_FOUR mode, the vector from the upper-right - luma block is used, even if the upper-right block is not coded. - -The motion vectors are decoded from the stream as follows: - -\begin{enumerate} -\item -Assign \locvar{LAST1} and \locvar{LAST2} both the value $(0,0)$. -\item -Read a 1-bit unsigned integer as \locvar{MVMODE}. -Note that this value is read even if no macro blocks require a motion vector to - be decoded. -\item -For each consecutive value of \locvar{\mbi} from 0 to $(\bitvar{NMBS}-1)$: -\begin{enumerate} -\item -If $\bitvar{MBMODES}[\locvar{\mbi}]$ is 7 (INTER\_MV\_FOUR): -\begin{enumerate} -\item -Let \locvar{A}, \locvar{B}, \locvar{C}, and \locvar{D} be the indices in coded - order \locvar{\bi} of the luma blocks in macro block \locvar{\mbi}, arranged - into raster order. -Thus, \locvar{A} is the index in coded order of the block in the lower left, - \locvar{B} the lower right, \locvar{C} the upper left, and \locvar{D} the - upper right. % TODO: as shown in Figure~REF. -\item If $\bitvar{BCODED}[\locvar{A}]$ is non-zero: -\begin{enumerate} -\item Decode a single motion vector into \locvar{MVX} and \locvar{MVY} using - the procedure described in Section~\ref{sub:mv-decode}. -\item Assign $\bitvar{MVECTS}[\locvar{A}]$ the value - $(\locvar{MVX},\locvar{MVY})$. -\end{enumerate} -\item Otherwise, assign $\bitvar{MVECTS}[\locvar{A}]$ the value $(0,0)$. -\item If $\bitvar{BCODED}[\locvar{B}]$ is non-zero: -\begin{enumerate} -\item Decode a single motion vector into \locvar{MVX} and \locvar{MVY} using - the procedure described in Section~\ref{sub:mv-decode}. -\item Assign $\bitvar{MVECTS}[\locvar{B}]$ the value - $(\locvar{MVX},\locvar{MVY})$. -\end{enumerate} -\item -Otherwise assign $\bitvar{MVECTS}[\locvar{B}]$ the value $(0,0)$. -\item If $\bitvar{BCODED}[\locvar{C}]$ is non-zero: -\begin{enumerate} -\item Decode a single motion vector into \locvar{MVX} and \locvar{MVY} using - the procedure described in Section~\ref{sub:mv-decode}. -\item Assign $\bitvar{MVECTS}[\locvar{C}]$ the value - $(\locvar{MVX},\locvar{MVY})$. -\end{enumerate} -\item Otherwise assign $\bitvar{MVECTS}[\locvar{C}]$ the value $(0,0)$. -\item If $\bitvar{BCODED}[\locvar{D}]$ is non-zero: -\begin{enumerate} -\item Decode a single motion vector into \locvar{MVX} and \locvar{MVY} using - the procedure described in Section~\ref{sub:mv-decode}. -\item Assign $\bitvar{MVECTS}[\locvar{D}]$ the value - $(\locvar{MVX},\locvar{MVY})$. -\end{enumerate} -\item -Otherwise, assign $\bitvar{MVECTS}[\locvar{D}]$ the value $(0,0)$. -\item -If \bitvar{PF} is 0 (4:2:0): -\begin{enumerate} -\item -Let \locvar{E} and \locvar{F} be the index in coded order of the one block in - the macro block from the $C_b$ and $C_r$ planes, respectively. -\item -Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{F}]$ the - value -\begin{multline*} -(\round\biggl(\frac{\begin{aligned} - \bitvar{MVECTS}[\locvar{A}]_x+\bitvar{MVECTS}[\locvar{B}]_x+\\ - \bitvar{MVECTS}[\locvar{C}]_x+\bitvar{MVECTS}[\locvar{D}]_x - \end{aligned}}{4}\biggr), \\ - \round\biggl(\frac{\begin{aligned} - \bitvar{MVECTS}[\locvar{A}]_y+\bitvar{MVECTS}[\locvar{B}]_y+\\ - \bitvar{MVECTS}[\locvar{C}]_y+\bitvar{MVECTS}[\locvar{D}]_y - \end{aligned}}{4}\biggr)) -\end{multline*} -\end{enumerate} -\item -If \bitvar{PF} is 2 (4:2:2): -\begin{enumerate} -\item -Let \locvar{E} and \locvar{F} be the indices in coded order of the bottom and - top blocks in the macro block from the $C_b$ plane, respectively, and - \locvar{G} and \locvar{H} be the indices in coded order of the bottom and top - blocks in the $C_r$ plane, respectively. %TODO: as shown in Figure~REF. -\item -Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{G}]$ the - value -\begin{multline*} -(\round\left(\frac{ - \bitvar{MVECTS}[\locvar{A}]_x+\bitvar{MVECTS}[\locvar{B}]_x}{2}\right), \\ - \round\left(\frac{ - \bitvar{MVECTS}[\locvar{A}]_y+\bitvar{MVECTS}[\locvar{B}]_y}{2}\right)) -\end{multline*} -\item -Assign $\bitvar{MVECTS}[\locvar{F}]$ and $\bitvar{MVECTS}[\locvar{H}]$ the - value -\begin{multline*} -(\round\left(\frac{ - \bitvar{MVECTS}[\locvar{C}]_x+\bitvar{MVECTS}[\locvar{D}]_x}{2}\right), \\ - \round\left(\frac{ - \bitvar{MVECTS}[\locvar{C}]_y+\bitvar{MVECTS}[\locvar{D}]_y}{2}\right)) -\end{multline*} -\end{enumerate} -\item -If \bitvar{PF} is 3 (4:4:4): -\begin{enumerate} -\item -Let \locvar{E}, \locvar{F}, \locvar{G}, and \locvar{H} be the indices - \locvar{\bi} in coded order of the $C_b$ plane blocks in macro block - \locvar{\mbi}, arranged into raster order, and \locvar{I}, \locvar{J}, - \locvar{K}, and \locvar{L} be the indices \locvar{\bi} in coded order of the - $C_r$ plane blocks in macro block \locvar{\mbi}, arranged into raster order. - %TODO: as shown in Figure~REF. -\item -Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{I}]$ the - value \\ $\bitvar{MVECTS}[\locvar{A}]$. -\item -Assign $\bitvar{MVECTS}[\locvar{F}]$ and $\bitvar{MVECTS}[\locvar{J}]$ the - value \\ $\bitvar{MVECTS}[\locvar{B}]$. -\item -Assign $\bitvar{MVECTS}[\locvar{G}]$ and $\bitvar{MVECTS}[\locvar{K}]$ the - value \\ $\bitvar{MVECTS}[\locvar{C}]$. -\item -Assign $\bitvar{MVECTS}[\locvar{H}]$ and $\bitvar{MVECTS}[\locvar{L}]$ the - value \\ $\bitvar{MVECTS}[\locvar{D}]$. -\end{enumerate} -\item -Assign \locvar{LAST2} the value \locvar{LAST1}. -\item -Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$. -This is the value of the motion vector decoded from the last coded luma block - in raster order. -There must always be at least one, since macro blocks with no coded luma blocks - must use mode 0:~INTER\_NOMV. -\end{enumerate} -\item -Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 6 (INTER\_GOLDEN\_MV), - decode a single motion vector into \locvar{MVX} and \locvar{MVY} using the - procedure described in Section~\ref{sub:mv-decode}. -\item -Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 4 (INTER\_MV\_LAST2): -\begin{enumerate} -\item -Assign $(\locvar{MVX},\locvar{MVY})$ the value \locvar{LAST2}. -\item -Assign \locvar{LAST2} the value \locvar{LAST1}. -\item -Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$. -\end{enumerate} -\item -Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 3 (INTER\_MV\_LAST), assign - $(\locvar{MVX},\locvar{MVY})$ the value \locvar{LAST1}. -\item -Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 2 (INTER\_MV): -\begin{enumerate} -\item -Decode a single motion vector into \locvar{MVX} and \locvar{MVY} using the - procedure described in Section~\ref{sub:mv-decode}. -\item -Assign \locvar{LAST2} the value \locvar{LAST1}. -\item -Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$. -\end{enumerate} -\item -Otherwise ($\bitvar{MBMODES}[\locvar{\mbi}]$ is 5:~INTER\_GOLDEN\_NOMV, - 1:~INTRA, or 0:~INTER\_NOMV), assign \locvar{MVX} and \locvar{MVY} the value - zero. -\item -If $\bitvar{MBMODES}[\locvar{\mbi}]$ is not 7 (not INTER\_MV\_FOUR), then for - each coded block \locvar{\bi} in macro block \locvar{\mbi}: -\begin{enumerate} -\item -Assign $\bitvar{MVECTS}[\locvar{\bi}]$ the value $(\locvar{MVX},\locvar{MVY})$. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\paragraph{VP3 Compatibility} - -Unless all four luma blocks in the macro block are coded, the VP3 encoder does - not select mode INTER\_MV\_FOUR. -Theora removes this restriction by treating the motion vector for an uncoded - luma block as the default $(0,0)$ vector. -This is consistent with the premise that the block has not changed since the - previous frame and that chroma information can be largely ignored when - estimating motion. - -No modification is required for INTER\_MV\_FOUR macro blocks in VP3 streams to - be decoded correctly by a Theora decoder. -However, regardless of how many of the luma blocks are actually coded, the VP3 - decoder always reads four motion vectors from the stream for INTER\_MV\_FOUR - mode. -The motion vectors read are used to calculate the motion vectors for the chroma - blocks, but are otherwise ignored. -Thus, care should be taken when creating Theora streams meant to be backwards - compatible with VP3 to only use INTER\_MV\_FOUR mode when all four luma - blocks are coded. - -\section{Block-Level \qi\ Decode} -\label{sub:block-qis} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of flags - indicating which blocks are coded. \\ -\bitvar{NQIS} & Integer & 2 & No & The number of \qi\ values. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{QIIS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 2 & No & An \bitvar{NBS}-element array of - \locvar{\qii} values for each block. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{NBITS} & Integer & 36 & No & The length of a bit string to decode. \\ -\locvar{BITS} & Bit string & & & A decoded set of flags. \\ -\locvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\locvar{\qii} & Integer & 2 & No & The index of \qi\ value in the list of - \qi\ values defined for this frame. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure selects the \qi\ value to be used for dequantizing the AC - coefficients of each block. -DC coefficients all use the same \qi\ value, so as to avoid interference with - the DC prediction mechanism, which occurs in the quantized domain. - -The value is actually represented by an index \locvar{\qii} into the list of - \qi\ values defined for the frame. -The decoder makes multiple passes through the list of coded blocks, one for - each \qi\ value except the last one. -In each pass, an RLE-coded bitmask is decoded to divide the blocks into two - groups: those that use the current \qi\ value in the list, and those that use - a value from later in the list. -Each subsequent pass is restricted to the blocks in the second group. - -\begin{enumerate} -\item -For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, assign - $\bitvar{QIIS}[\locvar{\bi}]$ the value zero. -\item -For each consecutive value of \locvar{\qii} from 0 to $(\bitvar{NQIS}-2)$: -\begin{enumerate} -\item -Assign \locvar{NBITS} be the number of blocks \locvar{\bi} such that - $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and $\bitvar{QIIS}[\locvar{\bi}]$ - equals $\locvar{\qii}$. -\item -Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure - described in Section~\ref{sub:long-run}. -This represents the list of blocks that use \qi\ value \locvar{\qii} or higher. -\item -For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$ such - that $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and - $\bitvar{QIIS}[\locvar{\bi}]$ equals $\locvar{\qii}$: -\begin{enumerate} -\item -Remove the bit at the head of the string \locvar{BITS} and add its value to - $\bitvar{QIIS}[\locvar{\bi}]$. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\paragraph{VP3 Compatibility} - -For VP3 compatible streams, only one \qi\ value can be specified in the frame - header, so the main loop of the above procedure, which would iterate from $0$ - to $-1$, is never executed. -Thus, no bits are read, and each block uses the one \qi\ value defined for the - frame. - -\cleardoublepage - -\section{DCT Coefficients} -\label{sec:dct-decode} - -The quantized DCT coefficients are decoded by making 64 passes through the list - of coded blocks, one for each token index in zig-zag order. -For the DC tokens, two Huffman tables are chosen from among the first 16, one - for the luma plane and one for the chroma planes. -The AC tokens, however, are divided into four different groups. -Again, two 4-bit indices are decoded, one for the luma plane, and one for the - chroma planes, but these select the codebooks for {\em all four} groups. -AC coefficients in group one use codebooks $16\ldots 31$, while group two uses - $32\ldots 47$, etc. -Note that this second set of indices is decoded even if there are no non-zero - AC coefficients in the frame. - -Tokens are divided into two major types: EOB tokens, which fill the remainder - of one or more blocks with zeros, and coefficient tokens, which fill in one or - more coefficients within a single block. -A decoding procedure for the first is given in Section~\ref{sub:eob-token}, and - for the second in Section~\ref{sub:coeff-token}. -The decoding procedure for the complete set of quantized coefficients is given - in Section~\ref{sub:dct-coeffs}. - -\subsection{EOB Token Decode} -\label{sub:eob-token} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{TOKEN} & Integer & 5 & No & The token being decoded. -This must be in the range $0\ldots 6$. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - current token index for each block. \\ -\bitvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - coefficient count for each block. \\ -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bitvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\bitvar{\ti} & Integer & 6 & No & The current token index. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - current token index for each block. \\ -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bitvar{EOBS} & Integer & 36 & No & The remaining length of the current - EOB run. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\bj} & Integer & 36 & No & Another index of a block in coded - order. \\ -\locvar{\tj} & Integer & 6 & No & Another token index. \\ -\bottomrule\end{tabularx} -\medskip - -A summary of the EOB tokens is given in Table~\ref{tab:eob-tokens}. -An important thing to note is that token 6 does not add an offset to the - decoded run value, even though in general it should only be used for runs of - size 32 or longer. -If a value of zero is decoded for this run, it is treated as an EOB run the - size of the remaining coded blocks. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{ccl}\toprule -Token Value & Extra Bits & EOB Run Lengths \\\midrule -$0$ & $0$ & $1$ \\ -$1$ & $0$ & $2$ \\ -$2$ & $0$ & $3$ \\ -$3$ & $2$ & $4\ldots 7$ \\ -$4$ & $3$ & $8\ldots 15$ \\ -$5$ & $4$ & $16\ldots 31$ \\ -$6$ & $12$ & $1\ldots 4095$, or all remaining blocks \\ -\bottomrule\end{tabular} -\end{center} -\caption{EOB Token Summary} -\label{tab:eob-tokens} -\end{table} - -There is no restriction that one EOB token cannot be immediately followed by - another, so no special cases are necessary to extend the range of the maximum - run length as were required in Section~\ref{sub:long-run}. -Indeed, depending on the lengths of the Huffman codes, it may even cheaper to - encode, by way of example, an EOB run of length 31 followed by an EOB run of - length 1 than to encode an EOB run of length 32 directly. -There is also no restriction that an EOB run stop at the end of a color plane - or a token index. -The run MUST, however, end at or before the end of the frame. - -\begin{enumerate} -\item -If \bitvar{TOKEN} is 0, assign \bitvar{EOBS} the value 1. -\item -Otherwise, if \bitvar{TOKEN} is 1, assign \bitvar{EOBS} the value 2. -\item -Otherwise, if \bitvar{TOKEN} is 2, assign \bitvar{EOBS} the value 3. -\item -Otherwise, if \bitvar{TOKEN} is 3: -\begin{enumerate} -\item -Read a 2-bit unsigned integer as \bitvar{EOBS}. -\item -Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+4)$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 4: -\begin{enumerate} -\item -Read a 3-bit unsigned integer as \bitvar{EOBS}. -\item -Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+8)$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 5: -\begin{enumerate} -\item -Read a 4-bit unsigned integer as \bitvar{EOBS}. -\item -Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+16)$. -\end{enumerate} -\item -Otherwise, \bitvar{TOKEN} is 6: -\begin{enumerate} -\item -Read a 12-bit unsigned integer as \bitvar{EOBS}. -\item -If \bitvar{EOBS} is zero, assign \bitvar{EOBS} to be the number of coded blocks - \locvar{\bj} such that $\bitvar{TIS}[\locvar{\bj}]$ is less than 64. -\end{enumerate} -\item -For each value of \locvar{\tj} from $\bitvar{\ti}$ to 63, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value 64. -\item -Assign \bitvar{EOBS} the value $(\bitvar{EOBS}-1)$. -\end{enumerate} - -\paragraph{VP3 Compatibility} - -The VP3 encoder does not use the special interpretation of a zero-length EOB - run, though its decoder {\em does} support it. -That may be due more to a happy accident in the way the decoder was written - than intentional design, however, and other VP3 implementations might not - reproduce it faithfully. -For backwards compatibility, it may be wise to avoid it, especially as for most - frame sizes there are fewer than 4095 blocks, making it unnecessary. - -\subsection{Coefficient Token Decode} -\label{sub:coeff-token} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{TOKEN} & Integer & 5 & No & The token being decoded. -This must be in the range $7\ldots 31$. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - current token index for each block. \\ -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bitvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\bitvar{\ti} & Integer & 6 & No & The current token index. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - current token index for each block. \\ -\bitvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - coefficient count for each block. \\ -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{SIGN} & Integer & 1 & No & A flag indicating the sign of the - current coefficient. \\ -\locvar{MAG} & Integer & 10 & No & The magnitude of the current - coefficient. \\ -\locvar{RLEN} & Integer & 6 & No & The length of the current zero run. \\ -\locvar{\tj} & Integer & 6 & No & Another token index. \\ -\bottomrule\end{tabularx} -\medskip - -Each of these tokens decodes one or more coefficients in the current block. -A summary of the meanings of the token values is presented in - Table~\ref{tab:coeff-tokens}. -There are often several different ways to tokenize a given coefficient list. -Which one is optimal depends on the exact lengths of the Huffman codes used to - represent each token. -Note that we do not update the coefficient count for the block if we decode a - pure zero run. - -\begin{table}[htbp] -\begin{center} -\begin{tabularx}{\textwidth}{cclX}\toprule -Token Value & Extra Bits & \multicolumn{1}{p{55pt}}{Number of Coefficients} - & Description \\\midrule -$7$ & $3$ & $1\ldots 8$ & Short zero run. \\ -$8$ & $6$ & $1\ldots 64$ & Zero run. \\ -$9$ & $0$ & $1$ & $1$. \\ -$10$ & $0$ & $1$ & $-1$. \\ -$11$ & $0$ & $1$ & $2$. \\ -$12$ & $0$ & $1$ & $-2$. \\ -$13$ & $1$ & $1$ & $\pm 3$. \\ -$14$ & $1$ & $1$ & $\pm 4$. \\ -$15$ & $1$ & $1$ & $\pm 5$. \\ -$16$ & $1$ & $1$ & $\pm 6$. \\ -$17$ & $2$ & $1$ & $\pm 7\ldots 8$. \\ -$18$ & $3$ & $1$ & $\pm 9\ldots 12$. \\ -$19$ & $4$ & $1$ & $\pm 13\ldots 20$. \\ -$20$ & $5$ & $1$ & $\pm 21\ldots 36$. \\ -$21$ & $6$ & $1$ & $\pm 37\ldots 68$. \\ -$22$ & $10$ & $1$ & $\pm 69\ldots 580$. \\ -$23$ & $1$ & $2$ & One zero followed by $\pm 1$. \\ -$24$ & $1$ & $3$ & Two zeros followed by $\pm 1$. \\ -$25$ & $1$ & $4$ & Three zeros followed by - $\pm 1$. \\ -$26$ & $1$ & $5$ & Four zeros followed by - $\pm 1$. \\ -$27$ & $1$ & $6$ & Five zeros followed by - $\pm 1$. \\ -$28$ & $3$ & $7\ldots 10$ & $6\ldots 9$ zeros followed by - $\pm 1$. \\ -$29$ & $4$ & $11\ldots 18$ & $10\ldots 17$ zeros followed by - $\pm 1$.\\ -$30$ & $2$ & $2$ & One zero followed by - $\pm 2\ldots 3$. \\ -$31$ & $3$ & $3\ldots 4$ & $2\ldots 3$ zeros followed by - $\pm 2\ldots 3$. \\ -\bottomrule\end{tabularx} -\end{center} -\caption{Coefficient Token Summary} -\label{tab:coeff-tokens} -\end{table} - -For tokens which represent more than one coefficient, they MUST NOT bring the - total number of coefficients in the block to more than 64. -Care should be taken in a decoder to check for this, as otherwise it may permit - buffer overflows from invalidly formed packets. -\begin{verse} -{\bf Note:} One way to achieve this efficiently is to combine the inverse - zig-zag mapping (described later in Section~\ref{sub:dequant}) with - coefficient decode, and use a table look-up to map zig-zag indices greater - than 63 to a safe location. -\end{verse} - -\begin{enumerate} -\item -If \bitvar{TOKEN} is 7: -\begin{enumerate} -\item -Read in a 3-bit unsigned integer as \locvar{RLEN}. -\item -Assign \locvar{RLEN} the value $(\locvar{RLEN}+1)$. -\item -For each value of \locvar{\tj} from \bitvar{\ti} to - $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value - $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 8: -\begin{enumerate} -\item -Read in a 6-bit unsigned integer as \locvar{RLEN}. -\item -Assign \locvar{RLEN} the value $(\locvar{RLEN}+1)$. -\item -For each value of \locvar{\tj} from \bitvar{\ti} to - $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value - $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 9: -\begin{enumerate} -\item -Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 10: -\begin{enumerate} -\item -Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 11: -\begin{enumerate} -\item -Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $2$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 12: -\begin{enumerate} -\item -Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-2$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 13: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $3$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-3$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 14: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $4$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-4$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 15: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $5$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-5$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 16: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $6$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-6$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 17: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 1-bit unsigned integer as \locvar{MAG}. -\item -Assign \locvar{MAG} the value $(\locvar{MAG}+7)$. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $\locvar{MAG}$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value - $-\locvar{MAG}$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 18: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 2-bit unsigned integer as \locvar{MAG}. -\item -Assign \locvar{MAG} the value $(\locvar{MAG}+9)$. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $\locvar{MAG}$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value - $-\locvar{MAG}$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 19: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 3-bit unsigned integer as \locvar{MAG}. -\item -Assign \locvar{MAG} the value $(\locvar{MAG}+13)$. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $\locvar{MAG}$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value - $-\locvar{MAG}$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 20: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 4-bit unsigned integer as \locvar{MAG}. -\item -Assign \locvar{MAG} the value $(\locvar{MAG}+21)$. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $\locvar{MAG}$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value - $-\locvar{MAG}$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 21: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 5-bit unsigned integer as \locvar{MAG}. -\item -Assign \locvar{MAG} the value $(\locvar{MAG}+37)$. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $\locvar{MAG}$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value - $-\locvar{MAG}$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 22: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 9-bit unsigned integer as \locvar{MAG}. -\item -Assign \locvar{MAG} the value $(\locvar{MAG}+69)$. -\item -If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ - the value $\locvar{MAG}$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value - $-\locvar{MAG}$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 23: -\begin{enumerate} -\item -Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value zero. -\item -Read a 1-bit unsigned integer as SIGN. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value $1$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value - $-1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+2$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 24: -\begin{enumerate} -\item -For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+1)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -Read a 1-bit unsigned integer as SIGN. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+2]$ the value $1$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+2]$ the value - $-1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+3$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 25: -\begin{enumerate} -\item -For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+2)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -Read a 1-bit unsigned integer as SIGN. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+3]$ the value $1$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+3]$ the value - $-1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+4$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 26: -\begin{enumerate} -\item -For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+3)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -Read a 1-bit unsigned integer as SIGN. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+4]$ the value $1$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+4]$ the value - $-1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+5$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 27: -\begin{enumerate} -\item -For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+4)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -Read a 1-bit unsigned integer as SIGN. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+5]$ the value $1$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+5]$ the value - $-1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+6$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 28: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 2-bit unsigned integer as \locvar{RLEN}. -\item -Assign \locvar{RLEN} the value $(\locvar{RLEN}+6)$. -\item -For each value of \locvar{\tj} from \bitvar{\ti} to - $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value $1$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ - the value $-1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value - $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$. -\item -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 29: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 3-bit unsigned integer as \locvar{RLEN}. -\item -Assign \locvar{RLEN} the value $(\locvar{RLEN}+10)$. -\item -For each value of \locvar{\tj} from \bitvar{\ti} to - $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value $1$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ - the value $-1$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value - $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$. -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 30: -\begin{enumerate} -\item -Assign $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\ti}]$ the value zero. -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 1-bit unsigned integer as \locvar{MAG}. -\item -Assign \locvar{MAG} the value $(\locvar{MAG}+2)$. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value $\locvar{MAG}$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value - $-\locvar{MAG}$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+2$. -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\item -Otherwise, if \bitvar{TOKEN} is 31: -\begin{enumerate} -\item -Read a 1-bit unsigned integer as \locvar{SIGN}. -\item -Read a 1-bit unsigned integer as \locvar{MAG}. -\item -Assign \locvar{MAG} the value $(\locvar{MAG}+2)$. -\item -Read a 1-bit unsigned integer as \locvar{RLEN}. -\item -Assign \locvar{RLEN} the value $(\locvar{RLEN}+2)$. -\item -For each value of \locvar{\tj} from \bitvar{\ti} to - $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero. -\item -If \locvar{SIGN} is zero, assign - $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value - $\locvar{MAG}$. -\item -Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ - the value $-\locvar{MAG}$. -\item -Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value - $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$. -Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$. -\end{enumerate} -\end{enumerate} - -\subsection{DCT Coefficient Decode} -\label{sub:dct-coeffs} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of flags - indicating which blocks are coded. \\ -\bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a - frame. \\ -\bitvar{HTS} & \multicolumn{3}{l}{Huffman table array} - & An 80-element array of Huffman tables - with up to 32 entries each. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bitvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - coefficient count for each block. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{NLBS} & Integer & 34 & No & The number of blocks in the luma - plane. \\ -\locvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - current token index for each block. \\ -\locvar{EOBS} & Integer & 36 & No & The remaining length of the current - EOB run. \\ -\locvar{TOKEN} & Integer & 5 & No & The current token being decoded. \\ -\locvar{HG} & Integer & 3 & No & The current Huffman table group. \\ -\locvar{\cbi} & Integer & 36 & No & The index of the current block in the - coded block list. \\ -\locvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\locvar{\bj} & Integer & 36 & No & Another index of a block in coded - order. \\ -\locvar{\ti} & Integer & 6 & No & The current token index. \\ -\locvar{\tj} & Integer & 6 & No & Another token index. \\ -\locvar{\hti_L} & Integer & 4 & No & The index of the current Huffman table - to use for the luma plane within a group. \\ -\locvar{\hti_C} & Integer & 4 & No & The index of the current Huffman table - to use for the chroma planes within a group. \\ -\locvar{\hti} & Integer & 7 & No & The index of the current Huffman table - to use. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure puts the above two procedures to work to decode the entire set - of DCT coefficients for the frame. -At the end of this procedure, \locvar{EOBS} MUST be zero, and - $\locvar{TIS}[\locvar{\bi}]$ MUST be 64 for every coded \locvar{\bi}. - -Note that we update the coefficient count of every block before continuing an - EOB run or decoding a token, despite the fact that it is already up to date - unless the previous token was a pure zero run. -This is done intentionally to mimic the VP3 accounting rules. -Thus the only time the coefficient count does not include the coefficients in a - pure zero run is when when that run reaches all the way to coefficient 63. -Note, however, that regardless of the coefficient count, any additional - coefficients are still set to zero. -The only use of the count is in determining if a special case of the inverse - DCT can be used in Section~\ref{sub:2d-idct}. - -\begin{enumerate} -\item -Assign \locvar{NLBS} the value $(\bitvar{NMBS}*4)$. -\item -For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, - assign $\locvar{TIS}[\locvar{\bi}]$ the value zero. -\item -Assign \locvar{EOBS} the value 0. -\item -For each consecutive value of \locvar{\ti} from 0 to 63: -\begin{enumerate} -\item -If \locvar{\ti} is $0$ or $1$: -\begin{enumerate} -\item -Read a 4-bit unsigned integer as \locvar{\hti_L}. -\item -Read a 4-bit unsigned integer as \locvar{\hti_C}. -\end{enumerate} -\item -For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$ for - which $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and - $\locvar{TIS}[\locvar{\bi}]$ equals \locvar{\ti}: -\begin{enumerate} -\item -Assign $\bitvar{NCOEFFS}[\locvar{\bi}]$ the value \locvar{\ti}. -\item -If \locvar{EOBS} is greater than zero: -\begin{enumerate} -\item -For each value of \locvar{\tj} from $\locvar{\ti}$ to 63, assign - $\bitvar{COEFFS}[\locvar{\bi}][\locvar{\tj}]$ the value zero. -\item -Assign $\locvar{TIS}[\locvar{\bi}]$ the value 64. -\item -Assign \locvar{EOBS} the value $(\locvar{EOBS}-1)$. -\end{enumerate} -\item -Otherwise: -\begin{enumerate} -\item -Assign \locvar{HG} a value based on \locvar{\ti} from - Table~\ref{tab:huff-groups}. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{lc}\toprule -\locvar{\ti} & \locvar{HG} \\\midrule -$0$ & $0$ \\ -$1\ldots 5$ & $1$ \\ -$6\ldots 14$ & $2$ \\ -$15\ldots 27$ & $3$ \\ -$28\ldots 63$ & $4$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Huffman Table Groups} -\label{tab:huff-groups} -\end{table} - -\item -If \locvar{\bi} is less than \locvar{NLBS}, assign \locvar{\hti} the value - $(16*\locvar{HG}+\locvar{\hti_L})$. -\item -Otherwise, assign \locvar{\hti} the value - $(16*\locvar{HG}+\locvar{\hti_C})$. -\item -Read one bit at a time until one of the codes in $\bitvar{HTS}[\locvar{\hti}]$ - is recognized, and assign the value to \locvar{TOKEN}. -\item -If \locvar{TOKEN} is less than 7, expand an EOB token using the procedure given - in Section~\ref{sub:eob-token} to update $\locvar{TIS}[\locvar{\bi}]$, - $\bitvar{COEFFS}[\locvar{\bi}]$, and \locvar{EOBS}. -\item -Otherwise, expand a coefficient token using the procedure given in - Section~\ref{sub:coeff-token} to update $\locvar{TIS}[\locvar{\bi}]$, - $\bitvar{COEFFS}[\locvar{\bi}]$, and $\bitvar{NCOEFFS}[\locvar{\bi}]$. -\end{enumerate} -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\section{Undoing DC Prediction} - -The actual value of a DC coefficient decoded by Section~\ref{sec:dct-decode} is - the residual from a predicted value computed by the encoder. -This prediction is only applied to DC coefficients. -Quantized AC coefficients are encoded directly. - -This section describes how to undo this prediction to recover the original - DC coefficients. -The predicted DC value for a block is computed from the DC values of its - immediate neighbors which precede the block in raster order. -Thus, reversing this prediction must procede in raster order, instead of coded - order. - -Note that this step comes before dequantizing the coefficients. -For this reason, DC coefficients are all quantized with the same \qi\ value, - regardless of the block-level \qi\ values decoded in - Section~\ref{sub:block-qis}. -Those \qi\ values are applied only to the AC coefficients. - -\subsection{Computing the DC Predictor} -\label{sub:dc-pred} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of flags - indicating which blocks are coded. \\ -\bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} & - 3 & No & An \bitvar{NMBS}-element array of - coding modes for each macro block. \\ -\bitvar{LASTDC} & \multicolumn{1}{p{40pt}}{Integer Array} & - 16 & Yes & A 3-element array containing the - most recently decoded DC value, one for inter mode and for each reference - frame. \\ -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bitvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{DCPRED} & Integer & 16 & Yes & The predicted DC value for the current - block. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{P} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & A 4-element array indicating which - neighbors can be used for DC prediction. \\ -\locvar{PBI} & \multicolumn{1}{p{40pt}}{Integer Array} & - 36 & No & A 4-element array containing the - coded-order block index of the current block's neighbors. \\ -\locvar{W} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & Yes & A 4-element array of the weights to - apply to each neighboring DC value. \\ -\locvar{PDIV} & Integer & 8 & No & The valud to divide the weighted sum - by. \\ -\locvar{\bj} & Integer & 36 & No & The index of a neighboring block in - coded order. \\ -\locvar{\mbi} & Integer & 32 & No & The index of the macro block - containing block \locvar{\bi}. \\ -\locvar{\mbi} & Integer & 32 & No & The index of the macro block - containing block \locvar{\bj}. \\ -\locvar{\rfi} & Integer & 2 & No & The index of the reference frame - indicated by the coding mode for macro block \locvar{\mbi}. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure outlines how a predictor is formed for a single block. - -The predictor is computed as a weighted sum of the neighboring DC values from - coded blocks which use the same reference frame. -This latter condition is determined only by checking the coding mode for the - block. -Even if the golden frame and the previous frame are in fact the same, e.g. for - the first inter frame after an intra frame, they are still treated as being - different for the purposes of DC prediction. -The weighted sum is divided by a power of two, with truncation towards zero, - and the result is checked for outranging if necessary. - -If there are no neighboring coded blocks which use the same reference frame as - the current block, then the most recent DC value of any block that used that - reference frame is used instead. -If no such block exists, then the predictor is set to zero. - -\begin{enumerate} -\item -Assign \locvar{\mbi} the index of the macro block containing block - \bitvar{\bi}. -\item -Assign \locvar{\rfi} the value of the Reference Frame Index column of - Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$. - -\begin{table}[htpb] -\begin{center} -\begin{tabular}{ll}\toprule -Coding Mode & Reference Frame Index \\\midrule -$0$ (INTER\_NOMV) & $1$ (Previous) \\ -$1$ (INTRA) & $0$ (None) \\ -$2$ (INTER\_MV) & $1$ (Previous) \\ -$3$ (INTER\_MV\_LAST) & $1$ (Previous) \\ -$4$ (INTER\_MV\_LAST2) & $1$ (Previous) \\ -$5$ (INTER\_GOLDEN\_NOMV) & $2$ (Golden) \\ -$6$ (INTER\_GOLDEN\_MV) & $2$ (Golden) \\ -$7$ (INTER\_MV\_FOUR) & $1$ (Previous) \\ -\bottomrule\end{tabular} -\end{center} -\caption{Reference Frames for Each Coding Mode} -\label{tab:cm-refs} -\end{table} - -\item -If block \locvar{\bi} is not along the left edge of the coded frame: -\begin{enumerate} -\item -Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s left - neighbor, i.e., in the same row but one column to the left. -\item -If $\bitvar{BCODED}[\bj]$ is not zero: -\begin{enumerate} -\item -Assign \locvar{\mbj} the index of the macro block containing block - \locvar{\bj}. -\item -If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs} - corresonding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}: -\begin{enumerate} -\item -Assign $\locvar{P}[0]$ the value $1$. -\item -Assign $\locvar{PBI}[0]$ the value \locvar{\bj}. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[0]$ the value zero. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[0]$ the value zero. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[0]$ the value zero. - -\item -If block \locvar{\bi} is not along the left edge nor the bottom edge of the - coded frame: -\begin{enumerate} -\item -Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower-left - neighbor, i.e., one row down and one column to the left. -\item -If $\bitvar{BCODED}[\bj]$ is not zero: -\begin{enumerate} -\item -Assign \locvar{\mbj} the index of the macro block containing block - \locvar{\bj}. -\item -If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs} - corresonding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}: -\begin{enumerate} -\item -Assign $\locvar{P}[1]$ the value $1$. -\item -Assign $\locvar{PBI}[1]$ the value \locvar{\bj}. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[1]$ the value zero. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[1]$ the value zero. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[1]$ the value zero. - -\item -If block \locvar{\bi} is not along the the bottom edge of the coded frame: -\begin{enumerate} -\item -Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower - neighbor, i.e., in the same column but one row down. -\item -If $\bitvar{BCODED}[\bj]$ is not zero: -\begin{enumerate} -\item -Assign \locvar{\mbj} the index of the macro block containing block - \locvar{\bj}. -\item -If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs} - corresonding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}: -\begin{enumerate} -\item -Assign $\locvar{P}[2]$ the value $1$. -\item -Assign $\locvar{PBI}[2]$ the value \locvar{\bj}. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[2]$ the value zero. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[2]$ the value zero. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[2]$ the value zero. - -\item -If block \locvar{\bi} is not along the right edge nor the bottom edge of the - coded frame: -\begin{enumerate} -\item -Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower-right - neighbor, i.e., one row down and one column to the right. -\item -If $\bitvar{BCODED}[\bj]$ is not zero: -\begin{enumerate} -\item -Assign \locvar{\mbj} the index of the macro block containing block - \locvar{\bj}. -\item -If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs} - corresonding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}: -\begin{enumerate} -\item -Assign $\locvar{P}[3]$ the value $1$. -\item -Assign $\locvar{PBI}[3]$ the value \locvar{\bj}. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[3]$ the value zero. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[3]$ the value zero. -\end{enumerate} -\item -Otherwise, assign $\locvar{P}[3]$ the value zero. - -\item -If none of the values $\locvar{P}[0]$, $\locvar{P}[1]$, $\locvar{P}[2]$, nor - $\locvar{P}[3]$ are non-zero, then assign \bitvar{DCPRED} the value - $\bitvar{LASTDC}[\locvar{\rfi}]$. -\item -Otherwise: -\begin{enumerate} -\item -Assign the array \locvar{W} and the variable \locvar{PDIV} the values from the - row of Table~\ref{tab:dc-weights} corresonding to the values of each - $\locvar{P}[\idx{i}]$. - -\begin{table}[htb] -\begin{center} -\begin{tabular}{ccccrrrrr}\toprule -\multicolumn{1}{p{25pt}}{\centering$\locvar{P}[0]$ (L)} & -\multicolumn{1}{p{25pt}}{\centering$\locvar{P}[1]$ (DL)} & -\multicolumn{1}{p{25pt}}{\centering$\locvar{P}[2]$ (D)} & -\multicolumn{1}{p{25pt}}{\centering$\locvar{P}[3]$ (DR)} & -\multicolumn{1}{p{25pt}}{\centering$\locvar{W}[3]$ (L)} & -\multicolumn{1}{p{25pt}}{\centering$\locvar{W}[1]$ (DL)} & -\multicolumn{1}{p{25pt}}{\centering$\locvar{W}[2]$ (D)} & -\multicolumn{1}{p{25pt}}{\centering$\locvar{W}[3]$ (DR)} & -\locvar{PDIV} \\\midrule -$1$ & $0$ & $0$ & $0$ & $1$ & $0$ & $0$ & $0$ & $1$ \\ -$0$ & $1$ & $0$ & $0$ & $0$ & $1$ & $0$ & $0$ & $1$ \\ -$1$ & $1$ & $0$ & $0$ & $1$ & $0$ & $0$ & $0$ & $1$ \\ -$0$ & $0$ & $1$ & $0$ & $0$ & $0$ & $1$ & $0$ & $1$ \\ -$1$ & $0$ & $1$ & $0$ & $1$ & $0$ & $1$ & $0$ & $2$ \\ -$0$ & $1$ & $1$ & $0$ & $0$ & $0$ & $1$ & $0$ & $1$ \\ -$1$ & $1$ & $1$ & $0$ & $29$ & $-26$ & $29$ & $0$ & $32$ \\ -$0$ & $0$ & $0$ & $1$ & $0$ & $0$ & $0$ & $1$ & $1$ \\ -$1$ & $0$ & $0$ & $1$ & $75$ & $0$ & $0$ & $53$ & $128$ \\ -$0$ & $1$ & $0$ & $1$ & $0$ & $1$ & $0$ & $1$ & $2$ \\ -$1$ & $1$ & $0$ & $1$ & $75$ & $0$ & $0$ & $53$ & $128$ \\ -$0$ & $0$ & $1$ & $1$ & $0$ & $0$ & $1$ & $0$ & $1$ \\ -$1$ & $0$ & $1$ & $1$ & $75$ & $0$ & $0$ & $53$ & $128$ \\ -$0$ & $1$ & $1$ & $1$ & $0$ & $3$ & $10$ & $3$ & $16$ \\ -$1$ & $1$ & $1$ & $1$ & $29$ & $-26$ & $29$ & $0$ & $32$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Weights and Divisors for Each Set of Available DC Predictors} -\label{tab:dc-weights} -\end{table} - -\item -Assign \bitvar{DCPRED} the value zero. -\item -If $\locvar{P}[0]$ is non-zero, assign \bitvar{DCPRED} the value - $(\bitvar{DCPRED}+\locvar{W}[0]*\bitvar{COEFFS}[\locvar{PBI}[0]][0])$. -\item -If $\locvar{P}[1]$ is non-zero, assign \bitvar{DCPRED} the value - $(\bitvar{DCPRED}+\locvar{W}[1]*\bitvar{COEFFS}[\locvar{PBI}[1]][0])$. -\item -If $\locvar{P}[2]$ is non-zero, assign \bitvar{DCPRED} the value - $(\bitvar{DCPRED}+\locvar{W}[2]*\bitvar{COEFFS}[\locvar{PBI}[2]][0])$. -\item -If $\locvar{P}[3]$ is non-zero, assign \bitvar{DCPRED} the value - $(\bitvar{DCPRED}+\locvar{W}[3]*\bitvar{COEFFS}[\locvar{PBI}[3]][0])$. -\item -Assign \bitvar{DCPRED} the value $(\bitvar{DCPRED}//\locvar{PDIV})$. -\item -If $\locvar{P}[0]$, $\locvar{P}[1]$, and $\locvar{P}[2]$ are all non-zero: -\begin{enumerate} -\item -If $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[2]][0]|$ is greater than - $128$, assign \bitvar{DCPRED} the value $\bitvar{COEFFS}[\locvar{PBI}[2]][0]$. -\item -Otherwise, if $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[0]][0]|$ is - greater than $128$, assign \bitvar{DCPRED} the value - $\bitvar{COEFFS}[\locvar{PBI}[0]][0]$. -\item -Otherwise, if $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[1]][0]|$ is - greater than $128$, assign \bitvar{DCPRED} the value - $\bitvar{COEFFS}[\locvar{PBI}[1]][0]$. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\subsection{Inverting the DC Prediction Process} -\label{sub:dc-pred-undo} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of flags - indicating which blocks are coded. \\ -\bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} & - 3 & No & An \bitvar{NMBS}-element array of - coding modes for each macro block. \\ -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. The DC - value of each block will be updated. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{LASTDC} & \multicolumn{1}{p{40pt}}{Integer Array} & - 16 & Yes & A 3-element array containing the - most recently decoded DC value, one for inter mode and for each reference - frame. \\ -\locvar{DCPRED} & Integer & 11 & Yes & The predicted DC value for the current - block. \\ -\locvar{DC} & Integer & 17 & Yes & The actual DC value for the current - block. \\ -\locvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\locvar{\mbi} & Integer & 32 & No & The index of the macro block - containing block \locvar{\bi}. \\ -\locvar{\rfi} & Integer & 2 & No & The index of the reference frame - indicated by the coding mode for macro block \locvar{\mbi}. \\ -\locvar{\pli} & Integer & 2 & No & A color plane index. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure describes the complete process of undoing the DC prediction to - recover the original DC values. -Because it is possible to add a value as large as $580$ to the predicted DC - coefficient value at every block, which will then be used to increase the - predictor for the next block, the reconstructed DC value could overflow a - 16-bit integer. -This is handled by truncating the result to a 16-bit signed representation, - simply throwing away any higher bits in the two's complement representation of - the number. - -\begin{enumerate} -\item -For each consecutive value of \locvar{\pli} from $0$ to $2$: -\begin{enumerate} -\item -Assign $\locvar{LASTDC}[0]$ the value zero. -\item -Assign $\locvar{LASTDC}[1]$ the value zero. -\item -Assign $\locvar{LASTDC}[2]$ the value zero. -\item -For each block of color plane \locvar{\pli} in {\em raster} order, with - coded-order index \locvar{\bi}: -\begin{enumerate} -\item -If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero: -\begin{enumerate} -\item -Compute the value \locvar{DCPRED} using the procedure outlined in - Section~\ref{sub:dc-pred}. -\item -Assign \locvar{DC} the value - $(\bitvar{COEFFS}[\locvar{\bi}][0]+\locvar{DCPRED})$. -\item -Truncate \locvar{DC} to a 16-bit representation by dropping any higher-order - bits. -\item -Assign $\bitvar{COEFFS}[\locvar{\bi}][0]$ the value \locvar{DC}. -\item -Assign \locvar{\mbi} the index of the macro block containing block - \locvar{\bi}. -\item -Assign \locvar{\rfi} the value of the Reference Frame Index column of - Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$. -\item -Assign $\locvar{LASTDC}[\rfi]$ the value $\locvar{DC}$. -\end{enumerate} -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\section{Reconstruction} - -At this stage, the complete contents of the data packet have been decoded. -All that remains is to reconstruct the contents of the new frame. -This is applied on a block by block basis, and as each block is independent, - the order they are processed in does not matter. - -\subsection{Predictors} -\label{sec:predictors} - -For each block, a predictor is formed based on its coding mode and motion - vector. -There are three basic types of predictors: the intra predictor, the whole-pixel - predictor, and the half-pixel predictor. -The former is used for all blocks coded in INTRA mode, while all other blocks - use one of the latter two. -The whole-pixel predictor is used if the fractional part of both motion vector - components is zero, otherwise the half-pixel predictor is used. - -\subsubsection{The Intra Predictor} -\label{sub:predintra} - -\paragraph{Input parameters:} None. - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{PRED} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & An $8\times 8$ array of predictor - values to use for INTRA coded blocks. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\idx{bx}} & Integer & 3 & No & The horizontal pixel index in the - block. \\ -\locvar{\idx{by}} & Integer & 3 & No & The vertical pixel index in the - block. \\ -\bottomrule\end{tabularx} -\medskip - -The intra predictor is nothing more than the constant value $128$. -This is applied for the sole purpose of centering the range of possible DC - values for INTRA blocks around zero. - -\begin{enumerate} -\item -For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive: -\begin{enumerate} -\item -For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive: -\begin{enumerate} -\item -Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value $128$. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\subsubsection{The Whole-Pixel Predictor} -\label{sub:predfullpel} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RPW} & Integer & 20 & No & The width of the current plane of the - reference frame in pixels. \\ -\bitvar{RPH} & Integer & 20 & No & The height of the current plane of the - reference frame in pixels. \\ -\bitvar{REFP} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$ - array containing the contents of the current plane of the reference frame. \\ -\bitvar{BX} & Integer & 20 & No & The horizontal pixel index of the - lower-left corner of the current block. \\ -\bitvar{BY} & Integer & 20 & No & The vertical pixel index of the - lower-left corner of the current block. \\ -\bitvar{MVX} & Integer & 5 & No & The horizontal component of the block - motion vector. -This is always a whole-pixel value. \\ -\bitvar{MVY} & Integer & 5 & No & The vertical component of the block - motion vector. -This is always a whole-pixel value. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{PRED} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & An $8\times 8$ array of predictor - values to use for INTER coded blocks. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\idx{bx}} & Integer & 3 & Yes & The horizontal pixel index in the - block. \\ -\locvar{\idx{by}} & Integer & 3 & Yes & The vertical pixel index in the - block. \\ -\locvar{\idx{rx}} & Integer & 20 & No & The horizontal pixel index in the - reference frame. \\ -\locvar{\idx{ry}} & Integer & 20 & No & The vertical pixel index in the - reference frame. \\ -\bottomrule\end{tabularx} -\medskip - -The whole pixel predictor simply copies verbatim the contents of the reference - frame pointed to by the block's motion vector. -If the vector points outside the reference frame, then the closest value on the - edge of the reference frame is used instead. -In practice, this is usually implemented by expanding the size of the reference - frame by $8$ or $16$ pixels on each side---depending on whether or not the - corresponding axis is subsampled in the current plane---and copying the border - pixels into this region. - -\begin{enumerate} -\item -For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive: -\begin{enumerate} -\item -Assign \locvar{\idx{ry}} the value - $(\bitvar{BY}+\bitvar{MVY}+\locvar{\idx{by}})$. -\item -If \locvar{\idx{ry}} is greater than $(\bitvar{RPH}-1)$, assign - \locvar{\idx{ry}} the value $(\bitvar{RPH}-1)$. -\item -If \locvar{\idx{ry}} is less than zero, assign \locvar{\idx{ry}} the value - zero. -\item -For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive: -\begin{enumerate} -\item -Assign \locvar{\idx{rx}} the value - $(\bitvar{BX}+\bitvar{MVX}+\locvar{\idx{bx}})$. -\item -If \locvar{\idx{rx}} is greater than $(\bitvar{RPW}-1)$, assign - \locvar{\idx{rx}} the value $(\bitvar{RPW}-1)$. -\item -If \locvar{\idx{rx}} is less than zero, assign \locvar{\idx{rx}} the value - zero. -\item -Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value - $\bitvar{REFP}[\locvar{\idx{ry}}][\locvar{\idx{rx}}]$. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\subsubsection{The Half-Pixel Predictor} -\label{sub:predhalfpel} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RPW} & Integer & 20 & No & The width of the current plane of the - reference frame in pixels. \\ -\bitvar{RPH} & Integer & 20 & No & The height of the current plane of the - reference frame in pixels. \\ -\bitvar{REFP} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$ - array containing the contents of the current plane of the reference frame. \\ -\bitvar{BX} & Integer & 20 & No & The horizontal pixel index of the - lower-left corner of the current block. \\ -\bitvar{BY} & Integer & 20 & No & The vertical pixel index of the - lower-left corner of the current block. \\ -\bitvar{MVX} & Integer & 5 & No & The horizontal component of the first - whole-pixel motion vector. \\ -\bitvar{MVY} & Integer & 5 & No & The vertical component of the first - whole-pixel motion vector. \\ -\bitvar{MVX2} & Integer & 5 & No & The horizontal component of the second - whole-pixel motion vector. \\ -\bitvar{MVY2} & Integer & 5 & No & The vertical component of the second - whole-pixel motion vector. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{PRED} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & An $8\times 8$ array of predictor - values to use for INTER coded blocks. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\idx{bx}} & Integer & 3 & Yes & The horizontal pixel index in the - block. \\ -\locvar{\idx{by}} & Integer & 3 & Yes & The vertical pixel index in the - block. \\ -\locvar{\idx{rx1}} & Integer & 20 & No & The first horizontal pixel index in - the reference frame. \\ -\locvar{\idx{ry1}} & Integer & 20 & No & The first vertical pixel index in the - reference frame. \\ -\locvar{\idx{rx2}} & Integer & 20 & No & The second horizontal pixel index in - the reference frame. \\ -\locvar{\idx{ry2}} & Integer & 20 & No & The second vertical pixel index in - the reference frame. \\ -\bottomrule\end{tabularx} -\medskip - -If one or both of the components of the block motion vector is not a - whole-pixel value, then the half-pixel predictor is used. -The half-pixel predictor converts the fractional motion vector into two - whole-pixel motion vectors. -The first is formed by truncating the values of each component towards zero, - and the second is formed by truncating them away from zero. -The contributions from the reference frame at the locations pointed to by each - vector are averaged, truncating towards negative infinity. - -Only two samples from the reference frame contribute to each predictor value, - even if both components of the motion vector have non-zero fractional - components. -Motion vector components with quarter-pixel accuracy in the chroma planes are - treated exactly the same as those with half-pixel accuracy. -Any non-zero fractional part gets rounded one way in the first vector, and the - other way in the second. - -\begin{enumerate} -\item -For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive: -\begin{enumerate} -\item -Assign \locvar{\idx{ry1}} the value - $(\bitvar{BY}+\bitvar{MVY1}+\locvar{\idx{by}})$. -\item -If \locvar{\idx{ry1}} is greater than $(\bitvar{RPH}-1)$, assign - \locvar{\idx{ry1}} the value $(\bitvar{RPH}-1)$. -\item -If \locvar{\idx{ry1}} is less than zero, assign \locvar{\idx{ry1}} the value - zero. -\item -Assign \locvar{\idx{ry2}} the value - $(\bitvar{BY}+\bitvar{MVY2}+\locvar{\idx{by}})$. -\item -If \locvar{\idx{ry2}} is greater than $(\bitvar{RPH}-1)$, assign - \locvar{\idx{ry2}} the value $(\bitvar{RPH}-1)$. -\item -If \locvar{\idx{ry2}} is less than zero, assign \locvar{\idx{ry2}} the value - zero. -\item -For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive: -\begin{enumerate} -\item -Assign \locvar{\idx{rx1}} the value - $(\bitvar{BX}+\bitvar{MVX1}+\locvar{\idx{bx}})$. -\item -If \locvar{\idx{rx1}} is greater than $(\bitvar{RPW}-1)$, assign - \locvar{\idx{rx1}} the value $(\bitvar{RPW}-1)$. -\item -If \locvar{\idx{rx1}} is less than zero, assign \locvar{\idx{rx1}} the value - zero. -\item -Assign \locvar{\idx{rx2}} the value - $(\bitvar{BX}+\bitvar{MVX2}+\locvar{\idx{bx}})$. -\item -If \locvar{\idx{rx2}} is greater than $(\bitvar{RPW}-1)$, assign - \locvar{\idx{rx2}} the value $(\bitvar{RPW}-1)$. -\item -If \locvar{\idx{rx2}} is less than zero, assign \locvar{\idx{rx2}} the value - zero. -\item -Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value -\begin{equation*} - (\bitvar{REFP}[\locvar{\idx{ry1}}][\locvar{\idx{rx1}}]+ - \bitvar{REFP}[\locvar{\idx{ry2}}][\locvar{\idx{rx2}}])>>1. -\end{equation*} -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\subsection{Dequantization} -\label{sub:dequant} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values for - AC coefficients for each \qi\ value. \\ -\bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values for - the DC coefficient for each \qi\ value. \\ -\bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 8 & No & A $\bitvar{NBMS}\times 64$ array - containing the base matrices. \\ -\bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 6 & No & A $2\times 3$ array containing the - number of quant ranges for a given \qti\ and \pli, respectively. -This is at most $63$. \\ -\bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 6 & No & A $2\times 3\times 63$ array of the - sizes of each quant range for a given \qti\ and \pli, respectively. -Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\ -\bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 9 & No & A $2\times 3\times 64$ array of the - \bmi's used for each quant range for a given \qti\ and \pli, respectively. -Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\ -\bitvar{\qti} & Integer & 1 & No & A quantization type index. -See Table~\ref{tab:quant-types}.\\ -\bitvar{\pli} & Integer & 2 & No & A color plane index. -See Table~\ref{tab:color-planes}.\\ -\bitvar{\idx{qi0}} & Integer & 6 & No & The quantization index of the DC - coefficient. \\ -\bitvar{\qi} & Integer & 6 & No & The quantization index of the AC - coefficients. \\ -\bitvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{DQC} & \multicolumn{1}{p{40pt}}{Integer Array} & - 14 & Yes & A $64$-element array of dequantized - DCT coefficients in natural order (cf. Section~\ref{sec:dct-coeffs}). \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{QMAT} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of quantization - values for each DCT coefficient in natural order. \\ -\locvar{\ci} & Integer & 6 & No & The DCT coefficient index in natural - order. \\ -\locvar{\zzi} & Integer & 6 & No & The DCT coefficient index in zig-zag - order. \\ -\locvar{C} & Integer & 29 & Yes & A single dequantized coefficient. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure takes the quantized DCT coefficient values in zig-zag order for - a single block---after DC prediction has been undone---and returns the - dequantized values in natural order. -If large coefficient values are decoded for coarsely quantized coefficients, - the resulting dequantized value can be significantly larger than 16 bits. -Such a coefficient is truncated to a signed 16-bit representation by discarding - the higher-order bits of its twos-complement representation. - -Although this procedure recomputes the quantization matrices from the - parameters in the setup header for each block, there are at most six different - ones used for each color plane. -An efficient implementation could compute them once in advance. - -\begin{enumerate} -\item -Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, - \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{\qti}, \bitvar{\pli}, and - \bitvar{\idx{qi0}}, use the procedure given in Section~\ref{sub:quant-mat} to - compute the DC quantization matrix \locvar{QMAT}. -\item -Assign \locvar{C} the value - $\bitvar{COEFFS}[\bitvar{\bi}][0]*\locvar{QMAT}[0]$. -\item -Truncate \locvar{C} to a 16-bit representation by dropping any higher-order - bits. -\item -Assign $\bitvar{DQC}[0]$ the value \locvar{C}. -\item -Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, - \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{\qti}, \bitvar{\pli}, and - \bitvar{\qi}, use the procedure given in Section~\ref{sub:quant-mat} to - compute the AC quantization matrix \locvar{QMAT}. -\item -For each value of \locvar{\ci} from 1 to 63, inclusive: -\begin{enumerate} -\item -Assign \locvar{\zzi} the index in zig-zag order corresponding to \locvar{\ci}. -E.g., the value at row $(\locvar{\ci}//8)$ and column $(\locvar{\ci}\%8)$ in - Figure~\ref{tab:zig-zag} -\item -Assign \locvar{C} the value - $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\zzi}]*\locvar{QMAT}[\locvar{\ci}]$. -\item -Truncate \locvar{C} to a 16-bit representation by dropping any higher-order - bits. -\item -Assign $\bitvar{DQC}[\locvar{\ci}]$ the value \locvar{C}. -\end{enumerate} -\end{enumerate} - -\subsection{The Inverse DCT} - -The 2D inverse DCT is separated into two applications of the 1D inverse DCT. -The transform is first applied to each row, and then applied to each column of - the result. - -Each application of the 1D inverse DCT scales the values by a factor of two - relative to the orthonormal version of the transform, for a total scale factor - of four for the 2D transform. -It is assumed that a similar scale factor is applied during the forward DCT - used in the encoder, so that a division by 16 is required after the transform - has been applied in both directions. -The inclusion of this scale factor allows the integerized transform to operate - with increased precision. -All divisions throughout the transform are implemented with right shifts. -Only the final division by $16$ is rounded, with ties rounded towards positive - infinity. - -All intermediate values are truncated to a 32-bit signed representation by - discarding any higher-order bits in their two's complement representation. -The final output of each 1D transform is truncated to a 16-bit signed value in - the same manner. -In practice, if the high word of a $16\times 16$ bit multiplication can be - obtained directly, 16 bits is sufficient for every calculation except scaling - by $C4$. -Thus we truncate to 16 bits before that multiplication to allow an - implementation entirely in 16-bit registers. -Implementations using larger registers must sign-extend the 16-bit value to - maintain compatibility. - -Note that if 16-bit register are used, overflow in the additions and - subtractions should be handled using \textit{unsaturated} arithmetic. -That is, the high-order bits should be discarded and the low-order bits - retained, instead of clamping the result to the maximum or minimum value. -This allows the maximum flexibility in re-ordering these instructions without - deviating from this specification. - -The 1D transform can only overflow if input coefficients larger than $\pm 6201$ - are present. -However, the result of applying the 2D forward transform on pixel values in the - range $-255\ldots 255$ can be as large as $\pm 8157$ due to the scale factor - of four that is applied, and quantization errors could make this even larger. -Therefore, the coefficients cannot simply be clamped into a valid range before - the transform. - -\subsubsection{The 1D Inverse DCT} -\label{sub:1d-idct} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{Y} & \multicolumn{1}{p{40pt}}{Integer Array} & - 16 & Yes & An 8-element array of DCT - coefficients. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{X} & \multicolumn{1}{p{40pt}}{Integer Array} & - 16 & Yes & An 8-element array of output values. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{T} & \multicolumn{1}{p{40pt}}{Integer Array} & - 32 & Yes & An 8-element array containing the - current value of each signal line. \\ -\locvar{R} & Integer & 32 & Yes & A temporary value. \\ -\bottomrule\end{tabularx} -\medskip - -A compliant decoder MUST use the exact implementation of the inverse DCT - defined in this specification. -Some operations may be re-ordered, but the result must be precisely equivalent. -This is a design decision that limits some avenues of decoder optimization, but - prevents any drift in the prediction loop. -Theora uses a 16-bit integerized approximation of of the 8-point 1D inverse DCT - based on the Chen factorization \cite{CSF77}. -It requires 16 multiplications and 26 additions and subtractions. - -\begin{figure}[htbp] -\begin{center} -\includegraphics[width=\textwidth]{idct} -\end{center} -\caption{Signal Flow Graph for the 1D Inverse DCT} -\label{fig:idct} -\end{figure} - -A signal flow graph of the transformation is presented in - Figure~\ref{fig:idct}. -This graph provides a good visualization of which parts of the transform are - parallelizable. -Time increases from left to right. - -Each signal line is involved in an operation where the line is marked with a - dot $\cdot$ or a circled plus sign $\oplus$. -The constants $\locvar{C}i$ and $\locvar{S}j$ are the 16-bit integer - approximations of $\cos(\frac{i\pi}{16})$ and $\sin(\frac{j\pi}{16})$ listed - in Table~\ref{tab:dct-consts}. -When they appear next to a signal line, the value on that line is scaled by the - given constant. -A circled minus sign $\ominus$ next to a signal line indicates that the value - on that line is negated. - -Operations on a single signal path through the graph cannot be reordered, but - operations on different paths may be, or may be executed in parallel. -Different graphs may be obtainable using the associative, commutative, and - distributive properties of unsaturated arithmetic. -The column of numbers on the left represents an initial permutation of the - input DCT coefficients. -The column on the right represents the unpermuted output. -One can be obtained by bit-reversing the 3-bit binary representation of the - other. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{llr}\toprule -$\locvar{C}i$ & $\locvar{S}j$ & Value \\\midrule -$\locvar{C1}$ & $\locvar{S7}$ & $64277$ \\ -$\locvar{C2}$ & $\locvar{S6}$ & $60547$ \\ -$\locvar{C3}$ & $\locvar{S5}$ & $54491$ \\ -$\locvar{C4}$ & $\locvar{S4}$ & $46341$ \\ -$\locvar{C5}$ & $\locvar{S3}$ & $36410$ \\ -$\locvar{C6}$ & $\locvar{S2}$ & $25080$ \\ -$\locvar{C7}$ & $\locvar{S1}$ & $12785$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{16-bit Approximations of Sines and Cosines} -\label{tab:dct-consts} -\end{table} - -\begin{enumerate} -\item -Assign $\locvar{T}[0]$ the value $\bitvar{Y}[0]+\bitvar{Y}[4]$. -\item -Truncate $\locvar{T}[0]$ to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\locvar{T}[0]$ the value - $\locvar{C4}*\locvar{T}[0]>>16$. -\item -Assign $\locvar{T}[1]$ the value $\bitvar{Y}[0]-\bitvar{Y}[4]$. -\item -Truncate $\locvar{T}[1]$ to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\locvar{T}[1]$ the value $\locvar{C4}*\locvar{T}[1]>>16$. -\item -Assign $\locvar{T}[2]$ the value $(\locvar{C6}*\bitvar{Y}[2]>>16)- - (\locvar{S6}*\bitvar{Y}[6]>>16)$. -\item -Assign $\locvar{T}[3]$ the value $(\locvar{S6}*\bitvar{Y}[2]>>16)+ - (\locvar{C6}*\bitvar{Y}[6]>>16)$. -\item -Assign $\locvar{T}[4]$ the value $(\locvar{C7}*\bitvar{Y}[1]>>16)- - (\locvar{S7}*\bitvar{Y}[7]>>16)$. -\item -Assign $\locvar{T}[5]$ the value $(\locvar{C3}*\bitvar{Y}[5]>>16)- - (\locvar{S3}*\bitvar{Y}[3]>>16)$. -\item -Assign $\locvar{T}[6]$ the value $(\locvar{S3}*\bitvar{Y}[5]>>16)+ - (\locvar{C3}*\bitvar{Y}[3]>>16)$. -\item -Assign $\locvar{T}[7]$ the value $(\locvar{S7}*\bitvar{Y}[1]>>16)+ - (\locvar{C7}*\bitvar{Y}[7]>>16)$. -\item -Assign \locvar{R} the value $\locvar{T}[4]+\locvar{T}[5]$. -\item -Assign $\locvar{T}[5]$ the value $\locvar{T}[4]-\locvar{T}[5]$. -\item -Truncate $\locvar{T}[5]$ to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\locvar{T}[5]$ the value $\locvar{C4}*\locvar{T}[5]>>16$. -\item -Assign $\locvar{T}[4]$ the value $\locvar{R}$. -\item -Assign \locvar{R} the value $\locvar{T}[7]+\locvar{T}[6]$. -\item -Assign $\locvar{T}[6]$ the value $\locvar{T}[7]-\locvar{T}[6]$. -\item -Truncate $\locvar{T}[6]$ to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\locvar{T}[6]$ the value $\locvar{C4}*\locvar{T}[6]>>16$. -\item -Assign $\locvar{T}[7]$ the value $\locvar{R}$. -\item -Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[3]$. -\item -Assign $\locvar{T}[3]$ the value $\locvar{T}[0]-\locvar{T}[3]$. -\item -Assign $\locvar{T}[0]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[2]$ -\item -Assign $\locvar{T}[2]$ the value $\locvar{T}[1]-\locvar{T}[2]$ -\item -Assign $\locvar{T}[1]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[6]+\locvar{T}[5]$. -\item -Assign $\locvar{T}[5]$ the value $\locvar{T}[6]-\locvar{T}[5]$. -\item -Assign $\locvar{T}[6]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[7]$. -\item -Truncate \locvar{R} to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\bitvar{X}[0]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[6]$. -\item -Truncate \locvar{R} to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\bitvar{X}[1]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[2]+\locvar{T}[5]$. -\item -Truncate \locvar{R} to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\bitvar{X}[2]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[3]+\locvar{T}[4]$. -\item -Truncate \locvar{R} to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\bitvar{X}[3]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[3]-\locvar{T}[4]$. -\item -Truncate \locvar{R} to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\bitvar{X}[4]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[2]-\locvar{T}[5]$. -\item -Truncate \locvar{R} to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\bitvar{X}[5]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[1]-\locvar{T}[6]$. -\item -Truncate \locvar{R} to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\bitvar{X}[6]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[0]-\locvar{T}[7]$. -\item -Truncate \locvar{R} to a 16-bit signed representation by dropping any - higher-order bits. -\item -Assign $\bitvar{X}[7]$ the value \locvar{R}. -\end{enumerate} - -\subsubsection{The 2D Inverse DCT} -\label{sub:2d-idct} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{DQC} & \multicolumn{1}{p{40pt}}{Integer Array} & - 14 & Yes & A $64$-element array of dequantized - DCT coefficients in natural order (cf. Section~\ref{sec:dct-coeffs}). \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RES} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $8\times 8$ array containing the - decoded residual for the current block. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{\ci} & Integer & 3 & No & The column index. \\ -\locvar{\ri} & Integer & 3 & No & The row index. \\ -\locvar{Y} & \multicolumn{1}{p{40pt}}{Integer Array} & - 16 & Yes & An 8-element array of 1D iDCT input - values. \\ -\locvar{X} & \multicolumn{1}{p{40pt}}{Integer Array} & - 16 & Yes & An 8-element array of 1D iDCT output - values. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure applies the 1D inverse DCT transform 16 times to a block of - dequantized coefficients: once for each of the 8 rows, and once for each of - the 8 columns of the result. -Note that the coordinate system used for the columns is the same right-handed - coordinate system used by the rest of Theora. -Thus, the column is indexed from bottom to top, not top to bottom. -The final values are divided by sixteen, rounding with ties rounded towards - postive infinity. - -\begin{enumerate} -\item -For each value of \locvar{\ri} from 0 to 7: -\begin{enumerate} -\item -For each value of \locvar{\ci} from 0 to 7: -\begin{enumerate} -\item -Assign $\locvar{Y}[\locvar{\ci}]$ the value - $\bitvar{DQC}[\locvar{\ri}*8+\locvar{\ci}]$. -\end{enumerate} -\item -Compute \locvar{X}, the 1D inverse DCT of \locvar{Y} using the procedure - described in Section~\ref{sub:1d-idct}. -\item -For each value of $\locvar{\ci}$ from 0 to 7: -\begin{enumerate} -\item -Assign $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$ the value - $\locvar{X}[\locvar{\ci}]$. -\end{enumerate} -\end{enumerate} -\item -For each value of \locvar{\ci} from 0 to 7: -\begin{enumerate} -\item -For each value of \locvar{\ri} from 0 to 7: -\begin{enumerate} -\item -Assign $\locvar{Y}[\locvar{\ri}]$ the value - $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$. -\end{enumerate} -\item -Compute \locvar{X}, the 1D inverse DCT of \locvar{Y} using the procedure - described in Section~\ref{sub:1d-idct}. -\item -For each value of \locvar{\ri} from 0 to 7: -\begin{enumerate} -\item -Assign $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$ the value - $(\locvar{X}[\locvar{\ri}]+8)>>4$. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\subsubsection{The 1D Forward DCT (Non-Normative)} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{X} & \multicolumn{1}{p{40pt}}{Integer Array} & - 14 & Yes & An 8-element array of input values. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{Y} & \multicolumn{1}{p{40pt}}{Integer Array} & - 16 & Yes & An 8-element array of DCT - coefficients. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{T} & \multicolumn{1}{p{40pt}}{Integer Array} & - 16 & Yes & An 8-element array containing the - current value of each signal line. \\ -\locvar{R} & Integer & 16 & Yes & A temporary value. \\ -\bottomrule\end{tabularx} -\medskip - -The forward transform used in the encoder is not mandated by this standard as - the inverse one is. -Precise equivalence in the inverse transform alone is all that is required to - guarantee that there is no mismatch in the prediction loop between encoder and - any compliant decoder implementation. -However, a forward transform is provided here as a convenience for implementing - an encoder. -This is the version of the transform used by Xiph.org's Theora encoder, which - is the same as that used by VP3. -Like the inverse DCT, it is first applied to each row, and then applied to each - column of the result. - -\begin{figure}[htbp] -\begin{center} -\includegraphics[width=\textwidth]{fdct} -\end{center} -\caption{Signal Flow Graph for the 1D Forward DCT} -\label{fig:fdct} -\end{figure} - -The signal flow graph for the forward transform is given in - Figure~\ref{fig:fdct}. -It is largely the reverse of the flow graph given for the inverse DCT. -It is important to note that the signs on the constants in the rotations have - changed, and the \locvar{C4} scale factors on one of the lower butterflies now - appear on the opposite side. -The column of numbers on the left represents the unpermuted input, and the - column on the right the permuted output DCT coefficients. - -A proper division by $2^{16}$ is done after the multiplications instead of a - shift in the forward transform. -This can be implemented quickly by adding an offset of $\hex{FFFF}$ if the - number is negative, and then shifting as before. -This slightly increases the computational complexity of the transform. -Unlike the inverse DCT, 16-bit registers and a $16\times16\rightarrow32$ bit - multiply are sufficient to avoid any overflow, so long as the input is in the - range $-6270\ldots 6270$, which is larger than required. - -\begin{enumerate} -\item -Assign $\locvar{T}[0]$ the value $\bitvar{X}[0]+\bitvar{X}[7]$. -\item -Assign $\locvar{T}[1]$ the value $\bitvar{X}[1]+\bitvar{X}[6]$. -\item -Assign $\locvar{T}[2]$ the value $\bitvar{X}[2]+\bitvar{X}[5]$. -\item -Assign $\locvar{T}[3]$ the value $\bitvar{X}[3]+\bitvar{X}[4]$. -\item -Assign $\locvar{T}[4]$ the value $\bitvar{X}[3]-\bitvar{X}[4]$. -\item -Assign $\locvar{T}[5]$ the value $\bitvar{X}[2]-\bitvar{X}[5]$. -\item -Assign $\locvar{T}[6]$ the value $\bitvar{X}[1]-\bitvar{X}[6]$. -\item -Assign $\locvar{T}[7]$ the value $\bitvar{X}[0]-\bitvar{X}[7]$. -\item -Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[3]$. -\item -Assign $\locvar{T}[3]$ the value $\locvar{T}[0]-\locvar{T}[3]$. -\item -Assign $\locvar{T}[0]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[2]$. -\item -Assign $\locvar{T}[2]$ the value $\locvar{T}[1]-\locvar{T}[2]$. -\item -Assign $\locvar{T}[1]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[6]-\locvar{T}[5]$. -\item -Assign $\locvar{T}[6]$ the value - $(\locvar{C4}*(\locvar{T}[6]+\locvar{T}[5]))//16$. -\item -Assign $\locvar{T}[5]$ the value $(\locvar{C4}*\locvar{R})//16$. -\item -Assign \locvar{R} the value $\locvar{T}[4]+\locvar{T}[5]$. -\item -Assign $\locvar{T}[5]$ the value $\locvar{T}[4]-\locvar{T}[5]$. -\item -Assign $\locvar{T}[4]$ the value \locvar{R}. -\item -Assign \locvar{R} the value $\locvar{T}[7]+\locvar{T}[6]$. -\item -Assign $\locvar{T}[6]$ the value $\locvar{T}[7]-\locvar{T}[6]$. -\item -Assign $\locvar{T}[7]$ the value \locvar{R}. -\item -Assign $\bitvar{Y}[0]$ the value - $(\locvar{C4}*(\locvar{T}[0]+\locvar{T}[1]))//16$. -\item -Assign $\bitvar{Y}[4]$ the value - $(\locvar{C4}*(\locvar{T}[0]-\locvar{T}[1]))//16$. -\item -Assign $\bitvar{Y}[2]$ the value - $((\locvar{S6}*\locvar{T}[3])//16)+ - ((\locvar{C6}*\locvar{T}[2])//16)$. -\item -Assign $\bitvar{Y}[6]$ the value - $((\locvar{C6}*\locvar{T}[3])//16)- - ((\locvar{S6}*\locvar{T}[2])//16)$. -\item -Assign $\bitvar{Y}[1]$ the value - $((\locvar{S7}*\locvar{T}[7])//16)+ - ((\locvar{C7}*\locvar{T}[4])//16)$. -\item -Assign $\bitvar{Y}[5]$ the value - $((\locvar{S3}*\locvar{T}[6])//16)+ - ((\locvar{C3}*\locvar{T}[5])//16)$. -\item -Assign $\bitvar{Y}[3]$ the value - $((\locvar{C3}*\locvar{T}[6])//16)- - ((\locvar{S3}*\locvar{T}[5])//16)$. -\item -Assign $\bitvar{Y}[7]$ the value - $((\locvar{C7}*\locvar{T}[7])//16)- - ((\locvar{S7}*\locvar{T}[4])//16)$. -\end{enumerate} - -\subsection{The Complete Reconstruction Algorithm} -\label{sub:recon} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values - for AC coefficients for each \qi\ value. \\ -\bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values - for the DC coefficient for each \qi\ value. \\ -\bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 8 & No & A $\bitvar{NBMS}\times 64$ array - containing the base matrices. \\ -\bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 6 & No & A $2\times 3$ array containing the - number of quant ranges for a given \qti\ and \pli, respectively. -This is at most $63$. \\ -\bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 6 & No & A $2\times 3\times 63$ array of the - sizes of each quant range for a given \qti\ and \pli, respectively. -Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\ -\bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 9 & No & A $2\times 3\times 64$ array of the - \bmi's used for each quant range for a given \qti\ and \pli, respectively. -Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\ -\bitvar{RPYW} & Integer & 20 & No & The width of the $Y'$ plane of the - reference frames in pixels. \\ -\bitvar{RPYH} & Integer & 20 & No & The height of the $Y'$ plane of the - reference frames in pixels. \\ -\bitvar{RPCW} & Integer & 20 & No & The width of the $C_b$ and $C_r$ - planes of the reference frames in pixels. \\ -\bitvar{RPCH} & Integer & 20 & No & The height of the $C_b$ and $C_r$ - planes of the reference frames in pixels. \\ -\bitvar{GOLDREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the golden reference - frame. \\ -\bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the golden reference - frame. \\ -\bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the golden reference - frame. \\ -\bitvar{PREVREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the previous reference - frame. \\ -\bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the previous reference - frame. \\ -\bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the previous reference - frame. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of - flags indicating which blocks are coded. \\ -\bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} & - 3 & No & An \bitvar{NMBS}-element array of - coding modes for each macro block. \\ -\bitvar{MVECTS} & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} & - 6 & Yes & An \bitvar{NBS}-element array of - motion vectors for each block. \\ -\bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\bitvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - coefficient count for each block. \\ -\bitvar{QIS} & \multicolumn{1}{p{40pt}}{Integer array} & - 6 & No & An \bitvar{NQIS}-element array of - \qi\ values. \\ -\bitvar{QIIS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 2 & No & An \bitvar{NBS}-element array of - \locvar{\qii} values for each block. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RECY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the reconstructed frame. \\ -\bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the reconstructed frame. \\ -\bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the reconstructed frame. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{RPW} & Integer & 20 & No & The width of the current plane of the - current reference frame in pixels. \\ -\locvar{RPH} & Integer & 20 & No & The height of the current plane of - the current reference frame in pixels. \\ -\locvar{REFP} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$ - array containing the contents of the current plane of the current reference - frame. \\ -\locvar{BX} & Integer & 20 & No & The horizontal pixel index of the - lower-left corner of the current block. \\ -\locvar{BY} & Integer & 20 & No & The vertical pixel index of the - lower-left corner of the current block. \\ -\locvar{MVX} & Integer & 5 & No & The horizontal component of the first - whole-pixel motion vector. \\ -\locvar{MVY} & Integer & 5 & No & The vertical component of the first - whole-pixel motion vector. \\ -\locvar{MVX2} & Integer & 5 & No & The horizontal component of the second - whole-pixel motion vector. \\ -\locvar{MVY2} & Integer & 5 & No & The vertical component of the second - whole-pixel motion vector. \\ -\locvar{PRED} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & An $8\times 8$ array of predictor - values to use for the current block. \\ -\locvar{RES} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $8\times 8$ array containing the - decoded residual for the current block. \\ -\locvar{QMAT} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of quantization - values for each DCT coefficient in natural order. \\ -\locvar{DC} & Integer & 29 & Yes & The dequantized DC coefficient of a - block. \\ -\locvar{P} & Integer & 17 & Yes & A reconstructed pixel value. \\ -\locvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\locvar{\mbi} & Integer & 32 & No & The index of the macro block - containing block \locvar{\bi}. \\ -\locvar{\pli} & Integer & 2 & No & The color plane index of the current - block. \\ -\locvar{\rfi} & Integer & 2 & No & The index of the reference frame - indicated by the coding mode for macro block \locvar{\mbi}. \\ -\locvar{\idx{bx}} & Integer & 3 & No & The horizontal pixel index in the - block. \\ -\locvar{\idx{by}} & Integer & 3 & No & The vertical pixel index in the - block. \\ -\locvar{\qti} & Integer & 1 & No & A quantization type index. -See Table~\ref{tab:quant-types}.\\ -\locvar{\idx{qi0}} & Integer & 6 & No & The quantization index of the DC - coefficient. \\ -\locvar{\qi} & Integer & 6 & No & The quantization index of the AC - coefficients. \\ -\bottomrule\end{tabularx} -\medskip - -This section takes the decoded packet data and uses the previously defined - procedures to reconstruct each block of the current frame. -For coded blocks, a predictor is formed using the coding mode and, if - applicable, the motion vector, and then the residual is computed from the - quantized DCT coefficients. -For uncoded blocks, the contents of the co-located block are copied from the - previous frame and the residual is cleared to zero. -Then the predictor and residual are added, and the result clamped to the range - $0\ldots 255$ and stored in the current frame. - -In the special case that a block contains only a DC coefficient, the - dequantization and inverse DCT transform is skipped. -Instead the constant pixel value for the entire block is computed in one step. -Note that the truncation of intermediate operations is omitted and the final - rounding is slightly different in this case. -The check for whether or not the block contains only a DC coefficient is based - on the coefficient count returned from the token decode procedure of - Section~\ref{sec:dct-decode}, and not by checking to see if the remaining - coefficient values are zero. -Also note that even when the coefficient count indicates the block contains - zero coefficients, the DC coefficient is still processed, as undoing DC - prediction might have made it non-zero. - -After this procedure, the frame is completely reconstructed, but before it can - be used as a reference frame, a loop filter must be run over it to help reduce - blocking artifacts. -This is detailed in Section~\ref{sec:loopfilter}. - -\begin{enumerate} -\item -Assign \locvar{\idx{qi0}} the value $\bitvar{QIS}[0]$. -\item -For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$: -\begin{enumerate} -\item -Assign \locvar{\pli} the index of the color plane block \locvar{\bi} belongs - to. -\item -Assign \locvar{BX} the horizontal pixel index of the lower-left corner of block - \locvar{\bi}. -\item -Assign \locvar{BY} the vertical pixel index of the lower-left corner of block - \locvar{\bi}. -\item -If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero: -\begin{enumerate} -\item -Assign \locvar{\mbi} the index of the macro block containing block - \locvar{\bi}. -\item -If $\bitvar{MBMODES}[\locvar{\mbi}]$ is 1 (INTRA), assign \locvar{\qti} the - value $0$. -\item -Otherwise, assign \locvar{\qti} the value $1$. -\item -Assign \locvar{\rfi} the value of the Reference Frame Index column of - Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$. -\item -If \locvar{\rfi} is zero, compute \locvar{PRED} using the procedure given in - Section~\ref{sub:predintra}. -\item -Otherwise: -\begin{enumerate} -\item -Assign \locvar{REFP}, \locvar{RPW}, and \locvar{RPH} the values given in - Table~\ref{tab:refp} corresponding to current value of \locvar{\rfi} and - \locvar{\pli}. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{cclll}\toprule -\locvar{\rfi} & \locvar{\pli} & -\locvar{REFP} & \locvar{RPW} & \locvar{RPH} \\\midrule -$1$ & $0$ & \bitvar{PREVREFY} & \bitvar{RPYW} & \bitvar{RPYH} \\ -$1$ & $1$ & \bitvar{PREVREFCB} & \bitvar{RPCW} & \bitvar{RPCH} \\ -$1$ & $2$ & \bitvar{PREVREFCR} & \bitvar{RPCW} & \bitvar{RPCH} \\ -$2$ & $0$ & \bitvar{GOLDREFY} & \bitvar{RPYW} & \bitvar{RPYH} \\ -$2$ & $1$ & \bitvar{GOLDREFCB} & \bitvar{RPCW} & \bitvar{RPCH} \\ -$2$ & $2$ & \bitvar{GOLDREFCR} & \bitvar{RPCW} & \bitvar{RPCH} \\ -\bottomrule\end{tabular} -\end{center} -\caption{Reference Planes and Sizes for Each \locvar{\rfi} and \locvar{\pli}} -\label{tab:refp} -\end{table} - -\item -Assign \locvar{MVX} the value -\begin{equation*} - \left\lfloor\lvert\bitvar{MVECTS}[\locvar{\bi}]_x\rvert\right\rfloor* - \sign(\bitvar{MVECTS}[\locvar{\bi}]_x). -\end{equation*} -\item -Assign \locvar{MVY} the value -\begin{equation*} - \left\lfloor\lvert\bitvar{MVECTS}[\locvar{\bi}]_y\rvert\right\rfloor* - \sign(\bitvar{MVECTS}[\locvar{\bi}]_y). -\end{equation*} -\item -Assign \locvar{MVX2} the value -\begin{equation*} - \left\lceil\lvert\bitvar{MVECTS}[\locvar{\bi}]_x\rvert\right\rceil* - \sign(\bitvar{MVECTS}[\locvar{\bi}]_x). -\end{equation*} -\item -Assign \locvar{MVY2} the value -\begin{equation*} - \left\lceil\lvert\bitvar{MVECTS}[\locvar{\bi}]_y\rvert\right\rceil* - \sign(\bitvar{MVECTS}[\locvar{\bi}]_y). -\end{equation*} -\item -If \locvar{MVX} equals \locvar{MVX2} and \locvar{MVY} equals \locvar{MVY2}, - use the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH}, \locvar{BX}, - \locvar{BY}, \locvar{MVX}, and \locvar{MVY}, compute \locvar{PRED} using the - procedure given in Section~\ref{sub:predfullpel}. -\item -Otherwise, use the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH}, - \locvar{BX}, \locvar{BY}, \locvar{MVX}, \locvar{MVY}, \locvar{MVX2}, and - \locvar{MVY2} to compute \locvar{PRED} using the procedure given in - Section~\ref{sub:predhalfpel}. -\end{enumerate} -\item -If $\bitvar{NCOEFFS}[\locvar{\bi}]$ is less than 2: -\begin{enumerate} -\item -Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, \\ - \bitvar{QRSIZES}, \bitvar{QRBMIS}, \locvar{\qti}, \locvar{\pli}, and - \locvar{\idx{qi0}}, use the procedure given in Section~\ref{sub:quant-mat} to - compute the DC quantization matrix \locvar{QMAT}. -\item -Assign \locvar{DC} the value -\begin{equation*} - (\bitvar{COEFFS}[\bitvar{\bi}][0]*\locvar{QMAT}[0]+15)>>5. -\end{equation*} -\item -Truncate \locvar{DC} to a 16-bit signed representation by dropping any - higher-order bits. -\item -For each value of \locvar{\idx{by}} from 0 to 7, and each value of - \locvar{\idx{bx}} from 0 to 7, assign - $\locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value \locvar{DC}. -\end{enumerate} -\item -Otherwise: -\begin{enumerate} -\item -Assign \locvar{\qi} the value $\bitvar{QIS}[\bitvar{QIIS}[\locvar{\bi}]]$. -\item -Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, \\ - \bitvar{QRSIZES}, \bitvar{QRBMIS}, \locvar{\qti}, \locvar{\pli}, - \locvar{\idx{qi0}}, and \locvar{\qi}, compute \locvar{DQC} using the procedure - given in Section~\ref{sub:dequant}. -\item -Using \locvar{DQC}, compute \locvar{RES} using the procedure given in - Section~\ref{sub:2d-idct}. -\end{enumerate} -\end{enumerate} -\item -Otherwise: -\begin{enumerate} -\item -Assign \locvar{\rfi} the value 1. -\item -Assign \locvar{REFP}, \locvar{RPW}, and \locvar{RPH} the values given in - Table~\ref{tab:refp} corresponding to current value of \locvar{\rfi} and - \locvar{\pli}. -\item -Assign \locvar{MVX} the value 0. -\item -Assign \locvar{MVY} the value 0. -\item -Using the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH}, \locvar{BX}, - \locvar{BY}, \locvar{MVX}, and \locvar{MVY}, compute \locvar{PRED} using the - procedure given in Section~\ref{sub:predfullpel}. -This is simply a copy of the co-located block in the previous reference frame. -\item -For each value of \locvar{\idx{by}} from 0 to 7, and each value of - \locvar{\idx{bx}} from 0 to 7, assign - $\locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value 0. -\end{enumerate} -\item -For each value of \locvar{\idx{by}} from 0 to 7, and each value of - \locvar{\idx{bx}} from 0 to 7: -\begin{enumerate} -\item -Assign \locvar{P} the value - $(\locvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]+ - \locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}])$. -\item -If \locvar{P} is greater than $255$, assign \locvar{P} the value $255$. -\item -If \locvar{P} is less than $0$, assign \locvar{P} the value $0$. -\item -If \locvar{\pli} equals 0, assign - $\bitvar{RECY}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$ - the value \locvar{P}. -\item -Otherwise, if \locvar{\pli} equals 1, assign - $\bitvar{RECB}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$ - the value \locvar{P}. -\item -Otherwise, \locvar{\pli} equals 2, so assign - $\bitvar{RECR}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$ - the value \locvar{P}. -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\section{Loop Filtering} -\label{sec:loopfilter} - -\begin{figure}[htbp] -\begin{center} -\includegraphics{lflim} -\end{center} -\caption{The loop filter response function.} -\label{fig:lflim} -\end{figure} - -The loop filter is a simple deblocking filter that is based on running a small - edge detecting filter over the coded block edges and adjusting the pixel - values by a tapered response. -The filter response is modulated by the following non-linear function: -\begin{align*} -\lflim(\locvar{R},\bitvar{L})&=\left\{\begin{array}{ll} -0, & \locvar{R}\le-2*\bitvar{L} \\ --\locvar{R}-2*\bitvar{L}, & -2*\bitvar{L}<\locvar{R}\le-\bitvar{L} \\ -\locvar{R}, & -\bitvar{L}<\locvar{R}<\bitvar{L} \\ --\locvar{R}+2*\bitvar{L}, & \bitvar{L}\le\locvar{R}<2*\bitvar{L} \\ -0, & 2*\bitvar{L}\le\locvar{R} -\end{array}\right. -\end{align*} -Here \bitvar{L} is a limiting value equal to $\bitvar{LFLIMS}[\idx{qi0}]$. -It defines the peaks of the function, illustrated in Figure~\ref{fig:lflim}. -\bitvar{LFLIMS} is an array of values specified in the setup header and is - indexed by \idx{qi0}, the first quantization index for the frame, the one used - for all the DC coefficients. -Larger values of \bitvar{L} indicate a stronger filter. - -\subsection{Horizontal Filter} -\label{sub:filth} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$ - array containing the contents of a plane of the reconstructed frame. \\ -\bitvar{FX} & Integer & 20 & No & The horizontal pixel index of the - lower-left corner of the area to be filtered. \\ -\bitvar{FY} & Integer & 20 & No & The vertical pixel index of the - lower-left corner of the area to be filtered. \\ -\bitvar{L} & Integer & 7 & No & The loop filter limit value. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$ - array containing the contents of a plane of the reconstructed frame. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{R} & Integer & 9 & Yes & The edge detector response. \\ -\locvar{P} & Integer & 9 & Yes & A filtered pixel value. \\ -\locvar{\idx{by}} & Integer & 20 & No & The vertical pixel index in the - block. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure applies a $4$-tap horizontal filter to each row of a vertical - block edge. - -\begin{enumerate} -\item -For each value of \locvar{\idx{by}} from $0$ to $7$: -\begin{enumerate} -\item -Assign \locvar{R} the value -\begin{multline*} -(\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}]- - 3*\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]+\\ - 3*\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]- - \bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+3]+4)>>3 -\end{multline*} -\item -Assign \locvar{P} the value - $(\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]+ - \lflim(\locvar{R},\bitvar{L}))$. -\item -If \locvar{P} is less than zero, assign - $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value zero. -\item -Otherwise, if \locvar{P} is greater than $255$, assign - $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value $255$. -\item -Otherwise, assign - $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value - \locvar{P}. -\item -Assign \locvar{P} the value - $(\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]- - \lflim(\locvar{R},\bitvar{L}))$. -\item -If \locvar{P} is less than zero, assign - $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value zero. -\item -Otherwise, if \locvar{P} is greater than $255$, assign - $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value $255$. -\item -Otherwise, assign - $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value - \locvar{P}. -\end{enumerate} -\end{enumerate} - -\subsection{Vertical Filter} -\label{sub:filtv} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$ - array containing the contents of a plane of the reconstructed frame. \\ -\bitvar{FX} & Integer & 20 & No & The horizontal pixel index of the - lower-left corner of the area to be filtered. \\ -\bitvar{FY} & Integer & 20 & No & The vertical pixel index of the - lower-left corner of the area to be filtered. \\ -\bitvar{L} & Integer & 7 & No & The loop filter limit value. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$ - array containing the contents of a plane of the reconstructed frame. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{R} & Integer & 9 & Yes & The edge detector response. \\ -\locvar{P} & Integer & 9 & Yes & A filtered pixel value. \\ -\locvar{\idx{bx}} & Integer & 20 & No & The horizontal pixel index in the - block. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure applies a $4$-tap vertical filter to each column of a horizontal - block edge. - -\begin{enumerate} -\item -For each value of \locvar{\idx{bx}} from $0$ to $7$: -\begin{enumerate} -\item -Assign \locvar{R} the value -\begin{multline*} -(\bitvar{RECP}[\bitvar{FY}][\bitvar{FX}+\locvar{\idx{bx}}]- - 3*\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]+\\ - 3*\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]- - \bitvar{RECP}[\bitvar{FY}+3][\bitvar{FX}+\locvar{\idx{bx}}]+4)>>3 -\end{multline*} -\item -Assign \locvar{P} the value - $(\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]+ - \lflim(\locvar{R},\bitvar{L}))$. -\item -If \locvar{P} is less than zero, assign - $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value zero. -\item -Otherwise, if \locvar{P} is greater than $255$, assign - $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value $255$. -\item -Otherwise, assign - $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value - \locvar{P}. -\item -Assign \locvar{P} the value - $(\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]- - \lflim(\locvar{R},\bitvar{L}))$. -\item -If \locvar{P} is less than zero, assign - $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value zero. -\item -Otherwise, if \locvar{P} is greater than $255$, assign - $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value $255$. -\item -Otherwise, assign - $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value - \locvar{P}. -\end{enumerate} -\end{enumerate} - -\subsection{Complete Loop Filter} -\label{sub:loop-filt} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{LFLIMS} & \multicolumn{1}{p{40pt}}{Integer array} & - 7 & No & A 64-element array of loop filter limit - values. \\ -\bitvar{RPYW} & Integer & 20 & No & The width of the $Y'$ plane of the - reconstruced frame in pixels. \\ -\bitvar{RPYH} & Integer & 20 & No & The height of the $Y'$ plane of the - reconstruced frame in pixels. \\ -\bitvar{RPCW} & Integer & 20 & No & The width of the $C_b$ and $C_r$ - planes of the reconstruced frame in pixels. \\ -\bitvar{RPCH} & Integer & 20 & No & The height of the $C_b$ and $C_r$ - planes of the reconstruced frame in pixels. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of - flags indicating which blocks are coded. \\ -\bitvar{QIS} & \multicolumn{1}{p{40pt}}{Integer array} & - 6 & No & An \bitvar{NQIS}-element array of - \qi\ values. \\ -\bitvar{RECY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the reconstructed frame. \\ -\bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the reconstructed frame. \\ -\bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the reconstructed frame. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RECY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the reconstructed frame. \\ -\bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the reconstructed frame. \\ -\bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the reconstructed frame. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{RPW} & Integer & 20 & No & The width of the current plane of the - reconstructed frame in pixels. \\ -\locvar{RPH} & Integer & 20 & No & The height of the current plane of - the reconstructed frame in pixels. \\ -\locvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$ - array containing the contents of the current plane of the reconstruced - frame. \\ -\locvar{BX} & Integer & 20 & No & The horizontal pixel index of the - lower-left corner of the current block. \\ -\locvar{BY} & Integer & 20 & No & The vertical pixel index of the - lower-left corner of the current block. \\ -\locvar{FX} & Integer & 20 & No & The horizontal pixel index of the - lower-left corner of the area to be filtered. \\ -\locvar{FY} & Integer & 20 & No & The vertical pixel index of the - lower-left corner of the area to be filtered. \\ -\locvar{L} & Integer & 7 & No & The loop filter limit value. \\ -\locvar{\bi} & Integer & 36 & No & The index of the current block in - coded order. \\ -\locvar{\bj} & Integer & 36 & No & The index of a neighboring block in - coded order. \\ -\locvar{\pli} & Integer & 2 & No & The color plane index of the current - block. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure defines the order that the various block edges are filtered. -Because each application of one of the two filters above destructively modifies - the contents of the reconstructed image, the precise output obtained differs - depending on the order that horizontal and vertical filters are applied to the - edges of a single block. -The order defined here conforms to that used by VP3. - -\begin{enumerate} -\item -Assign \locvar{L} the value $\bitvar{LFLIMS}[\bitvar{QIS}[0]]$. -\item -For each block in {\em raster} order, with coded-order index \locvar{\bi}: -\begin{enumerate} -\item -If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero: -\begin{enumerate} -\item -Assign \locvar{\pli} the index of the color plane block \locvar{\bi} belongs - to. -\item -Assign \locvar{RECP}, \locvar{RPW}, and \locvar{RPH} the values given in - Table~\ref{tab:recp} corresponding to the value of \locvar{\pli}. - -\begin{table}[htbp] -\begin{center} -\begin{tabular}{clll}\toprule -\locvar{\pli} & \locvar{RECP} & \locvar{RPW} & \locvar{RPH} \\\midrule -$0$ & \bitvar{RECY} & \bitvar{RPYW} & \bitvar{RPYH} \\ -$1$ & \bitvar{RECCB} & \bitvar{RPCW} & \bitvar{RPCH} \\ -$2$ & \bitvar{RECCR} & \bitvar{RPCW} & \bitvar{RPCH} \\ -\bottomrule\end{tabular} -\end{center} -\caption{Reconstructed Planes and Sizes for Each \locvar{\pli}} -\label{tab:recp} -\end{table} - -\item -Assign \locvar{BX} the horizontal pixel index of the lower-left corner of the - block \locvar{\bi}. -\item -Assign \locvar{BY} the vertical pixel index of the lower-left corner of the - block \locvar{\bi}. -\item -If \locvar{BX} is greater than zero: -\begin{enumerate} -\item -Assign \locvar{FX} the value $(\locvar{BX}-2)$. -\item -Assign \locvar{FY} the value \locvar{BY}. -\item -Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the - horizontal block filter to the left edge of block \locvar{\bi} with the - procedure described in Section~\ref{sub:filth}. -\end{enumerate} -\item -If \locvar{BY} is greater than zero: -\begin{enumerate} -\item -Assign \locvar{FX} the value \locvar{BX}. -\item -Assign \locvar{FY} the value $(\locvar{BY}-2)$ -\item -Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the - vertical block filter to the bottom edge of block \locvar{\bi} with the - procedure described in Section~\ref{sub:filtv}. -\end{enumerate} -\item -If $(\locvar{BX}+8)$ is less than \locvar{RPW} and - $\bitvar{BCODED}[\locvar{\bj}]$ is zero, where \locvar{\bj} is the coded-order - index of the block adjacent to \locvar{\bi} on the right: -\begin{enumerate} -\item -Assign \locvar{FX} the value $(\locvar{BX}+6)$. -\item -Assign \locvar{FY} the value \locvar{BY}. -\item -Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the - horizontal block filter to the right edge of block \locvar{\bi} with the - procedure described in Section~\ref{sub:filth}. -\end{enumerate} -\item -If $(\locvar{BY}+8)$ is less than \locvar{RPH} and - $\bitvar{BCODED}[\locvar{\bj}]$ is zero, where \locvar{\bj} is the coded-order - index of the block adjacent to \locvar{\bi} above: -\begin{enumerate} -\item -Assign \locvar{FX} the value \locvar{BX}. -\item -Assign \locvar{FY} the value $(\locvar{BY}+6)$ -\item -Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the - vertical block filter to the top edge of block \locvar{\bi} with the - procedure described in Section~\ref{sub:filtv}. -\end{enumerate} -\end{enumerate} -\end{enumerate} -\end{enumerate} - -\paragraph{VP3 Compatibility} - -The original VP3 decoder implemented unrestricted motion vectors by enlarging - the reconstructed frame buffers and repeating the pixels on its edges into the - padding region. -However, for the previous reference frame this padding ocurred before the loop - filter was applied, but for the golden reference frame it occurred afterwards. - -This means that for the previous reference frame, the padding values were - required to be stored separately from the main image values. -Furthermore, even if the previous and golden reference frames were in fact the - same frame, they could have different padding values. -Finally, the encoder did not apply the loop filter at all, which resulted in - artifacts, particularly in near-static scenes, due to prediction-loop - mismatch. -This last can only be considered a bug in the VP3 encoder. - -Given all these things, Theora now uniformly applies the loop filter before - the reference frames are padded. -This means it is possible to use the same buffer for the previous and golden - reference frames when they do indeed refer to the same frame. -It also means that on architectures where memory bandwidth is limited, it is - possible to avoid storing padding values, and simply clamp the motion vectors - applied to each pixel as described in Sections~\ref{sub:predfullpel} - and~\ref{sub:predhalfpel}. -This means that the predicted pixel values along the edges of the frame might - differ slightly between VP3 and Theora, but since the VP3 encoder did not - apply the loop filter in the first place, this is not likely to impose any - serious compatibility issues. - -\section{Complete Frame Decode} - -\paragraph{Input parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{FMBW} & Integer & 16 & No & The width of the frame in macro - blocks. \\ -\bitvar{FMBH} & Integer & 16 & No & The height of the frame in macro - blocks. \\ -\bitvar{NSBS} & Integer & 32 & No & The total number of super blocks in a - frame. \\ -\bitvar{NBS} & Integer & 36 & No & The total number of blocks in a - frame. \\ -\bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a - frame. \\ -\bitvar{FRN} & Integer & 32 & No & The frame-rate numerator. \\ -\bitvar{FRD} & Integer & 32 & No & The frame-rate denominator. \\ -\bitvar{PARN} & Integer & 24 & No & The pixel aspect-ratio numerator. \\ -\bitvar{PARD} & Integer & 24 & No & The pixel aspect-ratio - denominator. \\ -\bitvar{CS} & Integer & 8 & No & The color space. \\ -\bitvar{PF} & Integer & 2 & No & The pixel format. \\ -\bitvar{NOMBR} & Integer & 24 & No & The nominal bitrate of the stream, in - bits per second. \\ -\bitvar{QUAL} & Integer & 6 & No & The quality hint. \\ -\bitvar{KFGSHIFT} & Integer & 5 & No & The amount to shift the key frame - number by in the granule position. \\ -\bitvar{LFLIMS} & \multicolumn{1}{p{40pt}}{Integer array} & - 7 & No & A 64-element array of loop filter - limit values. \\ -\bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values - for AC coefficients for each \qi\ value. \\ -\bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} & - 16 & No & A 64-element array of scale values - for the DC coefficient for each \qi\ value. \\ -\bitvar{NBMS} & Integer & 10 & No & The number of base matrices. \\ -\bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 8 & No & A $\bitvar{NBMS}\times 64$ array - containing the base matrices. \\ -\bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} & - 6 & No & A $2\times 3$ array containing the - number of quant ranges for a given \qti\ and \pli, respectively. -This is at most $63$. \\ -\bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 6 & No & A $2\times 3\times 63$ array of the - sizes of each quant range for a given \qti\ and \pli, respectively. -Only the first $\bitvar{NQRS}[\qti][\pli]$ values will be used. \\ -\bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} & - 9 & No & A $2\times 3\times 64$ array of the - \bmi's used for each quant range for a given \qti\ and \pli, respectively. -Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values will be used. \\ -\bitvar{HTS} & \multicolumn{3}{l}{Huffman table array} - & An 80-element array of Huffman tables - with up to 32 entries each. \\ -\bitvar{GOLDREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the golden reference - frame. \\ -\bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the golden reference - frame. \\ -\bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the golden reference - frame. \\ -\bitvar{PREVREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the previous reference - frame. \\ -\bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the previous reference - frame. \\ -\bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the previous reference - frame. \\ -\bottomrule\end{tabularx} - -\paragraph{Output parameters:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\bitvar{RECY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the reconstructed frame. \\ -\bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the reconstructed - frame. \\ -\bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the reconstructed - frame. \\ -\bitvar{GOLDREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the golden reference - frame. \\ -\bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the golden reference - frame. \\ -\bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the golden reference - frame. \\ -\bitvar{PREVREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$ - array containing the contents of the $Y'$ plane of the previous reference - frame. \\ -\bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_b$ plane of the previous reference - frame. \\ -\bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$ - array containing the contents of the $C_r$ plane of the previous reference - frame. \\ -\bottomrule\end{tabularx} - -\paragraph{Variables used:}\hfill\\* -\begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule -\multicolumn{1}{c}{Name} & -\multicolumn{1}{c}{Type} & -\multicolumn{1}{p{30pt}}{\centering Size (bits)} & -\multicolumn{1}{c}{Signed?} & -\multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead -\locvar{FTYPE} & Integer & 1 & No & The frame type. \\ -\locvar{NQIS} & Integer & 2 & No & The number of \qi\ values. \\ -\locvar{QIS} & \multicolumn{1}{p{40pt}}{Integer array} & - 6 & No & An \locvar{NQIS}-element array of - \qi\ values. \\ -\locvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} & - 1 & No & An \bitvar{NBS}-element array of flags - indicating which blocks are coded. \\ -\locvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} & - 3 & No & An \bitvar{NMBS}-element array of - coding modes for each macro block. \\ -\locvar{MVECTS} & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} & - 6 & Yes & An \bitvar{NBS}-element array of motion - vectors for each block. \\ -\locvar{QIIS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 2 & No & An \bitvar{NBS}-element array of - \locvar{\qii} values for each block. \\ -\locvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} & - 16 & Yes & An $\bitvar{NBS}\times 64$ array of - quantized DCT coefficient values for each block in zig-zag order. \\ -\locvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} & - 7 & No & An \bitvar{NBS}-element array of the - coefficient count for each block. \\ -\bitvar{RPYW} & Integer & 20 & No & The width of the $Y'$ plane of the - reference frames in pixels. \\ -\bitvar{RPYH} & Integer & 20 & No & The height of the $Y'$ plane of the - reference frames in pixels. \\ -\bitvar{RPCW} & Integer & 20 & No & The width of the $C_b$ and $C_r$ - planes of the reference frames in pixels. \\ -\bitvar{RPCH} & Integer & 20 & No & The height of the $C_b$ and $C_r$ - planes of the reference frames in pixels. \\ -\locvar{\bi} & Integer & 36 & No & The index of the current block in coded - order. \\ -\bottomrule\end{tabularx} -\medskip - -This procedure uses all the procedures defined in the previous section of this - chapter to decode and reconstruct a complete frame. -It takes as input values decoded from the headers, as well as the current - reference frames. -As output, it gives the uncropped, reconstructed frame. -This should be cropped to picture region before display. -As a special case, a 0-byte packet is treated exactly like an inter frame with - no coded blocks. - -\begin{enumerate} -\item -If the size of the data packet is non-zero: -\begin{enumerate} -\item -Decode the frame header values \locvar{FTYPE}, \locvar{NQIS}, and \locvar{QIS} - using the procedure given in Section~\ref{sub:frame-header}. -\item -Using \locvar{FTYPE}, \bitvar{NSBS}, and \bitvar{NBS}, decode the list of coded - block flags into \locvar{BCODED} using the procedure given in - Section~\ref{sub:coded-blocks}. -\item -Using \locvar{FTYPE}, \bitvar{NMBS}, \bitvar{NBS}, and \bitvar{BCODED}, decode - the macro block coding modes into \locvar{MBMODES} using the procedure given - in Section~\ref{sub:mb-modes}. -\item -If \locvar{FTYPE} is non-zero (inter frame), using \bitvar{PF}, \bitvar{NMBS}, - \locvar{MBMODES}, \bitvar{NBS}, and \locvar{BCODED}, decode the motion vectors - into \locvar{MVECTS} using the procedure given in Section~\ref{sub:mv-decode}. -\item -Using \bitvar{NBS}, \locvar{BCODED}, and \locvar{NQIS}, decode the block-level - \qi\ values into \locvar{QIIS} using the procedure given in - Section~\ref{sub:block-qis}. -\item -Using \bitvar{NBS}, \bitvar{NMBS}, \locvar{BCODED}, and \bitvar{HTS}, decode - the DCT coefficients into \locvar{NCOEFFS} and \locvar{NCOEFFS} using the - procedure given in Section~\ref{sub:dct-coeffs}. -\item -Using \locvar{BCODED} and \locvar{MBMODES}, undo the DC prediction on the DC - coefficients stored in \locvar{COEFFS} using the procedure given in - Section~\ref{sub:dc-pred-undo}. -\end{enumerate} -\item -Otherwise: -\begin{enumerate} -\item -Assign \locvar{FTYPE} the value 1 (inter frame). -\item -Assign \locvar{NQIS} the value 1. -\item -Assign $\locvar{QIS}[0]$ the value 63. -\item -For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, assign - $\locvar{BCODED}[\locvar{\bi}]$ the value zero. -\end{enumerate} -\item -Assign \locvar{RPYW} and \locvar{RPYH} the values $(16*\bitvar{FMBW})$ and - $(16*\bitvar{FMBH})$, respectively. -\item -Assign \locvar{RPCW} and \locvar{RPCH} the values from the row of - Table~\ref{tab:rpcwh-for-pf} corresponding to \bitvar{PF}. - -\begin{table}[tb] -\begin{center} -\begin{tabular}{crr}\toprule -\bitvar{PF} & \multicolumn{1}{c}{\locvar{RPCW}} - & \multicolumn{1}{c}{\locvar{RPCH}} \\\midrule -$0$ & $8*\bitvar{FMBW}$ & $8*\bitvar{FMBH}$ \\ -$2$ & $8*\bitvar{FMBW}$ & $16*\bitvar{FMBH}$ \\ -$3$ & $16*\bitvar{FMBW}$ & $16*\bitvar{FMBH}$ \\ -\bottomrule\end{tabular} -\end{center} -\caption{Width and Height of Chroma Planes for each Pixel Format} -\label{tab:rpcwh-for-pf} -\end{table} - -\item -Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, - \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{NBS}, \locvar{BCODED}, - \locvar{MBMODES}, \locvar{MVECTS}, \locvar{COEFFS}, \locvar{NCOEFFS}, - \locvar{QIS}, \locvar{QIIS}, \locvar{RPYW}, \locvar{RPYH}, \locvar{RPCW}, - \locvar{RPCH}, \bitvar{GOLDREFY}, \bitvar{GOLDREFCB}, \bitvar{GOLDREFCR}, - \bitvar{PREVREFY}, \bitvar{PREVREFCB}, and \bitvar{PREVREFCR}, reconstruct the - complete frame into \bitvar{RECY}, \bitvar{RECCB}, and \bitvar{RECCR} using - the procedure given in Section~\ref{sub:recon}. -\item -Using \bitvar{LFLIMS}, \locvar{RPYW}, \locvar{RPYH}, \locvar{RPCW}, - \locvar{RPCH}, \bitvar{NBS}, \locvar{BCODED}, and \locvar{QIS}, apply the loop - filter to the reconstructed frame in \bitvar{RECY}, \bitvar{RECCB}, and - \bitvar{RECCR} using the procedure given in Section~\ref{sub:loop-filt}. -\item -If \locvar{FTYPE} is zero (intra frame), assign \bitvar{GOLDREFY}, - \bitvar{GOLDREFCB}, and \bitvar{GOLDREFCR} the values \bitvar{RECY}, - \bitvar{RECCB}, and \bitvar{RECCR}, respectively. -\item -Assign \bitvar{PREVREFY}, \bitvar{PREVREFCB}, and \bitvar{PREVREFCR} the values - \bitvar{RECY}, \bitvar{RECCB}, and \bitvar{RECCR}, respectively. -\end{enumerate} - -%\backmatter -\appendix - -\chapter{Ogg Bitstream Encapsulation} -\label{app:oggencapsulation} - -\section{Overview} - -This document specifies the embedding or encapsulation of Theora packets - in an Ogg transport stream. - -Ogg is a stream oriented wrapper for coded, linear time-based data. -It provides syncronization, multiplexing, framing, error detection and - seeking landmarks for the decoder and complements the raw packet format - used by the Theora codec. - -This document assumes familiarity with the details of the Ogg standard. -The Xiph.org documentation provides an overview of the Ogg transport stream - format at \url{http://www.xiph.org/ogg/doc/oggstream.html} and a detailed - description at \url{http://www.xiph.org/ogg/doc/framing.html}. -The format is also defined in RFC~3533 \cite{rfc3533}. -While Theora packets can be embedded in a wide variety of media - containers and streaming mechanisms, the Xiph.org Foundation - recommends Ogg as the native format for Theora video in file-oriented - storage and transmission contexts. - -\subsection{MIME type} - -The generic MIME type of any Ogg file is {\tt application/ogg}. -The specific MIME type for the Ogg Theora profile documented here -is {\tt video/ogg}. This is the MIME type recommended for files -conforming to this appendix. The recommended filename extension -is {\tt .ogv}. - -Outside of an encapsulation, the mime type {\tt video/theora} may - be used to refer specifically to the Theora compressed video stream. - -\section{Embedding in a logical bitstream} - -Ogg separates the concept of a {\em logical bitstream} consisting of the - framing of a particular sequence of packets and complete within itself - from the {\em physical bitstream} which may consist either of a single - logical bitstream or a number of logical bitstreams multiplexed - together. -This section specifies the embedding of Theora packets in a logical Ogg - bitstream. -The mapping of Ogg Theora logical bitstreams into a multiplexed physical Ogg - stream is described in the next section. - -\subsection{Headers} - -The initial identification header packet appears by itself in a - single Ogg page. -This page defines the start of the logical stream and MUST have - the `beginning of stream' flag set. - -The second and third header packets (comment metadata and decoder - setup data) can together span one or more Ogg pages. -If there are additional non-normative header packets, they MUST be - included in this sequence of pages as well. -The comment header packet MUST begin the second Ogg page in the logical - bitstream, and there MUST be a page break between the last header - packet and the first frame data packet. - -These two page break requirements facilitate stream identification and - simplify header acquisition for seeking and live streaming applications. - -All header pages MUST have their granule position field set to zero. - -\subsection{Frame data} - -The first frame data packet in a logical bitstream MUST begin a new Ogg - page. -All other data packets are placed one at a time into Ogg pages - until the end of the stream. -Packets can span pages and multiple packets can be placed within any - one page. -The last page in the logical bitstream SHOULD have its - 'end of stream' flag set to indicate complete transmission - of the available video. - -Frame data pages MUST be marked with a granule position corresponding to - the end of the display interval of the last frame/packet that finishes - in that page. See the next section for details. - -\subsection{Granule position} - -Data packets are marked by a granulepos derived from the count of decodable -frames after that packet is processed. The field itself is divided into two -sections, the width of the less significant section being given by the KFGSHIFT -parameter decoded from the identification header -(Section~\ref{sec:idheader}). -The more significant portion of the field gives the count of coded -frames after the coding of the last keyframe in stream, and the less -significant portion gives the count of frames since the last keyframe. -Thus a stream would begin with a split granulepos of $1|0$ (a keyframe), -followed by $1|1$, $1|2$, $1|3$, etc. Around a keyframe in the -middle of the stream the granulepos sequence might be $1234|35$, -$1234|36$, $1234|37$, $1271|0$ (for the keyframe), $1271|1$, and so -on. In this way the granulepos field increased monotonically as required -by the Ogg format, but contains information necessary to efficiently -find the previous keyframe to continue decoding after a seek. - -Prior to bitstream version 3.2.1, data packets were marked by a -granulepos derived from the index of the frame being decoded, -rather than the count. That is they marked the beginning of the -display interval of a frame rather than the end. Such streams -have the VREV field of the identification header set to `0' -instead of `1'. They can be interpreted according to the description -above by adding 1 to the more signification field of the split -granulepos when VREV is less than 1. - -\section{Multiplexed stream mapping} - -Applications supporting Ogg Theora must support Theora bitstreams - multiplexed with compressed audio data in the Vorbis I and Speex - formats, and should support Ogg-encapsulated MNG graphics for overlays. - -Multiple audio and video bitstreams may be multiplexed together. -How playback of multiple/alternate streams is handled is up to the - application. -Some conventions based on included metadata aide interoperability - in this respect. -%TODO: describe multiple vs. alternate streams, language mapping -% and reference metadata descriptions. - -\subsection{Chained streams} - -Ogg Theora decoders and playback applications MUST support both grouped - streams (multiplexed concurrent logical streams) and chained streams - (sequential concatenation of independent physical bitstreams). - -The number and codec data types of multiplexed streams and the decoder - parameters for those stream types that re-occur can all change at a - chaining boundary. -A playback application MUST be prepared to handle such changes and - SHOULD do so smoothly with the minimum possible visible disruption. -The specification of grouped streams below applies independently to each - segment of a chained bitstream. - -\subsection{Grouped streams} - -At the beginning of a multiplexed stream, the `beginning of stream' - pages for each logical bitstream will be grouped together. -Within these, the first page to occur MUST be the Theora page. -This facilitates identification of Ogg Theora files among other - Ogg-encapsulated content. -A playback application must nevertheless handle streams where this - arrangement is not correct. -%TBT: Then what's the point of requiring it in the spec? - -If there is more than one Theora logical stream, the first page should - be from the primary stream. -That is, the best choice for the stream a generic player should begin - displaying without special user direction. -If there is more than one audio stream, or of any other stream - type, the identification page of the primary stream of that type - should be placed before the others. -%TBT: That's all pretty vague. - -After the `beginning of stream' pages, the header pages of each of - the logical streams MUST be grouped together before any data pages - occur. - -After all the header pages have been placed, - the data pages are multiplexed together. -They should be placed in the stream in increasing order by the - time equivalents of their granule position fields. -This facilitates seeking while limiting the buffering requirements of the - playback demultiplexer. -%TODO: A lot of this language is encoder-oriented. -%TODO: We define a decoder-oriented specification. -%TODO: The language should be changed to match. - -\cleardoublepage -\chapter{VP3} - -\section{VP3 Compatibility} -\label{app:vp3-compat} -This section lists all of the encoder and decoder issues that may affect VP3 - compatibly. -Each is described in more detail in the text itself. -This list is provided merely for reference. - -\begin{itemize} -\item -Bitstream headers (Section~\ref{sec:headers}). -\begin{itemize} -\item -Identification header (Section~\ref{sec:idheader}). -\begin{itemize} -\item -Non-multiple of 16 picture sizes. -\item -Standardized color spaces. -\item -Support for $4:4:4$ and $4:2:2$ pixel formats. -\end{itemize} -\item -Setup header -\begin{itemize} -\item -Loop filter limit values (Section~\ref{sub:loop-filter-limits}). -\item -Quantization parameters (Section~\ref{sub:quant-params}). -\item -Huffman tables (Section~\ref{sub:huffman-tables}). -\end{itemize} -\end{itemize} -\item -Frame header format (Section~\ref{sub:frame-header}). -\item -Extended long-run bit strings (Section~\ref{sub:long-run}). -\item -INTER\_MV\_FOUR handling of uncoded blocks (Section~\ref{sub:mb-mv-decode}). -\item -Block-level \qi\ values (Section~\ref{sub:block-qis}). -\item -Zero-length EOB runs (Section~\ref{sub:eob-token}). -\item -Unrestricted motion vector padding and the loop filter - (Section~\ref{sub:loop-filt}). -\end{itemize} - -\section{Loop Filter Limit Values} -\label{app:vp3-loop-filter-limits} - -The hard-coded loop filter limit values used in VP3 are defined as follows: -\begin{align*} -\bitvar{LFLIMS} = & \begin{array}[t]{r@{}rrrrrrrr@{}l} -\{ & 30, & 25, & 20, & 20, & 15, & 15, & 14, & 14, & \\ - & 13, & 13, & 12, & 12, & 11, & 11, & 10, & 10, & \\ - & 9, & 9, & 8, & 8, & 7, & 7, & 7, & 7, & \\ - & 6, & 6, & 6, & 6, & 5, & 5, & 5, & 5, & \\ - & 4, & 4, & 4, & 4, & 3, & 3, & 3, & 3, & \\ - & 2, & 2, & 2, & 2, & 2, & 2, & 2, & 2, & \\ - & 0, & 0, & 0, & 0, & 0, & 0, & 0, & 0, & \\ - & 0, & 0, & 0, & 0, & 0, & 0, & 0, & 0\;\ & \!\} \\ -\end{array} -\end{align*} - -\section{Quantization Parameters} -\label{app:vp3-quant-params} - -The hard-coded quantization parameters used by VP3 are defined as follows: - -\begin{align*} -\bitvar{ACSCALE} = & \begin{array}[t]{r@{}rrrrrrrr@{}l} -\{ & 500, & 450, & 400, & 370, & 340, & 310, & 285, & 265, & \\ - & 245, & 225, & 210, & 195, & 185, & 180, & 170, & 160, & \\ - & 150, & 145, & 135, & 130, & 125, & 115, & 110, & 107, & \\ - & 100, & 96, & 93, & 89, & 85, & 82, & 75, & 74, & \\ - & 70, & 68, & 64, & 60, & 57, & 56, & 52, & 50, & \\ - & 49, & 45, & 44, & 43, & 40, & 38, & 37, & 35, & \\ - & 33, & 32, & 30, & 29, & 28, & 25, & 24, & 22, & \\ - & 21, & 19, & 18, & 17, & 15, & 13, & 12, & 10\;\ & \!\} \\ -\end{array} \\ -\bitvar{DCSCALE} = & \begin{array}[t]{r@{}rrrrrrrr@{}l} -\{ & 220, & 200, & 190, & 180, & 170, & 170, & 160, & 160, & \\ - & 150, & 150, & 140, & 140, & 130, & 130, & 120, & 120, & \\ - & 110, & 110, & 100, & 100, & 90, & 90, & 90, & 80, & \\ - & 80, & 80, & 70, & 70, & 70, & 60, & 60, & 60, & \\ - & 60, & 50, & 50, & 50, & 50, & 40, & 40, & 40, & \\ - & 40, & 40, & 30, & 30, & 30, & 30, & 30, & 30, & \\ - & 30, & 20, & 20, & 20, & 20, & 20, & 20, & 20, & \\ - & 20, & 10, & 10, & 10, & 10, & 10, & 10, & 10\;\ & \!\} \\ -\end{array} -\end{align*} - -VP3 defines only a single quantization range for each quantization type and - color plane, and the base matrix used is constant throughout the range. -There are three base matrices defined. -The first is used for the $Y'$ channel of INTRA mode blocks, and the second for - both the $C_b$ and $C_r$ channels of INTRA mode blocks. -The last is used for INTER mode blocks of all channels. - -\begin{align*} -\bitvar{BMS} = \{ & \begin{array}[t]{r@{}rrrrrrrr@{}l} -\{ & 16, & 11, & 10, & 16, & 24, & 40, & 51, & 61, & \\ - & 12, & 12, & 14, & 19, & 26, & 58, & 60, & 55, & \\ - & 14, & 13, & 16, & 24, & 40, & 57, & 69, & 56, & \\ - & 14, & 17, & 22, & 29, & 51, & 87, & 80, & 62, & \\ - & 18, & 22, & 37, & 58, & 68, & 109, & 103, & 77, & \\ - & 24, & 35, & 55, & 64, & 81, & 104, & 113, & 92, & \\ - & 49, & 64, & 78, & 87, & 103, & 121, & 120, & 101, & \\ - & 72, & 92, & 95, & 98, & 112, & 100, & 103, & 99\;\ & \!\}, \\ -%\end{array} \\ -%& \begin{array}[t]{r@{}rrrrrrrr@{}l} -\{ & 17, & 18, & 24, & 47, & 99, & 99, & 99, & 99, & \\ - & 18, & 21, & 26, & 66, & 99, & 99, & 99, & 99, & \\ - & 24, & 26, & 56, & 99, & 99, & 99, & 99, & 99, & \\ - & 47, & 66, & 99, & 99, & 99, & 99, & 99, & 99, & \\ - & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99, & \\ - & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99, & \\ - & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99, & \\ - & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99\;\ & \!\}, \\ -%\end{array} \\ -%& \begin{array}[t]{r@{}rrrrrrrr@{}l} -\{ & 16, & 16, & 16, & 20, & 24, & 28, & 32, & 40, & \\ - & 16, & 16, & 20, & 24, & 28, & 32, & 40, & 48, & \\ - & 16, & 20, & 24, & 28, & 32, & 40, & 48, & 64, & \\ - & 20, & 24, & 28, & 32, & 40, & 48, & 64, & 64, & \\ - & 24, & 28, & 32, & 40, & 48, & 64, & 64, & 64, & \\ - & 28, & 32, & 40, & 48, & 64, & 64, & 64, & 96, & \\ - & 32, & 40, & 48, & 64, & 64, & 64, & 96, & 128, & \\ - & 40, & 48, & 64, & 64, & 64, & 96, & 128, & 128\;\ & \!\}\;\;\} \\ -\end{array} -\end{align*} - -The remaining parameters simply assign these matrices to the proper quant - ranges. - -\begin{align*} -\bitvar{NQRS} = & \{ \{1, 1, 1\}, \{1, 1, 1\} \} \\ -\bitvar{QRSIZES} = & - \{ \{ \{1\}, \{1\}, \{1\} \}, \{ \{1\}, \{1\}, \{1\} \} \} \\ -\bitvar{QRBMIS} = & - \{ \{ \{0, 0\}, \{1, 1\}, \{1, 1\} \}, \{ \{2, 2\}, \{2, 2\}, \{2, 2\} \} \} \\ -\end{align*} - -\section{Huffman Tables} -\label{app:vp3-huffman-tables} - -The following tables contain the hard-coded Huffman codes used by VP3. -There are 80 tables in all, each with a Huffman code for all 32 token values. -The tokens are sorted by the most significant bits of their Huffman code. -This is the same order in which they will be decoded from the setup header. - -\include{vp3huff} - -\cleardoublepage -\chapter{Colophon} - -Ogg is a \href{http://www.xiph.org}{Xiph.org Foundation} effort to protect - essential tenets of Internet multimedia from corporate hostage-taking; Open - Source is the net's greatest tool to keep everyone honest. -See \href{http://www.xiph.org/about.html}{About the Xiph.org Foundation} for - details. - -Ogg Theora is the first Ogg video codec. -Anyone may freely use and distribute the Ogg and Theora specifications, whether - in private, public, or corporate capacity. -However, the Xiph.org Foundation and the Ogg project reserve the right to set - the Ogg Theora specification and certify specification compliance. - -Xiph.org's Theora software codec implementation is distributed under a BSD-like - license. -This does not restrict third parties from distributing independent - implementations of Theora software under other licenses. - -\begin{wrapfigure}{l}{0pt} -\includegraphics[width=2.5cm]{xifish} -\end{wrapfigure} - -These pages are Copyright \textcopyright{} 2004-2007 Xiph.org Foundation. -All rights reserved. -Ogg, Theora, Vorbis, Xiph.org Foundation and their logos are trademarks - (\texttrademark) of the \href{http://www.xiph.org}{Xiph.org Foundation}. - -This document is set in \LaTeX. - - - -\cleardoublepage -\bibliography{spec} - -\end{document} |